diff --git a/.gitignore b/.gitignore index 748b8ba931d..0dbc7ec1a4a 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ bin/ .settings/ .classpath .project +.idea/ +*.iml diff --git a/.travis.yml b/.travis.yml index c8039e0f336..a93a1d45958 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,10 +19,11 @@ language: java +# https://docs.travis-ci.com/user/reference/xenial/#jvm-clojure-groovy-java-scala-support jdk: - - oraclejdk8 - - oraclejdk7 - - openjdk6 + - oraclejdk12 + - openjdk8 + - openjdk11 before_install: - sudo apt-get update -qq diff --git a/README.md b/README.md index 3df28e35bb4..6ef40069efb 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,21 @@ -Apache PDFBox + + +Apache PDFBox =================================================== The Apache PDFBox library is an open source Java tool for working with PDF @@ -7,18 +24,18 @@ of existing documents and the ability to extract content from documents. PDFBox also includes several command line utilities. PDFBox is published under the Apache License, Version 2.0. -PDFBox is a project of the Apache Software Foundation . +PDFBox is a project of the Apache Software Foundation . Binary Downloads ---------------- You can download binary versions for releases currently under development or older -releases from out [Download Page](http://pdfbox.apache.org/download.cgi). +releases from our [Download Page](https://pdfbox.apache.org/download.cgi). Build ----- -You need Java 6 (or higher) and Maven 2 to +You need Java 6 (or higher) and Maven 2 to build PDFBox. The recommended build command is: mvn clean install @@ -33,7 +50,7 @@ Contribute There are various ways to help us improve PDFBox. - look at the [Issue Tracker](https://issues.apache.org/jira/browse/PDFBOX) to help us fix bugs. -- answer questions on our [Users Mailing List](http://pdfbox.apache.org/mailinglists.html "Subscribe to Mailing List"). 
+- answer questions on our [Users Mailing List](https://pdfbox.apache.org/mailinglists.html "Subscribe to Mailing List"). - help us enhance the [Examples](https://svn.apache.org/repos/asf/pdfbox/trunk/examples/) - help us to enhance the [PDFBox Documentation](https://git-wip-us.apache.org/repos/asf/pdfbox-docs) or on [GitHub](https://github.com/apache/pdfbox-docs). @@ -41,7 +58,7 @@ or on [GitHub](https://github.com/apache/pdfbox-docs). Support ------- -**Please follow the guidelines at our [Support Page](http://pdfbox.apache.org/support.html).** +**Please follow the guidelines at our [Support Page](https://pdfbox.apache.org/support.html).** If you have questions about how to use PDFBox do ask on the [Users Mailing List](/mailinglists.html "Subscribe to Mailing List"). @@ -49,8 +66,8 @@ This will get you help from the entire community. The PDFBox examples and the test code in the sources will also provide additional information. -And there are additonal resources available on sites such as -[Stack Overflow](http://stackoverflow.com/search?q=pdfbox "Stack Overflow"). +And there are additional resources available on sites such as +[Stack Overflow](https://stackoverflow.com/search?q=pdfbox "Stack Overflow"). If you are sure you have found a bug the please report the issue in our [Issue Tracker](https://issues.apache.org/jira/browse/PDFBOX). @@ -60,7 +77,7 @@ Known Limitations and Problems See the issue tracker at https://issues.apache.org/jira/browse/PDFBOX for the full list of known issues and requested features. Some of the more -commont issues are: +common issues are: 1. You get text like "G38G43G36G51G5" instead of what you expect when you are extracting text. This is because the characters are a meaningless internal @@ -91,7 +108,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -108,7 +125,7 @@ and/or re-export to another country, of encryption software. BEFORE using any encryption software, please check your country's laws, regulations and policies concerning the import, possession, or use, and re-export of encryption software, to see if this is permitted. See - for more information. + for more information. The U.S. Government Department of Commerce, Bureau of Industry and Security (BIS), has classified this software as Export Commodity Control diff --git a/RELEASE-NOTES.txt b/RELEASE-NOTES.txt index b756fe0379e..9bf30abe777 100644 --- a/RELEASE-NOTES.txt +++ b/RELEASE-NOTES.txt @@ -1,1216 +1,67 @@ -Release Notes -- Apache PDFBox -- Version 2.0.0-RC3 +Release Notes -- Apache PDFBox -- Version 2.0.24 Introduction ------------ The Apache PDFBox library is an open source Java tool for working with PDF documents. -This is the third release candidate for the upcoming major release 2.0.0 of PDFBox. -This release contains a lot of improvements, fixes and refactorings. The API is -supposed to be stable, but we can't guarantee that there won't be any last changes -to it before providing the final release candidate. +This is an incremental bugfix release based on the earlier 2.0.23 release. It contains +a couple of fixes and small improvements. For more details on these changes and all the other fixes and improvements included in this release, please refer to the following issues on the PDFBox issue tracker at https://issues.apache.org/jira/browse/PDFBOX. 
-Sub-task - -[PDFBOX-1869] - Implementation for ShadingType 1 -[PDFBOX-1870] - PDFunctionType0 incorrect -[PDFBOX-2117] - AxialShadingContext is slow -[PDFBOX-2279] - Text with gradient not shown -[PDFBOX-2529] - Preflight: mention the page on which a problem has been found -[PDFBOX-2531] - better error message on not yet read stream -[PDFBOX-2535] - mention subtype in COSStream IOException -[PDFBOX-2536] - More specific TIFFFaxDecoder exceptions -[PDFBOX-2537] - do not discard underlying cause when creating validation error -[PDFBOX-2611] - possibly incorrect error message "Hexa String must have only Hexadecimal Characters" in preflight -[PDFBOX-2612] - error "Destination contains invalid page reference 'null'" is not detected by preflight -[PDFBOX-2613] - Conflicting /N information for OutputIntent not detected by preflight -[PDFBOX-2614] - missing /Type/FontDescriptor not detected by preflight -[PDFBOX-2619] - XMP dates contain time zone, while document info dates do not, and this isn't detected by preflight -[PDFBOX-2625] - Preflight error: The character with CID 0 should have a width equals to 57.0, but has 57.78 -[PDFBOX-2627] - Add block composer to handle multiline text -[PDFBOX-2630] - "loop in destinations" not detected by preflight -[PDFBOX-2647] - Check thumbnails in XMP metadata -[PDFBOX-2718] - Allow to create new AcroForm fields from scratch -[PDFBOX-2783] - Remove getCOSDictionary() method, adjust getCOSObject() return type -[PDFBOX-2849] - fix problems with setting existing AcroForm buttons -[PDFBOX-2863] - Support the comb flag for PDF forms -[PDFBOX-2877] - Wrong text placement for autosize fields compared to Adobe generated -[PDFBOX-2889] - Support appearance generation for choice fields -[PDFBOX-2900] - PDF Debugger doesn't print inline images correctly -[PDFBOX-2993] - Create a PDTransparencyGroup for added code clarity -[PDFBOX-2994] - Rename PDGroup to PDTransparencyGroupAttributes -[PDFBOX-3051] - COSArray.getObject() incorrect handling of 
indirect reference to COSNull -[PDFBOX-3052] - NPE in PDFStreamEngine.ShowText when no font set -[PDFBOX-3053] - Text extraction fails with type 3 fonts -[PDFBOX-3057] - NPE in CFFParser.parseType1Dicts() -[PDFBOX-3060] - Catalog cannot be found -[PDFBOX-3061] - Word concatenation in 2.0 not in 1.8 -[PDFBOX-3062] - Text extraction and height different in 2.0 -[PDFBOX-3068] - Null metadata in 2.0 in some files that had metadata in 1.8.10 with old parser -[PDFBOX-3112] - Avoid crazy /Length1 values in font descriptor -[PDFBOX-3123] - Text extraction garbled in this file, was OK in 1.8 -[PDFBOX-3125] - IndexOutOfBoundsException in PDFont.getWidth() -[PDFBOX-3126] - IndexOutOfBoundsException in PfbParser.parsePfb -[PDFBOX-3127] - Text with vertical font not extracted correctly -[PDFBOX-3129] - NullPointerException in PDFStreamEngine.showText() -[PDFBOX-3186] - Parsing fails when XRef stream object is 1 byte later - Bug -[PDFBOX-31] - bug with the Type3 font -[PDFBOX-37] - Text Extraction Weirdness -[PDFBOX-40] - Font problem when setting form value -[PDFBOX-53] - Problem getting value from PDRadioCollection -[PDFBOX-54] - please correct the SetField example -[PDFBOX-62] - Incorrect (zero) character widths returned in some docs -[PDFBOX-101] - ImportXFDF results in PDF with larger text fields -[PDFBOX-123] - too many space made in extracted text file -[PDFBOX-129] - Error when setting the value of a combo box to " " -[PDFBOX-159] - Field renaming character set problem -[PDFBOX-161] - java.util.EmptyStackException from PDFTextStripper.writeText -[PDFBOX-166] - ConvertColorSpace RGB to CMYK -[PDFBOX-198] - Tiff image problems -[PDFBOX-205] - Miscellaneous errors on valid files -[PDFBOX-239] - PDFToImage prints every word at the start of the line -[PDFBOX-283] - Character encoding/appearance issues when filling forms -[PDFBOX-297] - Printing fails -[PDFBOX-308] - Unknown encoding for 'UniJIS-UCS2-H' -[PDFBOX-317] - PDFont.getStringWidth() returns incorrect values 
-[PDFBOX-326] - TrueType and characterHorizontalDisplacement -[PDFBOX-412] - Failure to render PDFs with embedded fonts -[PDFBOX-427] - ArrayIndexOutOfBoundsException in drawString -[PDFBOX-447] - Image Convert Issue -[PDFBOX-451] - PDFImageWriter does not convert chinese PDF correctly -[PDFBOX-465] - invalid date formats -[PDFBOX-484] - Spaces, numbers and some letters not display correctly -[PDFBOX-488] - Invalid memory access of location 00000000 eip=968f5aa7 (MAC OS X) -[PDFBOX-490] - Pdf Printing of text from embedded fonts -[PDFBOX-501] - Open a trueType Font PDF, content become square box -[PDFBOX-538] - CryptographyException on Adobe Distiller generated file -[PDFBOX-587] - build script should support building without an internet connection -[PDFBOX-648] - PdfBox can't be buit from behind a firewall/proxy -[PDFBOX-649] - loading an fdf containing a file attachment throws IOException -[PDFBOX-657] - PDFToImage does not work with certain fonts (for eg. PDF documents created by MS Office and OpenOffice) -[PDFBOX-664] - Incorrect rendering of Slovak language PDF -[PDFBOX-677] - Lines not showing in PrintPDF print-out (Table borders and SVG figures) -[PDFBOX-723] - Our test hangs with custom pdf file on operation PDPage.convertToImage() -[PDFBOX-725] - Text extraction fails due to font problem with Type0, supplement-0 font -[PDFBOX-728] - Text extracted from a TeX-created PDF file comes in some form of hex encoding -[PDFBOX-778] - OutOfMemory when extracting text from pdf -[PDFBOX-785] - Spliting a PDF creates unnecessarily large files -[PDFBOX-823] - NullPointerException in DateConverter.toISO8601(DateConverter.java:221) -[PDFBOX-833] - Wrong encoding with Type1C font when specific encoding is defined -[PDFBOX-837] - Wrong RevisionNumber when disabling all permissions and using 128bit encryption -[PDFBOX-877] - processOperator breaks contract - never throws IOException -[PDFBOX-904] - Potential issue with COSString and UTF-16-encoded Strings. 
-[PDFBOX-905] - NullPointerException when writing pdf to image -[PDFBOX-923] - pdf gets messed up when updated with xfdf data -[PDFBOX-924] - Image not getting rendered correctly.. -[PDFBOX-932] - Swedish characters are garbled in form -[PDFBOX-934] - ImageToPDF.createPDFFromImage causes problems for certain TIFF inputs -[PDFBOX-940] - [pdmodel.font.PDFont] Error: Could not parse predefined CMAP file for 'PDFXC-Indentity0-0' -[PDFBOX-962] - All sort of Problems when importing Xfdf files into PDFs -> damaged pdfs and NPEs -[PDFBOX-965] - Printing of PDF with embedded OTF/TTF fonts is not working -[PDFBOX-984] - When create images from PDF File with characters from PT-BR it´s printing wrong -[PDFBOX-988] - pdmodel.font.PDSimpleFont hanging on TrueType font (ubuntu) -[PDFBOX-989] - Scale Pdf: Fit to Printable Area -[PDFBOX-1002] - Form field not rendered after being processed by pdfbox-1.1.0, wrong position of same field in pdfbox-1.5.0 -[PDFBOX-1007] - Maven performs textual filtering of binary resources [patch] -[PDFBOX-1019] - PDF conversion to image crashes the JVM -[PDFBOX-1020] - Can't read embedded font YDLRUT+ArialMT -[PDFBOX-1036] - FDFExport/Import gives strange results -[PDFBOX-1058] - Converting PDF to Image gives error and the image generated is of poor quality -[PDFBOX-1060] - convertToImage includes "ghost" annotation outlines -[PDFBOX-1069] - Ubuntu throws exceptions when fonts missing -[PDFBOX-1071] - Can not generate chinese character PDF file -[PDFBOX-1074] - TIFFFaxDecoder5 when using PDFImageWriter -[PDFBOX-1086] - Error when decoding CCITT compressed data that contains EOLs, fill bits etc. 
-[PDFBOX-1087] - FDF parsing is unreliable when xref are missing -[PDFBOX-1107] - PDF created by Bullzip PDF Printer / www.bullzip.com / Freeware Edition shows weird characters -[PDFBOX-1109] - Data corruption related to scratch file use -[PDFBOX-1134] - fontbox not decoding font correctly for all characters -[PDFBOX-1147] - Printing a PDF with an image inside show black. -[PDFBOX-1148] - PDF with embedded fonts (Identity-H) not print. -[PDFBOX-1152] - Gets scrambled japanese text while reading a PDF file -[PDFBOX-1155] - setSuppressDuplicateOverlappingText sometimes removes characters that it shouldn't -[PDFBOX-1164] - Inline image parsing error causes RuntimeException + FIX -[PDFBOX-1206] - TrueType glyphs render incorrectly -[PDFBOX-1207] - PDFPageProcessor.processStream() take 10 minutes to return -[PDFBOX-1219] - org.apache.jempbox.impl.DateConverter unable to parse correct date value -[PDFBOX-1231] - AcroForm appearance generator -[PDFBOX-1234] - NPE at org.apache.pdfbox.pdmodel.interactive.form.PDAppearance.calculateFontSize(PDAppearance.java:551) -[PDFBOX-1242] - Handle non ISO-8859-1 chars with drawString -[PDFBOX-1250] - CFF to Type1 Font conversion is missing/corrupting the font metrics -[PDFBOX-1268] - OutOfMemory Error because of huge colors -[PDFBOX-1273] - java.io.IOException: Error: Unknown annotation type null -[PDFBOX-1276] - java.lang.NullPointerException on trying to set value for PDTextBox in pdf file. 
-[PDFBOX-1278] - PDF file containing PDCIDFontType0 (PDType1CFont) does not render correctly to image -[PDFBOX-1282] - Unicode characters displayed with wrong glyps because of interpretation as 8 bit strings -[PDFBOX-1283] - Unicode characters displayed with wrong Advance -[PDFBOX-1292] - Rendering of certain documents results in large tracts of blank space - even though contents can be extracted -[PDFBOX-1296] - Warnung: Changing font on < > from to the default font -[PDFBOX-1301] - Wrong characters in HTML/TXT file from PDF containing scanned pages/images -[PDFBOX-1302] - Got ArrayIndexOutOfBoundsException in parsing a Chinese ttf file. -[PDFBOX-1304] - Text extraction meets "Could not parse predefined CMAP" and returns just a small part of the content containing garbage chars. -[PDFBOX-1307] - extracted images from a PDF sometimes come out inverted -[PDFBOX-1321] - PDF rendered as black box -[PDFBOX-1325] - Converting page to png creates an empty image -[PDFBOX-1332] - Some inline font is can not parsed out -[PDFBOX-1336] - JVM Crashes on Linux OS + Sun JVM + PDFBox -[PDFBOX-1342] - Tags not fully preserved when merging PDFs. 
-[PDFBOX-1348] - ExtractImages jpg and tiff picture from pdf but color wrong -[PDFBOX-1351] - False paragraph caused by superscript (1.7 regression) -[PDFBOX-1372] - NullPointerException with loadDescriptorDictionary -[PDFBOX-1391] - Document with "embedded subset" fonts is displayed incorrect -[PDFBOX-1403] - Retrieve FontDescription from descendant font -[PDFBOX-1405] - Non-Ascii chars are not decoded correctly by pdfbox but works fine with pdftotext -[PDFBOX-1412] - NullPointerException when getting fields from a PDF file -[PDFBOX-1413] - Spaces replaced by é when exporting image -[PDFBOX-1414] - EXCEPTION_ACCESS_VIOLATION in fontmanager.dll -[PDFBOX-1419] - PDField.setValue is not behave correctly -[PDFBOX-1426] - JVM crashes when trying to process the attached pdf's -[PDFBOX-1435] - text is obscured by the Images -[PDFBOX-1442] - bar chart converted from PDF is totally a black area. -[PDFBOX-1452] - Greek Pdfs print out wrong characters -[PDFBOX-1466] - Rendering of pattern colorspace fails -[PDFBOX-1474] - PDDocument.decrypt does not throws InvalidPasswordException -[PDFBOX-1478] - Problem with printing landscape document -[PDFBOX-1506] - Incorrect visualization of PDF document via PageDrawer -[PDFBOX-1511] - pdfMerger App produces Garbage -[PDFBOX-1512] - TextPositionComparator is not compatible with Java 7 -[PDFBOX-1533] - When merging certain PDF's several odd looking empty pages occur in the result -[PDFBOX-1541] - expected='endstream' actual='' failure to parse -[PDFBOX-1550] - Helv vs. 
Helvetica font names cause PDField.setValue to fail -[PDFBOX-1570] - PDFImageWriter creates black boxes for some images in the pdf -[PDFBOX-1574] - ImportFDF fails to do anything -[PDFBOX-1576] - StackOverflowError [COSDictionary.toString(COSDictionary.java:1418)] -[PDFBOX-1585] - org.apache.pdfbox.util.PDFTextStripper.getText() causes thread to block indefinitely -[PDFBOX-1595] - PDFMerger failed with the following exception: java.lang.NullPointerException -[PDFBOX-1604] - FontBox is not storing all subroutines for CID-Keyed OTF CFF fonts possibly leading to rendering / width issues -[PDFBOX-1606] - NonSequentialPDFParser produces garbage text in document info -[PDFBOX-1607] - StringIndexOutOfBoundsException in PDFParser -[PDFBOX-1608] - Rendering problem with Java 7 update 21 -[PDFBOX-1617] - Null pointer exception -[PDFBOX-1618] - Split PDF file to single page files, some files are inflated in size -[PDFBOX-1620] - Missing text in pdf reader view -[PDFBOX-1622] - TextNormalize init not thread-safe, may lead to infinite loop -[PDFBOX-1625] - java.lang.IndexOutOfBoundsException at writing PDF file -[PDFBOX-1627] - Exception in thread "main" java.lang.NullPointerException -[PDFBOX-1628] - Type 3 Fonts are not processed by PDPage.createImage -[PDFBOX-1629] - Null PointerException -[PDFBOX-1630] - An interesting Exception error -[PDFBOX-1631] - Group Exception -[PDFBOX-1632] - Exception with validation -[PDFBOX-1633] - DateConverter needs to work -[PDFBOX-1637] - Faulty documentation of PDStream.getInputStreamAsString() -[PDFBOX-1638] - PDCcitt doesn't use color space -[PDFBOX-1639] - Infinite loop with PDFParser used by tika. -[PDFBOX-1642] - NPE when parsing XMP schema definition with "closed Choice" value type -[PDFBOX-1643] - Check for missing validation processes does not work properly in Preflight -[PDFBOX-1651] - PDFBox doesn't read the permission bits correct. 
PDDocument.getCurrentAccessPermission().canPrint() is allways returning true irrespective of the document print permissions -[PDFBOX-1653] - Fix pdfbox eating up big chunks of memory for identical CID mappings -[PDFBOX-1654] - Wasted work in XMLUtil.getNodeValue -[PDFBOX-1655] - Wasted work (or incorrect behavior) in PDCIDFontType2Font.readCIDToGIDMapping -[PDFBOX-1657] - glyph contours missing -[PDFBOX-1658] - TTC fonts not supported for substitution -[PDFBOX-1659] - Preflight 2.0.0 doesn't properly identify PDFs with encryption -[PDFBOX-1660] - Error 6.2.4 results in description that looks more like the one belonging to 6.2.3 -[PDFBOX-1663] - Hello World using a TrueType font ArrayIndexOutOfBoundsException -[PDFBOX-1664] - NullPointerException in PDType1Font.java -[PDFBOX-1666] - Missing StemV font descriptor entry when embedding AFM fonts -[PDFBOX-1668] - Loading a Russian PDF never finishes -[PDFBOX-1670] - Printing pages rotated by 180 degrees is not working -[PDFBOX-1671] - Error printing document java.lang.ArrayIndexOutOfBoundsException: 346 -[PDFBOX-1672] - Some characteres are missing after print thru PDFBox -[PDFBOX-1674] - Preflight doesn't correctly parse PDF if obj identifier not followed by line terminator -[PDFBOX-1678] - Convert to image problem -[PDFBOX-1679] - java.io.IOException: Error: Expected an integer type, actual='f' -[PDFBOX-1681] - java.lang.IllegalArgumentException: Color parameter outside of expected range: Red -[PDFBOX-1683] - 2.0 build fails -[PDFBOX-1688] - File with embedded subset renders no text -[PDFBOX-1689] - Partial failure to render PDF -[PDFBOX-1691] - "Foreign" characters are not rendered -[PDFBOX-1692] - java.lang.OutOfMemoryError: Java heap space -[PDFBOX-1694] - Bug in org.apache.pdfbox.io.Ascii85InputStream -[PDFBOX-1696] - Bug in org.apache.pdfbox.io.Ascii85OutputStream -[PDFBOX-1699] - Problem with generate jpg from pdf -[PDFBOX-1705] - can not Write Hebrew and Chinese word into a PDF -[PDFBOX-1708] - 
IndexOutOfBoundsException on convertToImage with an embedded Fax-Image -[PDFBOX-1713] - [PATCH] Bullet character not rendered -[PDFBOX-1714] - Merging PDFs results in java.io.IOException: expected='R' actual='0' -[PDFBOX-1717] - Rendering to image has misplaced characters -[PDFBOX-1718] - wrong glyphs displayed -[PDFBOX-1719] - NPE while signing PDF - acroform without fields -[PDFBOX-1724] - Method createColorModel not implemented for PDCalGray -[PDFBOX-1725] - Character rendered at wrong position -[PDFBOX-1727] - Content outside the MediaBox should not be rendered -[PDFBOX-1730] - Image in PDF has extremely different colors when rendered -[PDFBOX-1733] - Rectangles have one rounded edge in rendered image only -[PDFBOX-1735] - Convert page pdf to image -[PDFBOX-1737] - Skip whitespaces when resolving a XRef -[PDFBOX-1740] - Umlaut not rendered correctly in TTF composite glyph -[PDFBOX-1741] - [PATCH] Text should be in italic but is rendered upright -[PDFBOX-1742] - type1CFont font with null encoding -[PDFBOX-1743] - OutOfMemoryError in fontbox -[PDFBOX-1749] - Out of memory exception when parsing TTF file -[PDFBOX-1750] - PDTextbox and PDAnnotationWidget are not correct initialized from it's own constructor . -[PDFBOX-1752] - Rendering PDF containing Jpeg2000 fails -[PDFBOX-1753] - The font gets gibbrish when adding a line of text to an existing PDF with a table -[PDFBOX-1754] - Preflight doesn't detect JavaScript for some PDFs -[PDFBOX-1756] - ClassCastException CosString cannot be cast to COSName -[PDFBOX-1758] - Preflight doesn't report Filespec dictionary that refers (indirectly) to an EmbeddedFile entry in some cases -[PDFBOX-1760] - Regressions 28 Oct 2013 -[PDFBOX-1763] - Exception caused by "Invalid ICC Profile Data" -[PDFBOX-1764] - PDFBox takes ages to render page 2 of the attached PDF -[PDFBOX-1765] - Null pointer exception in PDFToImage -[PDFBOX-1768] - cannot build last source code -[PDFBOX-1770] - ExtractText gets all "?" 
when pdf 's font is instance of PDType1Font -[PDFBOX-1771] - Cannot render FOP pdf with subsetted OTF CFF for both standard and CID-Keyed fonts -[PDFBOX-1773] - Regression? Type 3 Fonts are not processed by RenderUtil.convertToImage -[PDFBOX-1774] - StackOverflowError; Preflight->Font -[PDFBOX-1776] - Print pdf with font embedded(SimSun TrueType(CID) Identity-H) -[PDFBOX-1777] - memory leak in org.apache.pdfbox.cos.COSDocument -[PDFBOX-1778] - Rounding issue in generated PDF file -[PDFBOX-1780] - previous revision is damaged after signing -[PDFBOX-1789] - NullPointerException at PDPageContentStream.setFont -[PDFBOX-1790] - NPE during PDTrueTypeFont.loadTTF() on Mac TrueType font lacking Windows-platformID CMAPEncodingEntry -[PDFBOX-1791] - Type3 glyphs with partial black background -[PDFBOX-1794] - Rendering Problem with Type 3 Fonts -[PDFBOX-1796] - Infiniteloop BaseParser.java:1010 -[PDFBOX-1799] - NullPointerException when constructing a PDJPeg using a BufferedImage -[PDFBOX-1801] - xmp serializer does not generate valid xml for structured types -[PDFBOX-1802] - COSDictionary in COSArray setDirect(true) but dic written indirect -[PDFBOX-1804] - PDFTextStripper Issue related to word positions not correctly being parsed -[PDFBOX-1808] - PDFTextStripper.getText - hight memory usage -[PDFBOX-1810] - PDFToImage: Image of pdf is resized and drawn multiple times at top of output image -[PDFBOX-1811] - java.io.IOException: Object at offset does not end with 'endobj' -[PDFBOX-1812] - Illegal characters in XML output -[PDFBOX-1813] - Stack overflow error in Main (no output file produced) -[PDFBOX-1814] - In some cases PDPage converttoimage is extremely slow -[PDFBOX-1818] - Push back buffer is full error -[PDFBOX-1819] - Rendering problem with JPX image -[PDFBOX-1822] - Signature byte range is Invalid -[PDFBOX-1824] - [PATCH] CFF fonts render wrong glyphs -[PDFBOX-1825] - [PATCH] Many pdfbox tests are never run -[PDFBOX-1829] - PDF Extract Image Pixelmap Issue 
-[PDFBOX-1830] - Grey background rectangle rendered at different position -[PDFBOX-1831] - [PATCH] Fix: "Foreign" characters are not rendered -[PDFBOX-1845] - PDDocument.load() give Error: Expected a long type at offset 1633 -[PDFBOX-1849] - Isartor test 6-3-5-t01-fail-a does not return the expected error code -[PDFBOX-1860] - HTML converter escapes formatting close tags -[PDFBOX-1861] - Line is incorrectly dashed -[PDFBOX-1862] - Incomplete signature creation (regression in 1.8.3 with PDFBOX-1780) -[PDFBOX-1864] - Non-embedded fonts not detected (or are they?) -[PDFBOX-1865] - RenderUtil - rendering blank pages as images from PDF -[PDFBOX-1868] - Garbled / distorted fonts during PDF to image conversion on recent versions -[PDFBOX-1871] - Content appears a few px higher when rasterizing PDF -[PDFBOX-1872] - PDMetadata.exportXMPMetadata fails when Metadata has encrypted stream -[PDFBOX-1874] - PDFTextStripper.isParagraphSeparation(...) -[PDFBOX-1875] - Image and some text missing in rendered file -[PDFBOX-1876] - Incorrect color for DeviceN type 4 shading object -[PDFBOX-1877] - Radial Shading (type 3) fails Ghent Workgroup tests -[PDFBOX-1879] - Gibberish characters when converting pdf to image -[PDFBOX-1880] - [PATCH] Type 1 Shading must not ignore current transformation matrix -[PDFBOX-1882] - Negative array size exception when reading a string from a OTF font -[PDFBOX-1884] - Avoid NPE when encountering null PDComplexFileSpecification -[PDFBOX-1887] - Bugfixes + Optimization of Gouraud Shading -[PDFBOX-1888] - JBIG2Filter is creating an ImageInputStream (with temp file) and not closing it -[PDFBOX-1892] - Empty pages after rendering images: org.apache.pdfbox.util.operator.pagedrawer.Invoke -[PDFBOX-1895] - Type0 settings /Registry and /Ordering are not decrypted when writing document -[PDFBOX-1896] - Support MMType1 (Multiple Master) Fonts -[PDFBOX-1900] - ConvertToImage - pdf - checkbox wrongly rendered -[PDFBOX-1901] - null check confusing -[PDFBOX-1908] - 
Drop shadow is too heavy (Transparency Groups) -[PDFBOX-1910] - Text rendered as question marks -[PDFBOX-1911] - Orange background from the pdf gets turned into blue in the png files. -[PDFBOX-1916] - java.lang.ArrayIndexOutOfBoundsException in inlineimage -[PDFBOX-1917] - Rendering hangs -[PDFBOX-1918] - PDF with incorrect startxref -[PDFBOX-1922] - NonSequentialParser not reading version in header and trailer -[PDFBOX-1924] - Gouraud shading: detect empty triangles -[PDFBOX-1925] - DeviceCMYK Colorspace: PDFToImage gives wrong output -[PDFBOX-1928] - PDResources.getFonts() and PDresources.getXObjects() change underlying COSDictionary -[PDFBOX-1929] - Drop shadow on text appears as a box -[PDFBOX-1930] - TimesNewRoman font should be substituted -[PDFBOX-1931] - Radial shading is missing -[PDFBOX-1934] - converttoimage error and part of the pdf is not rendered -[PDFBOX-1940] - Faulty pdf->image rendering -[PDFBOX-1942] - Regression: java.lang.IndexOutOfBoundsException in shading -[PDFBOX-1944] - Regression: NPE in test file -[PDFBOX-1945] - Regression: NPE with inline image -[PDFBOX-1948] - Regression: page renders mostly empty, text missing -[PDFBOX-1950] - Inline image mask does not mask -[PDFBOX-1953] - java.lang.IllegalArgumentException in SampledImageReader.getRGBImage() -[PDFBOX-1954] - Regression: Some lines are too small / too long -[PDFBOX-1955] - Regression: Colors much lighter -[PDFBOX-1961] - Page with annotations renders fine with 1.8 but not with 2.0 -[PDFBOX-1965] - NPE in NonSequentialPDFParser when parseMinimal property is set to true -[PDFBOX-1966] - Type 1, 4 and 5 shadings for shFill() -[PDFBOX-1969] - JPEGFactory bug -[PDFBOX-1977] - LZWFilter fails -[PDFBOX-1978] - Type1FontUtilTest is non-deterministic -[PDFBOX-1979] - TypeTestingHelper is non-deterministic -[PDFBOX-1980] - TestCOSFloat is non-deterministic -[PDFBOX-1981] - CryptographyException for file that isn't encrypted -[PDFBOX-1983] - Unable to add TIF images, CCITTFactory not working 
-[PDFBOX-1984] - PDFont documentation correction needed for getFontWidth and getFontHeight -[PDFBOX-1988] - PDFBox ExtractText issue of PDF with no embedded fonts -[PDFBOX-1992] - text in pdf with convertToImage not rendered -[PDFBOX-1993] - Gray color images much lighter -[PDFBOX-1995] - AdobePDFSchema.getProducer() returns empty string -[PDFBOX-1997] - CIE LAB item missing in rendering -[PDFBOX-1999] - JBIG2Filter - FlateDecoded Globals Table -[PDFBOX-2000] - White page when converting first page to image -[PDFBOX-2001] - Digital Signature information (parser bug?) -[PDFBOX-2005] - JDK 1.8 build fails in TestTTFParser -[PDFBOX-2007] - Performance regression since PDFRenderer -[PDFBOX-2008] - Off-by-one error in BaseParser.readGenerationNumber() -[PDFBOX-2009] - PDFStreamEngine.processEncodedText incorrectly handling UTF-16 text with BOM FEFF -[PDFBOX-2015] - Hybrid reference pdf still contain XRefStm info in the trailer dictionary afterPDDocument#save -[PDFBOX-2016] - Stream parsing still incorrect if length value is wrong -[PDFBOX-2020] - PDF/A Validation raises NullPointerException for PDFs without ImageColorSpace -[PDFBOX-2021] - PDFPrinter problem with landscape and rotated pages -[PDFBOX-2022] - silentPrint(no args) doesn't use the printerJob field -[PDFBOX-2023] - Text extraction gets zero font height for type3 fonts -[PDFBOX-2024] - /Rotate 180 PDF is not displayed correctly in PDFReader app -[PDFBOX-2026] - cannot load jpg into new pdf -[PDFBOX-2032] - [PATCH] TTF Type12 IOException: Invalid Characters codes -[PDFBOX-2035] - Ignore badly formatted toUnicode CMaps -[PDFBOX-2036] - Add test with LZW fail sequence -[PDFBOX-2037] - Glyph in type1CFont not rendered -[PDFBOX-2038] - Method VisualSignatureParser#parse does not close COSDocument -[PDFBOX-2042] - ColorSpace with empty Range array -[PDFBOX-2044] - TrueType glyphs not displayed in rendering -[PDFBOX-2045] - Merging PDFs with a Form has no effect -[PDFBOX-2046] - [PATCH] Can't read the embedded Type1 
font -[PDFBOX-2047] - read operations alter PDLab object -[PDFBOX-2050] - Add predictor to LZW filter -[PDFBOX-2054] - Remove System.out.println() -[PDFBOX-2057] - Importing BufferedImage into PDPixelMap is broken in 1.8.5 -[PDFBOX-2058] - The text of pdfs using Type1C can't be extracted correct -[PDFBOX-2062] - Setting a PDFFormField's value with a specific font size causes the font size to change on click -[PDFBOX-2063] - Incomplete EOF detection in ASCIIHexFilter -[PDFBOX-2065] - Missing getCOSObject() in PDCalRGB -[PDFBOX-2067] - Error creating JPEG image with SMask -[PDFBOX-2070] - Filter.decode() modifies PDF if there is a filter array -[PDFBOX-2072] - Wrong calculation of space char width in PDFStreamEngine -[PDFBOX-2073] - PDF files with unusual Japanese font can not be rewrite correctly -[PDFBOX-2074] - 4-bytes CMap entry causes exception -[PDFBOX-2079] - Extra new line characters extracted in 1.8.5 for embedded files leading to ZipFile exception in Java 1.6 -[PDFBOX-2082] - signing corrupts PDF when signature exactly fits allocated space -[PDFBOX-2091] - Some characters are not rendered (font with symbol encoding) -[PDFBOX-2095] - Useless memory allocation in GlyfDescript -[PDFBOX-2098] - Gouraud shading doesn't appear -[PDFBOX-2100] - Gouraud shading doesn't work with function -[PDFBOX-2101] - Surprising memory consumption when extracting images -[PDFBOX-2102] - Characters swallowed on COSString.getString() -[PDFBOX-2103] - JPXFilter fails to decode some Jpeg2000 images -[PDFBOX-2106] - getSuffix() returns null for RLE encoding -[PDFBOX-2108] - Type0 CFF Font with identity encoding rendered incorrectly -[PDFBOX-2109] - CFFParser uses String constructor without encoding -[PDFBOX-2110] - Font not found: CourierNew -[PDFBOX-2111] - Cast error in Gouraud shadings -[PDFBOX-2114] - ObjStm is being processed to late -[PDFBOX-2115] - Use unfiltered stream in gouraud shadings -[PDFBOX-2120] - Regression: Type 1 font corrupted -[PDFBOX-2122] - FontBox's 
TTFDataStream doesn't set timezone in readInternationalDate -[PDFBOX-2128] - CMYK images are not supported correctly -[PDFBOX-2133] - Parsing of a Type1 font fails with a NumberFormatException -[PDFBOX-2134] - Parsing of a Type1 font fails with a NPE -[PDFBOX-2140] - non embedded Type1 symbol glyph not rendered -[PDFBOX-2141] - Shading not applied to text -[PDFBOX-2147] - Clean up code with "inspect and transform" -[PDFBOX-2153] - Setting the correct clipping path for shading -[PDFBOX-2155] - Fix JavaDocs warnings -[PDFBOX-2156] - different shading patterns at different resolutions when ctm is null -[PDFBOX-2158] - ExtractText missing most of text in this PDF file, due to font bounding box with minus infinity -[PDFBOX-2160] - PDFTextStripper doesn't always write paragraph start -[PDFBOX-2163] - inline image with EI in the middle incorrectly parsed -[PDFBOX-2166] - AIOOBE with barcode ttf font -[PDFBOX-2168] - Different behavior of Undo feature when form was pre filled by PDFBox -[PDFBOX-2170] - java.lang.ClassCastException: org.apache.fontbox.cff.CharStringCommand cannot be cast to java.lang.Integer -[PDFBOX-2171] - UnsupportedOperationException for stencil image / pattern -[PDFBOX-2173] - Nullpointer when validating empty file -[PDFBOX-2176] - Ignore IllegalArgumentException when reading an ICCProfile -[PDFBOX-2177] - [PATCH] IndexOutOfBoundsException reading embedded OpenType font -[PDFBOX-2178] - Invalid color space kind: COSName{DeviceGray} -[PDFBOX-2179] - Regression: Some isartor tests are not passing in 2.0.0 -[PDFBOX-2181] - Regression: NPE in PreflightContentStream -[PDFBOX-2183] - COSArray cannot be cast to COSNumber -[PDFBOX-2184] - CMMException: Invalid profile data -[PDFBOX-2185] - Rotation and skew not applied on rectangles -[PDFBOX-2186] - java.io.IOException: Catalog cannot be found -[PDFBOX-2187] - ArrayIndexOutOfBoundsException in TIFFFaxDecoder -[PDFBOX-2188] - java.io.IOException: Expected a name or array but got: COSObject{1823, 0} 
-[PDFBOX-2189] - java.awt.geom.IllegalPathStateException: missing initial moveto in path definition -[PDFBOX-2191] - Identity function not implemented -[PDFBOX-2192] - "unknown command" in Type1CharString.handleCommand -[PDFBOX-2193] - ClassCastException in PDExtendedGraphicsState.getFontSetting() -[PDFBOX-2194] - Refactor predictor -[PDFBOX-2195] - Missing text when converting PDF to image -[PDFBOX-2198] - ClassCastException in COSArrayList.convertIntegerCOSArrayToList for font widths -[PDFBOX-2199] - Found Token[kind=NAME, text=dup] but expected begin -[PDFBOX-2200] - Memory leak with org.apache.pdfbox.pdmodel.font.PDFont#cmapObjects -[PDFBOX-2201] - getKeywords returns null although keywords are present -[PDFBOX-2202] - java.io.IOException: Found Token[kind=NAME, text=readonly] but expected def -[PDFBOX-2203] - java.lang.IllegalArgumentException: alpha value out of range -[PDFBOX-2204] - Indexed color space in JPX -[PDFBOX-2206] - Cannot save a document which has been closed -[PDFBOX-2207] - Stream parsing still incorrect if length value is wrong -[PDFBOX-2212] - OutOfMemoryError in GlyfCompositeDescrip -[PDFBOX-2214] - EmptyStackException in PDFStreamEngine -[PDFBOX-2215] - NPE in PDTrueTypeFont.makeFontDescriptor -[PDFBOX-2216] - java.io.IOException: Found Token[kind=NAME, text= ] but expected LITERAL for type1 font -[PDFBOX-2217] - Matrix transform ignored in axial and radial shadings (in PDFToImage output) -[PDFBOX-2221] - Text is pink -[PDFBOX-2222] - NPE in PDFStreamEngine -[PDFBOX-2225] - ClassCastException in PDFMergerUtility.appendDocument -[PDFBOX-2227] - java.io.IOException: Found Token[kind=NAME, text= ] but expected LITERAL for type1 font -[PDFBOX-2228] - LZW EarlyChange parameter isn't supported -[PDFBOX-2229] - NPE in GlyfCompositeDescript.getPointCount -[PDFBOX-2234] - [PATCH] Invalid Color space preflight error on Java 8 -[PDFBOX-2237] - java.io.IOException: Image stream is empty for inline image -[PDFBOX-2240] - ArrayIndexOutOfBoundsException 
PDImageXObject.applyMask -[PDFBOX-2241] - IOException: Expected INTEGER or REAL but got NAME -[PDFBOX-2243] - java.lang.IllegalArgumentException: negative dash phase -[PDFBOX-2244] - java.lang.IndexOutOfBoundsException in callothersubr -[PDFBOX-2245] - java.lang.StringIndexOutOfBoundsException in PDTrueTypeFont.getGIDForCharacterCode -[PDFBOX-2247] - Regression in text extraction between 1.8.5 and 1.8.6 -[PDFBOX-2251] - NoSuchElementException when reading cmap format 4 subtable -[PDFBOX-2256] - Text size renders wrong -[PDFBOX-2257] - BufferedInputStream wrapped in BufferedInputStream -[PDFBOX-2261] - Extremely long hang during getFields() on a few PDF files -[PDFBOX-2265] - ArrayIndexOutOfBoundsException in PDICCBased.loadICCProfile -[PDFBOX-2266] - NPE when converting page to image -[PDFBOX-2267] - IOException and partial rendering and colorspace creation error -[PDFBOX-2268] - AES-256 decryptions fails -[PDFBOX-2270] - PDField.getFullyQualifiedName() returns name adding suffix '.null' -[PDFBOX-2271] - Potential NPE in PDAppearanceString.java -[PDFBOX-2275] - ClassCastException in PDResources -[PDFBOX-2278] - Exception in thread "main" java.lang.IllegalStateException: Call to processSubStream() before processStream() or initStream() -[PDFBOX-2280] - Text not italic -[PDFBOX-2281] - Yellow box shown -[PDFBOX-2283] - Incorrect transform for annotations / appearance streams -[PDFBOX-2284] - NullPointerException in PDFieldTreeNode -[PDFBOX-2285] - debugLogMetadata doesn't log -[PDFBOX-2287] - [PATCH] COSStream loses contents in setFilters() -[PDFBOX-2291] - Differences in Overlay stamping between version 1.8.2 and 1.8.6 -[PDFBOX-2292] - Saving of decrypted version of password protected document gives an error -[PDFBOX-2293] - NonSequential parser gives an error -[PDFBOX-2295] - Checkboxes missing -[PDFBOX-2296] - Wrong stream length -[PDFBOX-2298] - Wrong scaling of embedded type 1 font -[PDFBOX-2299] - Isartor tests don't work anymore -[PDFBOX-2300] - Glyphs 
rendered at wrong position -[PDFBOX-2301] - RandomAccessBuffer consumes too much memory. -[PDFBOX-2304] - square glyphs missing -[PDFBOX-2306] - Error reading stream, expected='endstream' actual='endobj' -[PDFBOX-2307] - NPE in TrueTypeFont.getWidth -[PDFBOX-2309] - UnsupportedOperationException: not implemented: missing CFF -[PDFBOX-2310] - codeToGID NPE -[PDFBOX-2311] - color space 'COSName{DefaultRGB}' does not exist in the current page's resources -[PDFBOX-2312] - IllegalArgumentException: Built-in Encoding required for symbolic font -[PDFBOX-2313] - ExtractImages finds never-rendered images -[PDFBOX-2314] - Restore backward compatibility between Overlay and OverlayPDF -[PDFBOX-2315] - Found Token[kind=NAME, text=ND] but expected ND -[PDFBOX-2317] - ZapfDingbats should use its own glyph list -[PDFBOX-2318] - NPE in new DomXmpParser when no type is found -[PDFBOX-2319] - Date Converter needs to handle miliseconds and other formats -[PDFBOX-2320] - IOException: Could not read embedded TTF for font TimesNewRoman -[PDFBOX-2323] - More flexible image caching (OOM) -[PDFBOX-2324] - Failure to render DeviceN image -[PDFBOX-2325] - Failure to render OpenType (TrueType) -[PDFBOX-2326] - IllegalArgumentException: Use PDType1CFont for FontFile3 -[PDFBOX-2327] - Glyph list ligatures are decomposed too early -[PDFBOX-2330] - Typo on usage message; "PDFDBox" instead of "PDFBox" -[PDFBOX-2332] - Error reading stream, expected='endstream' actual='endstream8' at offset 1993 -[PDFBOX-2334] - codeToGID NPE -[PDFBOX-2338] - IllegalStateException: recursive definition -[PDFBOX-2339] - ArrayIndexOutOfBoundsException when type1 font is empty -[PDFBOX-2342] - WriteDecodedDoc cant decrypt pdf form correctly -[PDFBOX-2343] - Giving NullPoint exception when we call PDType1Font.HELVETICA_BOLD.getStringWidth("Some String") -[PDFBOX-2344] - NegativeArraySizeException in radial shading -[PDFBOX-2345] - IndexOutOfBoundsException reading encrypted pdf -[PDFBOX-2347] - NPE while creating 
security handler for decryption -[PDFBOX-2350] - Type1 Parser hangs indefinitely -[PDFBOX-2351] - /XRefStm content missing in saved file -[PDFBOX-2352] - NegativeArraySizeException in HorizontalMetricsTable.read -[PDFBOX-2355] - newDocuments is private in Splitter -[PDFBOX-2356] - Error Validating PDF Archive Document with half hour timezone -[PDFBOX-2357] - PDTrueTypeFont has no method to load font from stream -[PDFBOX-2358] - ExternalFonts uses classloader of class in font-box -[PDFBOX-2360] - PDFont had methods removed -[PDFBOX-2363] - wrong color in rendering -[PDFBOX-2364] - CCITT image renders incorrectly -[PDFBOX-2365] - NPE with file with PDFDocEncoding -[PDFBOX-2367] - Ligature glyph widths wrong -[PDFBOX-2372] - Trash Glyphs: Regressions 19.9.2014 -[PDFBOX-2373] - Rendering at 72 dpi crashes java -[PDFBOX-2376] - Small regression in text extraction with PDFBox 1.8.7 vs. 1.8.6 -[PDFBOX-2379] - glyphlist_ext is not OSGI compatible -[PDFBOX-2380] - Glyphlist .properties are not ordered -[PDFBOX-2381] - BaseParser - IOException: Push back buffer is full -[PDFBOX-2383] - PDFBox tests include copyright files -[PDFBOX-2384] - ExtractText should default to UTF-8 -[PDFBOX-2385] - inline image with EI at the end incorrectly parsed -[PDFBOX-2390] - PDExtendedGraphicsState is incorrectly named -[PDFBOX-2395] - Signing PDF document changes documentID -[PDFBOX-2396] - Comment on `org.apache.pdfbox.util.Splitter.createNewDocumentIfNecessary` is out of date -[PDFBOX-2399] - font.getFontDescriptor() for PDType1Font.HELVETICA is null -[PDFBOX-2401] - Image has wrong colors after Merge -[PDFBOX-2402] - NonSequentialPDFParser cannot recover from spurious closing brackets -[PDFBOX-2403] - false negative? "Font damaged, The FontFile can't be read" -[PDFBOX-2405] - false negatives: Invalid Font definition, Some required fields are missing from the Font ... ? 
-[PDFBOX-2406] - fix typo "AlpaConstant" -[PDFBOX-2407] - false negative: 2.4.3 : Invalid Color space, The operator "f" can't be used without Color Profile ? -[PDFBOX-2408] - false negative? 1.2.1 : Body Syntax error, Single space expected ... -[PDFBOX-2409] - got the wrong result from Arabic text extraction -[PDFBOX-2411] - Pushback buffer is full on seamingly small PDF -[PDFBOX-2412] - Loading XFDF document fails with ClassCastException -[PDFBOX-2413] - Loaded FDF document returns null fields -[PDFBOX-2415] - java.lang.ClassCastException: org.apache.pdfbox.pdmodel.font.PDType1CFont cannot be cast to org.apache.pdfbox.pdmodel.font.PDType1Font -[PDFBOX-2416] - xmp regression? 7.3 : Error on MetaData, Cannot find a definition for the namespace http://ns.adobe.com/xap/1.0/t/pg/ -[PDFBOX-2417] - xmp regression? 7.3 : Error on MetaData, Schema is not set in this document : http://ns.adobe.com/xap/1.0/sType/Dimensions# -[PDFBOX-2418] - xmp regression? 7.3 : Error on MetaData, Schema is not set in this document : http://ns.adobe.com/xap/1.0/g/img/ -[PDFBOX-2419] - XFDF export is not XML compliant -[PDFBOX-2421] - Poor text extraction and rendering of file with non embedded type1 font -[PDFBOX-2422] - PDFont.getStringWidth results in stackoverflow -[PDFBOX-2424] - ClassCastException in getMetaData if no real meta data -[PDFBOX-2426] - Make ExternalFonts.getProvider public -[PDFBOX-2428] - An error occured when reading table hmtx -[PDFBOX-2429] - Times New Roman rendered as Arial -[PDFBOX-2433] - PDFPrinter does not print Acroform fields -[PDFBOX-2434] - ClassCastException in readVersionInTrailer -[PDFBOX-2436] - Parsing error -[PDFBOX-2437] - PDFont isSymbolic() has unexpected return value -[PDFBOX-2439] - [PATCH] ArrayIndexOutOfBoundsException in multithreaded system -[PDFBOX-2441] - Improve XRef self healing mechanism when more than one xref table -[PDFBOX-2445] - Out of Memory - Extract text for Apache_Solr_4.7_Ref_Guide.pdf -[PDFBOX-2447] - "Cannot save a document 
which has been closed" when encrypting -[PDFBOX-2448] - ligatures and some glyphs missing -[PDFBOX-2453] - Building on OpenJDK throws javax.imageio.IIOException -[PDFBOX-2455] - NonSequentialParser does not tolerate missing %%EOF markers -[PDFBOX-2457] - LogFactory is intialized with a wrong class -[PDFBOX-2458] - Signing doesn't work anymore using BC 1.51 instead of 1.50 -[PDFBOX-2460] - fix TestPublicKeyEncryption.java -[PDFBOX-2462] - NullPointerException in (PDFStreamParser.java:109) -[PDFBOX-2465] - NPE in PdfaExtensionHelper.populateSchemaMapping -[PDFBOX-2466] - 2.4 : Invalid Color space, Unable to read default color space : Missing color space: DefaultRGB -[PDFBOX-2469] - javax.crypto.BadPaddingException in PDFBox 1.8.8-SNAPSHOT -[PDFBOX-2470] - Exception in PDDocument.addSignature(PDSignature sigObject, SignatureInterface signatureInterface, SignatureOptions options)) -[PDFBOX-2471] - AES encryption failing to write Acroform field names and values -[PDFBOX-2477] - NPE in DomXmpParser.createProperty -[PDFBOX-2478] - NPE in XObjImageValidator.checkColorSpaceAndImageMask -[PDFBOX-2479] - NPE in PDICCBased.getColorSpaceType -[PDFBOX-2481] - Adding large TYPE_BYTE_BINARY image to pdf document generates distorted result -[PDFBOX-2483] - StackOverflowError in preflight -[PDFBOX-2484] - Cannot decrypt AES256 encrypted files with nonSeq parser -[PDFBOX-2485] - IllegalArgumentException in TypeMapping.instanciateSimpleProperty -[PDFBOX-2486] - ClassCastException in preflight: PDNonTerminalField cannot be cast to PDField -[PDFBOX-2487] - ArrayIndexOutOfBoundsException in Type1CharString -[PDFBOX-2488] - NPE in FontValidator.isSubSet in preflight -[PDFBOX-2489] - StackOverflowError in PDSimpleFont.isFontSymbolic -[PDFBOX-2490] - Return value of COSDocument#isEncrypted is unclear -[PDFBOX-2491] - NPE in PDFAIdentificationValidation.checkConformanceLevel() -[PDFBOX-2492] - Java 8u25 IllegalBlockSizeException decrypting pdf -[PDFBOX-2495] - Black shapes in the background 
of some rendered pages of some PDFs -[PDFBOX-2496] - PNG filesize is larger -[PDFBOX-2498] - ArrayIndexOutOfBoundsException in PreflightParser.lastIndexOf -[PDFBOX-2499] - EOF and NPE in PDType1CFont.getFontMatrix -[PDFBOX-2500] - ClassCastException in StreamValidationProcess.checkFilters -[PDFBOX-2501] - Page render without barcode -[PDFBOX-2502] - false negative? 1.4.6 : Trailer Syntax error, ID is different in the first and the last trailer -[PDFBOX-2503] - false negative? 1: 7.2 : Error on MetaData, Producer present in the document catalog dictionary doesn't match with XMP information -[PDFBOX-2504] - ClassCastException in preflight: PDAnnotationWidget cannot be cast to PDField -[PDFBOX-2505] - ArrayIndexOutOfBoundsException in PDColor constructor -[PDFBOX-2507] - Annotation example not rendered to image -[PDFBOX-2508] - Text extraction getting zero font height, bad widths, and ? for text in this PDF with Type 3 Fonts -[PDFBOX-2509] - Korean Text font substitution issues -[PDFBOX-2513] - false negative? 
RuntimeException: EOL encountered in white run -[PDFBOX-2517] - Better error message on pdfA identification -[PDFBOX-2519] - Regression: Box color missing -[PDFBOX-2520] - Don't decrypt already decrypted pdfs -[PDFBOX-2521] - Don't throw IOException if stream length is missing in lenient mode -[PDFBOX-2523] - IOException: Error: Expected a long type at offset 1218571, instead got 'xref' -[PDFBOX-2525] - Overlay: data black & white after import -[PDFBOX-2526] - Arial black not black -[PDFBOX-2527] - IOException: Negative seek offset in NonSequentialPDFParser -[PDFBOX-2528] - IOException: Object must be defined and must not be compressed object: 0:0 -[PDFBOX-2533] - Poor rendering with non-sequential parser -[PDFBOX-2540] - ArrayIndexOutOfBoundsException in Type1Parser.parseASCII -[PDFBOX-2541] - ClassCastException in BaseParser.parseCOSDictionaryValue -[PDFBOX-2542] - IllegalArgumentException: root must be of type Pages -[PDFBOX-2543] - ClassCastException in PDFontDescriptor.getFontFile2 -[PDFBOX-2546] - IllegalArgumentException: resourceDictionary is null in PDFMerger -[PDFBOX-2549] - TIFF-Predictor with 16 bits per component not supported -[PDFBOX-2550] - ClassCastException in PDAnnotation.getColour -[PDFBOX-2552] - Blank rendering when negative page rotation -[PDFBOX-2553] - CalRGB colors different -[PDFBOX-2557] - Yellow text not using heavy font -[PDFBOX-2559] - TTF font cannot be loaded -[PDFBOX-2560] - Arial Truetype CID font rendering incorrect -[PDFBOX-2561] - Rendering of PDIndexed line incorrect -[PDFBOX-2563] - [PATCH] Use cmap for Type0/TTF fallback -[PDFBOX-2569] - COSNumber fails to parse numbers like "+018" in JRE <= 1.6 -[PDFBOX-2570] - ClassCastException in PDCalGray: COSFloat cannot be cast to COSArray -[PDFBOX-2571] - IllegalStateException: Not a CIDFont -[PDFBOX-2572] - ArrayIndexOutOfBoundsException in CmapSubtable.processSubtype12 -[PDFBOX-2573] - IllegalStateException: PDFBox bug: encoding should not be null! 
-[PDFBOX-2579] - Exception in thread "main" java.io.IOException: Error: Expected a long type at offset 1029, instead got '12688(Deleted' -[PDFBOX-2582] - Form fields missing entirely or incorrect in PDField list -[PDFBOX-2583] - Error when rendering a PDF with annotations -[PDFBOX-2586] - IllegalBlockSizeException: Input length must be multiple of 16 when decrypting with padded cipher -[PDFBOX-2588] - Text fields if initialy empty in AcroForms do not contain a COSName.V in its dictionary and therefore does not get rendered. -[PDFBOX-2595] - Pdfbox always sets the second part of documentID to the same value -[PDFBOX-2598] - IllegalArgumentException in CFFParser.readCharset -[PDFBOX-2599] - failure to render file with utf8 CID TT fonts -[PDFBOX-2601] - fix getHashObjectIdentifier in TSAClient -[PDFBOX-2605] - Multiple text operations on page cause NPE in TTFSubsetter -[PDFBOX-2606] - Support OS with no fonts -[PDFBOX-2607] - Failed reading embedded Font -[PDFBOX-2608] - false negative on pdf/A validation? 
-[PDFBOX-2615] - IllegalArgumentException in PDPageTree constructor: root cannot be null -[PDFBOX-2616] - JVM crashes while trying to convert PDF to JPG image (only on Windows) -[PDFBOX-2617] - Group of Button fields treated as a Radio Button group -[PDFBOX-2620] - Support named actions -[PDFBOX-2621] - Files created with CreatePDFA.java are not PDF/A-1b -[PDFBOX-2622] - PDAnnotationLink::getBorderStyle() don't understand external border style -[PDFBOX-2629] - PDAnnotation should not use PDGamma for colors -[PDFBOX-2632] - Lost output when mixing subset and non-subset of the same font -[PDFBOX-2634] - Multiple text operations on multiple pages cause NPE in TTFSubsetter -[PDFBOX-2635] - PrintImageLocations outputs utter nonsense -[PDFBOX-2636] - Colorspaces of annotations not treated correctly -[PDFBOX-2640] - Fields within a fields kids entry are not correctly recognized -[PDFBOX-2641] - ArrayIndexOutOfBoundsException in PDType1Font constructor -[PDFBOX-2646] - A text including single-quote is malformed with Embedded TTF font -[PDFBOX-2649] - Character widths incorrect in a loaded font -[PDFBOX-2650] - Type1Equivalent: TrueType must use 'cmap' when 'post' table is empty -[PDFBOX-2651] - Preflight doesn't check for valid destination syntax -[PDFBOX-2652] - Document Outlines (Bookmark) and Link Annotation validation do not validate /Dest item -[PDFBOX-2653] - Image extraction fails with attached PDF -[PDFBOX-2654] - NullPointerException when reading a GIF file with a transparent color -[PDFBOX-2655] - PDCIDFontType2Embedder.buildCIDSet() ArrayOutOfBounds -[PDFBOX-2656] - Trailer isn't written when signing a PDF -[PDFBOX-2660] - Text missing -[PDFBOX-2664] - PDDocumentInformation shouldn't throw IOException -[PDFBOX-2665] - PDType1Font (HELVETICA) encode getting NullPointerException -[PDFBOX-2667] - StandardSecurityHandler should throw InvalidPasswordException -[PDFBOX-2668] - intersectClippingPath does a shallow copy -[PDFBOX-2675] - PDOutlineNode.getParent uses /P 
item as fallback for /Parent -[PDFBOX-2676] - PDPageLabelRange.setLabelItem() should not allow negative startPage -[PDFBOX-2677] - Negative Outlines COUNT and various issues -[PDFBOX-2678] - possible NPE in ExtractText tool of trunk -[PDFBOX-2679] - Blank page rendered with wrong xref start objid -[PDFBOX-2687] - ClassCastException when trying to get OutputIntents or add to it -[PDFBOX-2693] - OutOfMemoryError at org.apache.fontbox.cff.IndexData.initData(IndexData.java:95) -[PDFBOX-2698] - PDFToImage IndexOutOfBoundsException -[PDFBOX-2702] - Merging PDFs created using "Nuance PDF Create" not possible -[PDFBOX-2704] - PDPageTree.indexOf doesn't find page numbers -[PDFBOX-2708] - PDDocument.removePage() deletes the last page regardless of parameter passed -[PDFBOX-2711] - Japanese text not extracted -[PDFBOX-2713] - Preserve the origin pdf version when splitting a pdf -[PDFBOX-2714] - Type1Fonts working on one computer, not another -[PDFBOX-2715] - Pages in a PDF being dropped with just an error-log message -[PDFBOX-2717] - Keep type and subtype for PDWidgetAnnotation created from field -[PDFBOX-2719] - The addSignature() method always set the visual signature on the last page of the PDF -[PDFBOX-2720] - Can't sign PDF document with forms or annotations -[PDFBOX-2723] - PDFBox*.tmp files not deleted by COSParser -[PDFBOX-2724] - Importing a XFDF file doesn't populate the field value -[PDFBOX-2726] - org.apache.pdfbox.cos.COSArray cannot be cast to org.apache.pdfbox.cos.COSDictionary -[PDFBOX-2728] - java.awt.geom.IllegalPathStateException: missing initial moveto in path definition -[PDFBOX-2730] - PDFSplit slow and keeps unused pages -[PDFBOX-2733] - Nullpointer exception in PDFXrefStreamParser.parse -[PDFBOX-2734] - Can't create PDF with DeviceN colorspace -[PDFBOX-2739] - Saving merged documents causes IOException -[PDFBOX-2741] - IndexOutOfBoundsException when calling PDSeparation.setAlternateColorSpace -[PDFBOX-2745] - PDPageXYZDestination zoom property can't be 
set lower than 100% -[PDFBOX-2746] - PDPageContentStream.saveGraphicsState() saves wrong nonStrokingColor and throws an exception -[PDFBOX-2747] - pdfbox: garbled japanese txt output -[PDFBOX-2750] - Rendering in poor quality in 2.0 but not in 1.8.* -[PDFBOX-2759] - NPE in BaseParser.parseCOSDictionaryValue() due to object reference in content stream -[PDFBOX-2760] - NPE in MoveText.process() -[PDFBOX-2767] - ClassCastException in PDDocument.addSignature -[PDFBOX-2769] - NPE when saving encrypted file -[PDFBOX-2771] - COSString encodes Euro sign wrongly -[PDFBOX-2772] - EI token lost for rewrite -[PDFBOX-2773] - ClassCastException in PDDocumentCatalog.java:339 -[PDFBOX-2774] - Can't encode Euro with WinAnsiEncoding -[PDFBOX-2775] - ArrayIndexOutOfBoundsException in PDFTextStripper.processTextPosition() -[PDFBOX-2778] - PDF to Image conversion fails with "Invalid code word encountered" -[PDFBOX-2781] - Opening pdf document after encrypting it with PDFBox throws IllegalBlockSizeException -[PDFBOX-2786] - PDPageDestination page index off by one -[PDFBOX-2789] - TTF encoding issues -[PDFBOX-2792] - Text extraction ignores bookmarks -[PDFBOX-2793] - /Dests dictionary isn't supported -[PDFBOX-2794] - UnsupportedOperationException: not supported for Type 3 fonts -[PDFBOX-2795] - PrintRequestAttributeSet is being ignored -[PDFBOX-2797] - PDJavascriptNameTreeNode does not support dictionaries -[PDFBOX-2798] - PDTextStream does not support UTF16 with BOM -[PDFBOX-2799] - PDOptionalContentProperties.setGroupEnabled not working -[PDFBOX-2801] - SecurityHandler does not tolerate plain-text COSString -[PDFBOX-2802] - TestFontEmbedding sometimes fails due to non-determinism -[PDFBOX-2803] - NullPointerException into class PDType0Font -[PDFBOX-2808] - Can't merge to files with bookmarks -[PDFBOX-2811] - Infinite loop within RandomAccessBuffer -[PDFBOX-2812] - NPE in PDColorSpaceFactory.createColorSpace with PDICCBased -[PDFBOX-2814] - Text not rendered in mode 7 -[PDFBOX-2816] - 
PDFBox makes disallowed changes when signing a signed document -[PDFBOX-2819] - invalid ICC Profile when reading from a byte array -[PDFBOX-2822] - infinite loop of searching for a key in PDResources -[PDFBOX-2824] - ArrayIndexOutOfBoundsException in GlyfSimpleDescript.readFlags() when multithreading -[PDFBOX-2826] - Mouse position shown when mouse outside of PDFReader window -[PDFBOX-2829] - PDBox 2.0 Throws IndexOutOfBoundsException (severe offset errors as well) -[PDFBOX-2830] - Can't draw color border around a PDTextBox + create example -[PDFBOX-2832] - Remove obsolete methods from fontbox's Encoding -[PDFBOX-2833] - Add an API to get the COSObjectKey of a given object -[PDFBOX-2834] - Violation in PDOutputIntent.getDestOutputProfile() method -[PDFBOX-2836] - COSName should be interpreted as UTF-8 -[PDFBOX-2837] - PDFBox creates files with EBCDIC code on z/OS -[PDFBOX-2843] - widthOfSpace() appears wrong in TextPosition -[PDFBOX-2844] - Printing has bigger margins than expected -[PDFBOX-2845] - Error parsing PDF -[PDFBOX-2846] - setValue failing with font issues. -[PDFBOX-2847] - mergeDocumentsNonSeq does not utilize scratchFile -[PDFBOX-2851] - getExportValue() non functional in PDRadioButton -[PDFBOX-2856] - Markedly slower processing for particular file in 2.0.0-trunk vs 1.8.9 -[PDFBOX-2862] - GlyphList doesn't appear to be thread safe in trunk...or user error? 
-[PDFBOX-2867] - Correct use of Float.NaN -[PDFBOX-2868] - NPE in Acroform getValueAsString -[PDFBOX-2869] - Corruption in ScratchFileBuffer -[PDFBOX-2871] - Performance issue when filling the first PDTextField of an AcroForm -[PDFBOX-2872] - Matrix.toCOSArray() has constant return -[PDFBOX-2875] - Type 1 fonts are embedded incorrectly -[PDFBOX-2876] - Better support for embedding of simple TrueType fonts -[PDFBOX-2881] - Radial and Axial shading steps are calculated incorrectly -[PDFBOX-2884] - NPE in FontMapper.getFont() -[PDFBOX-2885] - NPE in PDNonTerminalField.getChildren() -[PDFBOX-2886] - "IllegalArgumentException root cannot be null" in 2.0.0 for file that was parsed in 1.8.x -[PDFBOX-2887] - NPE in PDFXrefStreamParser in 2.0 trunk -[PDFBOX-2896] - XMPBox not creating valid "title" entry in DublinCoreSchema in trunk -[PDFBOX-2898] - Incorrect key for color space in PDGroup -[PDFBOX-2899] - Text not rendered in mode 7 (2) -[PDFBOX-2901] - High CPU load and OutOfMemoryError when rendering shading -[PDFBOX-2904] - IndexOutOfBoundsException in CFFType1Font.getType2CharString() -[PDFBOX-2906] - NullPointerException in PDFStreamEngine.showText -[PDFBOX-2908] - PDFTextStripper.writeText is slow -[PDFBOX-2909] - NullPointerException when rendering shading with no function -[PDFBOX-2911] - Merge does not close input streams -[PDFBOX-2916] - ArrayIndexOutOfBoundsException in CmapSubtable.processSubtype6 -[PDFBOX-2924] - ClassCastException when doing PDFSplit -[PDFBOX-2927] - Print with PrintRanges printRequestAttribute causing document to be cropped -[PDFBOX-2929] - "Illegal instruction: 4" with PDFToImage -[PDFBOX-2930] - PDFPageable does not rotate portrait document with 90°/270° rotation well -[PDFBOX-2932] - NPE in PDSignature.getValuesAsString() when field contains no value -[PDFBOX-2935] - Problem while extracting font from PDFontSetting (used in PDExtendedGraphicsState) -[PDFBOX-2937] - Field duplication in PDIndexed color space -[PDFBOX-2939] - 
PDFRenderer.renderImageWithDPI exception with certain PDFs -[PDFBOX-2946] - Symbol glyphs not aligned -[PDFBOX-2948] - NPE in PDStream.createInputStream -[PDFBOX-2949] - Rendering to ARGB brings black background -[PDFBOX-2950] - Chinese font substitution issue -[PDFBOX-2951] - quotedbl causes NullPointerException -[PDFBOX-2956] - PDFontDescriptor doesn't contain method getCIDSet. -[PDFBOX-2958] - TIFF-Predictor with 1 bit per component not supported -[PDFBOX-2959] - type3 font glyphs overlapped -[PDFBOX-2960] - ClassCastException when pattern name is indirect object -[PDFBOX-2961] - Checkbox with multiple widgets doesn't reflect check() state. -[PDFBOX-2965] - NPE in PDAcroForm.getField() if the /Fields entry is missing -[PDFBOX-2966] - Glyphs overlapping in rendering -[PDFBOX-2969] - RandomAccessBuffer clone is broken for non-default chunk size -[PDFBOX-2971] - CalGray white rendered as cyan -[PDFBOX-2972] - Exception when RenderingIntent value is not one of the predefined. -[PDFBOX-2976] - java.util.zip.DataFormatException: incorrect data check -[PDFBOX-2982] - fix ClassCastExceptions in operator methods -[PDFBOX-2985] - Potential NPE in PDMarkedContent#getMCID() -[PDFBOX-2986] - Potential resource leak in TTFParser's use of RAFDataStream -[PDFBOX-2989] - LZW decode filter shouldn't throw IndexOutOfBoundsException -[PDFBOX-2990] - PDDocument.load fails to load a PDF document. 
-[PDFBOX-2992] - Add .gitignore -[PDFBOX-2995] - PDAcroForm getDefaultAppearance throws NPE if DA is not defined -[PDFBOX-2996] - StackOverflow in Quicksort -[PDFBOX-3001] - FileSystemFontProvider cache instability -[PDFBOX-3002] - PDF files not closed after load fails -[PDFBOX-3003] - Incorrect color space processing for inline images -[PDFBOX-3005] - Incorrect property names for lists -[PDFBOX-3008] - Memory leak in preflight -[PDFBOX-3010] - SignatureOptions object must not be closed before calling saveIncremental in trunk's CreateVisibleSignature example -[PDFBOX-3012] - PDAcroForm flatten() throws ClassCastException -[PDFBOX-3013] - Incorrect accordance between attributes and properties -[PDFBOX-3014] - ZapfDingbats not finding a substitute in Windows 8.1 Pro -[PDFBOX-3018] - IOException "head is mandatory" when using getOriginalData() of TT font from TTC file -[PDFBOX-3019] - Unwanted spaces in text extraction -[PDFBOX-3021] - Class Cast Exception: COSString -> COSName -[PDFBOX-3022] - Maven repos should be https -[PDFBOX-3025] - Test case for unwanted spaces in text extraction -[PDFBOX-3027] - Incorrect enumeration of conformances for PDFAIdentificationSchema -[PDFBOX-3033] - Usage methods references incorrect package -[PDFBOX-3034] - Newly created XRef stream has direct root objects -[PDFBOX-3035] - Files with missing xref table must fail -[PDFBOX-3037] - Text extraction decodes image files -[PDFBOX-3038] - Text extraction shows glyphs with zero height -[PDFBOX-3041] - Wrong default type in Xref stream W0 element -[PDFBOX-3042] - Bad space calculation in text extraction -[PDFBOX-3056] - Make PDFTextStreamEngine public -[PDFBOX-3067] - Text strings being returned as single characters, regression from version 1.8 -[PDFBOX-3070] - Incorrect DefaultRGB color space obtain -[PDFBOX-3073] - Change to use media box for page size instead of cropbox. 
-[PDFBOX-3075] - Changed to the getHeight function for fonts so it will return a more accurate height -[PDFBOX-3076] - Type3 Font that is getting zero height text, even in latest 2.0 -[PDFBOX-3081] - Create example to draw glyph sizes in rendered images -[PDFBOX-3082] - High memory consumption while building font cache -[PDFBOX-3083] - Form fields are missing when rendering -[PDFBOX-3087] - Metadata stream should not be compressed -[PDFBOX-3090] - ArrayIndexOutOfBoundsException in CmapSubtable.processSubtype2 -[PDFBOX-3091] - java.lang.ClassCastException: org.apache.fontbox.cff.CharStringCommand cannot be cast to java.lang.Integer -[PDFBOX-3093] - Exception in TTFParser -[PDFBOX-3094] - Merging PDFs with a Form is not retaining the field name values -[PDFBOX-3095] - Space size NaN -[PDFBOX-3097] - ClassCastException in Axial / Radial shading when object reference in extends -[PDFBOX-3102] - getGlyphs returns empty array now -[PDFBOX-3105] - Image with mask missing in rendering -[PDFBOX-3106] - Allow access to font data -[PDFBOX-3107] - Asterisk character not displaying properly in Adobe Reader -[PDFBOX-3108] - Font cache is always rebuilt when font skipped -[PDFBOX-3109] - DrawPrintTextLocations with incorrect coordinates when cropbox -[PDFBOX-3110] - Extract by beads doesn't work -[PDFBOX-3114] - Visible signatures in different pages changes previous revision -[PDFBOX-3130] - Recent regression in PDFTextStripper, text getting garbled -[PDFBOX-3139] - Custom FontMapper cant be used -[PDFBOX-3140] - Different fallback font rendering first and second time -[PDFBOX-3141] - Link annotation borders not rendered -[PDFBOX-3143] - Added PDEmbeddedFile constructor with COSName parameter -[PDFBOX-3144] - NullPointerException in TTFSubsetter -[PDFBOX-3145] - Security manager fails for .pdfbox.cache -[PDFBOX-3146] - Ink annotation borders not rendered -[PDFBOX-3148] - Multiline fields won't get rendered correctly if there are multiple paragraphs in field value -[PDFBOX-3149] - 
Failure to decrypt empty strings (AES 128) -[PDFBOX-3151] - getStringWidth is terribly slow (and resulting document is invalid) -[PDFBOX-3152] - NullPointerException in PDType1Font.encode() with centered dot -[PDFBOX-3153] - Direct JPEG extraction results in invalid images in 2.0.0 releases. -[PDFBOX-3154] - PDDocumentCatalog.getDocumentCatalog().getPages().getCount() returns 0 - first page is -1 -[PDFBOX-3155] - org.apache.pdfbox.util.PDFTextStripper class initialization throws NumberFormatException with recent Verona-enabled Java 9 JVMs -[PDFBOX-3157] - PDOutputIntent has N=3 (RGB) hardcoded -[PDFBOX-3160] - Problem with org.apache.xmpbox.DateConverter -[PDFBOX-3164] - XFDF annotations partially incorrectly applied to existing PDF or exceptions when parsing -[PDFBOX-3167] - IllegalArgumentException: dash lengths all zero -[PDFBOX-3169] - SaveIncremental does not work without signature -[PDFBOX-3172] - PDPage.getContentStreams() always returns empty when content stream field is an array -[PDFBOX-3173] - Signature dictionary is not decrypted in encrypted files -[PDFBOX-3175] - PDFTextStreamEngine probably miscalculates text height -[PDFBOX-3179] - PDDocument.load() Error: Expected a long type at offset 2, instead got 'DF-1.4' -[PDFBOX-3181] - java.lang.ArrayIndexOutOfBoundsException: Coordinate out of bounds! in org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory.createFromImage -[PDFBOX-3184] - Throwing in PDType1Font.encode for chars above 255 is wrong. 
-[PDFBOX-3187] - NullPointerException CFFParser +[PDFBOX-5051] - Slow rendering for specific PDF file +[PDFBOX-5134] - Very slow rendering on PageDrawer.shadingFill +[PDFBOX-5135] - Glyphs missed in rendering +[PDFBOX-5137] - Wrong classification of an JPEG image leading to a blank image added to a pdf document +[PDFBOX-5138] - Embedded files not extracted from PDF files with multilevel EmbeddedFiles tree +[PDFBOX-5150] - 3.0.0-RC1: PDComboBox.setValue() throws IllegalArgumentException: /DA is a required entry +[PDFBOX-5151] - Issue with COSObjectKey::fixGeneration +[PDFBOX-5155] - Error extracting text from PDF - Can't read the embedded Type1 font FDFBJU+NewsGothic +[PDFBOX-5156] - Error in identification of PDF comment symbol % as a token separator with PDF names +[PDFBOX-5163] - Stack overflow when reading a corrupt dictionary +[PDFBOX-5168] - dash pattern [0] should be invisible +[PDFBOX-5175] - Behaviour change in 2.0.20 due to use of IOUtils.populateBuffer in SecurityHandler.prepareAESInitializationVector leading to IOException for certain PDF +[PDFBOX-5176] - java.io.IOException: Page tree root must be a dictionary +[PDFBOX-5180] - Snapshot Deploy not working +[PDFBOX-5187] - TSAClient with username+password +[PDFBOX-5188] - COSOutputStream.flush doesn't call super +[PDFBOX-5190] - BaseParser: stack overflow when reading a corrupt pdf +[PDFBOX-5191] - isEmbeddingPermitted() is too restrictive on TTFs with OS2 table versions 0-2 +[PDFBOX-5192] - Wild rendering when repeating truetype glyph flag is outside of range +[PDFBOX-5193] - v2.0.22 and v3.0.0-RC1 PDF Debugger app crashes with java.lang.NullPointerException +[PDFBOX-5194] - CreateCheckBox example draws too large, clipped checkmark +[PDFBOX-5196] - Wrong color space detected for some Jpeg images +[PDFBOX-5199] - Possible memory leak after calling decode filter +[PDFBOX-5204] - Ink annotation not rendered Improvement -[PDFBOX-193] - Getting tiff - PDCcitt.TiffWrapper object -[PDFBOX-408] - Optional logger 
calls could be added to COSDocument & PDJpeg when an error occurs. -[PDFBOX-678] - Support missing Text Rendering Modes when rendering a PDF -[PDFBOX-870] - PDF-To-IMAGE output is not anti-aliased -[PDFBOX-996] - need to insert a child as the first child of an outline but you can only append to the outline. -[PDFBOX-1083] - PDType0Font incomplete -[PDFBOX-1094] - Pattern colorspace support -[PDFBOX-1167] - PDFStreamEngine#processSubStream should throw original IOException instead of RuntimeException + FIX -[PDFBOX-1182] - Create a module for the commandline tools -[PDFBOX-1213] - Adding style information to the PDF to HTML converter -[PDFBOX-1270] - Change internal page resolution to float everywhere -[PDFBOX-1329] - Update PDPage to enum -[PDFBOX-1356] - Support lucene 3.6.0 -[PDFBOX-1384] - Proposals for a new PDNameTreeNode and PDNumberTreeNode -[PDFBOX-1402] - Improve handling of multiline text boxes -[PDFBOX-1444] - Capability to use custom PageDrawer in PDPage.convertToImage -[PDFBOX-1503] - Double logging of exceptions -[PDFBOX-1523] - Manifest should support Specification entries -[PDFBOX-1543] - Remove the ReplaceString example -[PDFBOX-1564] - Extending COSName to produce PDF/A with correct OutputIntents -[PDFBOX-1566] - reduce duplicated code and add caching to pdpagenode -[PDFBOX-1587] - Update the dependency on Bouncy Castle to 1.48 -[PDFBOX-1591] - Resources should implement java.io.closeable -[PDFBOX-1594] - Add support for AES256 Encryption -[PDFBOX-1596] - OverlayPDF logic should be moved into a library class -[PDFBOX-1613] - The ability to inject the time/random component into the COSWriter process to write a PDF document allows some advanced signature creation scenarios where the signature is generated on a separate server that does not hold the full PDF document. 
-[PDFBOX-1621] - Add setModifiedDate(Calendar c) to PDAnnotation -[PDFBOX-1645] - [PATCH] Improved the accuracy of the bounding box for each rendered CFF glyph -[PDFBOX-1648] - FontBox can't load CMaps with no spaces between tokens -[PDFBOX-1656] - Enable PDFMergeUtility to merge Encrypted PDFs -[PDFBOX-1665] - Replace external glyphlist.txt with our onw implementation -[PDFBOX-1667] - org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent throws Exception while it can throw IOException instead -[PDFBOX-1669] - Update the dependency on Bouncy Castle to 1.49 -[PDFBOX-1687] - add dispose() in pdfbox\pdmodel\PDPage.convertToImage() -[PDFBOX-1690] - Add description to embedded file -[PDFBOX-1695] - Improve pdfbox tests -[PDFBOX-1698] - Remove the print and the convertToImage stuff from PDPage and PDDocument -[PDFBOX-1702] - Performance improvement in PDPageContentStream.drawString -[PDFBOX-1707] - Add dispose() when done with graphics -[PDFBOX-1720] - BouncyCastle 1.49: ambigous constructor usage -[PDFBOX-1734] - ImageIoUtil.WriteImage doesn't work with tiff images -[PDFBOX-1738] - PDF with parsing IOException -[PDFBOX-1739] - Load document error for two RegisSTAR documents -[PDFBOX-1744] - Be resilient to PDFs with missing version info -[PDFBOX-1782] - Add getMaxLength() and setMaxLength() methods to PDTextbox -[PDFBOX-1784] - Update parent pom/rat plugin version -[PDFBOX-1798] - Performance problem with PDDocument.saveIncremental (when signing document) -[PDFBOX-1815] - Suggestion: close files in COSStream -[PDFBOX-1820] - Suggestion: close streams in PDIndex and PDJpeg -[PDFBOX-1828] - Remove not needed CMaps -[PDFBOX-1833] - BaseParser tidy up -[PDFBOX-1834] - Remove old Overlay implementation -[PDFBOX-1836] - Use the latest dependencies -[PDFBOX-1839] - PDFImageWriter default BufferedImage type makes output colors look poor -[PDFBOX-1840] - Automatically load isartor for preflight tests -[PDFBOX-1844] - [PATCH] Parser for Type 1 Fonts -[PDFBOX-1847] - TSA Time 
Signature -[PDFBOX-1850] - Speed up TestImageIOUtils -[PDFBOX-1851] - [PATCH] Improved CMYK color space conversion -[PDFBOX-1852] - [PATCH] Alternative patch to speed up TestImageIOUtils -[PDFBOX-1854] - Include AFM files for Core 14 fonts -[PDFBOX-1889] - Remove the ConvertColorspace class -[PDFBOX-1890] - Merge PdfDecompressor and WriteDecodedDoc -[PDFBOX-1891] - Remove the ant module -[PDFBOX-1893] - Refactor color spaces -[PDFBOX-1897] - There are some errors within the source code documentation (javadocs) -[PDFBOX-1902] - generics added to maputil -[PDFBOX-1903] - refactor pdmodel (pdpage) -[PDFBOX-1905] - Remove the PDPage reference from PageDrawer/PDFStreamEngine -[PDFBOX-1906] - Don't use a src subdirectory as output directory for a test case -[PDFBOX-1909] - Close open streams -[PDFBOX-1914] - Shading package: Move "function" methods to base class and more refactoring -[PDFBOX-1915] - Implement shading with Coons and tensor-product patch meshes -[PDFBOX-1941] - Refactor PageDrawer operators -[PDFBOX-1943] - Move pdfbox-tools to its own package -[PDFBOX-1959] - Remove AWT Fonts -[PDFBOX-1962] - Refactor the packages in the core pdfbox module -[PDFBOX-1963] - PDFImageWriter doesn't make use of PDFStreamEngine -[PDFBOX-1964] - PDFMergerUtility support merging using non sequential parser -[PDFBOX-1972] - WrappedIOException no longer needed in Java 1.6 -[PDFBOX-1973] - Exception Refactoring (Don't wrap Exceptions with COSVisitorException) -[PDFBOX-1976] - DocumentEncryption and PDFEncryption are deprecated and should be removed -[PDFBOX-1982] - Standardise AcroForm Fields -[PDFBOX-1985] - Replace List with List in PDDocument and PDPageNode -[PDFBOX-1986] - Move SecurityHandler to PDEncryptionDictionary -[PDFBOX-1989] - Save LZW and other encoded PDImageXObject resources -[PDFBOX-1990] - Support creating PDF from lossless encoded images -[PDFBOX-1991] - Shading PaintContexts should not depend on the page height -[PDFBOX-2002] - Show deprecation in the build / 
fix deprecated calls / delete longtime deprecated stuff -[PDFBOX-2034] - TestFilters is non-deterministic -[PDFBOX-2039] - Class PDDocument should implement java.io.Closeable -[PDFBOX-2051] - PDFPrinter does not use getPageable() -[PDFBOX-2052] - PDFCloneUtility does not handle COSStreamArray -[PDFBOX-2066] - RubberStampWithImage should support more image types -[PDFBOX-2068] - Add filter parameter to PDImageXObject(document, filteredStream) constructor -[PDFBOX-2071] - Insert inline image in page content stream -[PDFBOX-2088] - Support Bouncycastle 1.50 -[PDFBOX-2094] - Add PrintRequestAttributeSet parameter to silentPrint() -[PDFBOX-2097] - Remove pdfbox-war subproject -[PDFBOX-2099] - Improve handling and writing of header and trailer versions -[PDFBOX-2104] - Implement transparency groups -[PDFBOX-2105] - Support for multipage TIFFs in CCITTFactory, makes PDFBox capable of doing tiff2pdf -[PDFBOX-2107] - Make PDFBox XMP library agnostic -[PDFBOX-2113] - Update documentation to reflect the requirement for JBIG2 decoders -[PDFBOX-2118] - Remove ICU4J dependency -[PDFBOX-2123] - Optimize reading of 1-bit depth images in SampleImageReader -[PDFBOX-2126] - Optimize clipping -[PDFBOX-2127] - Optimize calls of getPixel in SampledImageReader and PDImageXObject -[PDFBOX-2129] - Add PDFBox version to the title -[PDFBOX-2131] - Avoid constructing debug messages if debug log is off -[PDFBOX-2132] - Provide a pluggable exception handler to PDFStreamEngine -[PDFBOX-2136] - Use the Type1Parser to extract the encoding -[PDFBOX-2144] - Provide a pluggable font manager -[PDFBOX-2146] - remove unused imports / fix imports -[PDFBOX-2148] - Handle the Fully Qualified Name of duplicate fields better -[PDFBOX-2149] - Font Refactoring -[PDFBOX-2151] - Replace log4j with commons logging -[PDFBOX-2152] - Unable to print the PDF with Acrobat shrink to fit print -[PDFBOX-2157] - Remove AFMFormatter -[PDFBOX-2174] - Suppress the Dock icon on OS X -[PDFBOX-2196] - [PATCH] Type safety in 
PDNameTreeNode and PDNumberTreeNode via generics -[PDFBOX-2205] - (Graphics) Operator Refactoring -[PDFBOX-2220] - [PATCH] Differences array without BaseEncoding (Type1C) -[PDFBOX-2239] - Add missing values to PDComplexFileSpecification -[PDFBOX-2250] - Improve XRef self healing mechanism -[PDFBOX-2262] - Remove usage of AWT fonts -[PDFBOX-2269] - Support for AES-256 Rev. 5 Decryption (Acrobat 9) -[PDFBOX-2294] - Improve vertical text drawing as an experiment -[PDFBOX-2302] - Make better use of RenderingHints -[PDFBOX-2303] - Lazy loading of glyphs in TrueType fonts -[PDFBOX-2328] - Give PDColor access to its underling PDColorSpace -[PDFBOX-2329] - add toString method to PDRange -[PDFBOX-2333] - Overhaul the appearance generation for PDF forms -[PDFBOX-2362] - Remove .properties file usage in PDFStreamEngine -[PDFBOX-2366] - Improve high-level font APIs -[PDFBOX-2370] - Move caching outside of PDResources -[PDFBOX-2374] - Make JavaDocs for trunk builds available via our website -[PDFBOX-2386] - Move operators and content streams out of "pdfbox.util" -[PDFBOX-2387] - ImageIOUtil, JPEGUtil, TIFFUtil and MetaUtil are not needed in "pdfbox" -[PDFBOX-2388] - Move printing classes into top-level "printing" package -[PDFBOX-2389] - Move Encoding classes into "font" package -[PDFBOX-2391] - Use an enum for RenderingIntent -[PDFBOX-2392] - PDPropertyList belongs in "markedcontent" package -[PDFBOX-2394] - Add example code to extract embedded files in annotations -[PDFBOX-2414] - Allow non-sequential parser for PDFMerger in app -[PDFBOX-2423] - Page tree handling needs rewriting -[PDFBOX-2430] - Make the non-sequential parser the default parser -[PDFBOX-2440] - xref stream is saved as table -[PDFBOX-2444] - Add radial shading example -[PDFBOX-2452] - Continuous log "Nonsymbolic Type 0 font: SNCFYS+ARStdKai" -[PDFBOX-2456] - create TestSymmetricKeyEncryption.java -[PDFBOX-2459] - Share functionality between Page Tree and Field Tree -[PDFBOX-2461] - Clear Checkstyle errors in 
source -[PDFBOX-2464] - Document crypto build dependencies -[PDFBOX-2467] - "Arial,Bold" always substituted with "Helvetica-Bold" -[PDFBOX-2468] - Switch FDFDocument.load from PDFParser to NonSequentialParser -[PDFBOX-2473] - Remove the CopyDoc example -[PDFBOX-2474] - Remove the direct usage of PDFParser -[PDFBOX-2515] - Improve the non sequential parser to be used when signing a pdf -[PDFBOX-2516] - Further align AcroForms and Fields PDModel with PDF specification -[PDFBOX-2530] - Improve PDFDebugger -[PDFBOX-2565] - Subset embedded TTF fonts -[PDFBOX-2566] - Remove logging from operator classes -[PDFBOX-2580] - Decouple implementation specific forms handling from interactive.form PD Model -[PDFBOX-2587] - PDF takes minutes to convert (sRGB) -[PDFBOX-2591] - Allow using custom Filters -[PDFBOX-2592] - Allow sharing of COS objects between different documents -[PDFBOX-2594] - Set default params in JBIG2Filter -[PDFBOX-2597] - Provide easier access to AcroForm field tree -[PDFBOX-2600] - Remove old parser -[PDFBOX-2623] - PDFPrinter.getPrintable returns Pageable instead of PDFPageable for easier extending -[PDFBOX-2628] - XmpSerializer will never throw XmpSerializationException -[PDFBOX-2645] - Open PDF file from byte array without temp file -[PDFBOX-2669] - Make internal PageDrawer font classes package-private -[PDFBOX-2670] - Move orphaned COSObjectKey class -[PDFBOX-2680] - Move multi-pdf classes from util into their own package -[PDFBOX-2683] - Remove SignatureInterface dependency from COSDocument -[PDFBOX-2689] - Implement page transitions -[PDFBOX-2695] - Iterate PDOutlineNode children -[PDFBOX-2700] - support JPEG color space code 11 (JCS_YCCK) -[PDFBOX-2703] - Remove javacc generated PDFParser from preflight -[PDFBOX-2707] - Remove redundant IOUtils.closeQuietly -[PDFBOX-2716] - Use saveIncremental() method on a document opened with an InputStream does not work -[PDFBOX-2727] - Cache color space instances -[PDFBOX-2735] - Keyboard shortcuts in PDFReader 
-[PDFBOX-2736] - First page and last page navigation with keyboard shortcuts in PDFReader -[PDFBOX-2744] - Add validation check for setNonStrokingColor and setStrokingColor -[PDFBOX-2748] - Recent files in PDF reader -[PDFBOX-2753] - Improve rendering of filled thin lines -[PDFBOX-2758] - Support additional annotation types when importing XFDF files -[PDFBOX-2764] - Allow setting extended graphics state in PDPageContentStream -[PDFBOX-2777] - Create convenience method to create an XImage object -[PDFBOX-2782] - Enhance toString() output for AcroForm fields -[PDFBOX-2791] - Provide access to Type 1 font data -[PDFBOX-2806] - The 'kern' table type is not supported. -[PDFBOX-2807] - The vertical layout table types 'vhea', 'vmtx', 'VORG' are not supported. -[PDFBOX-2838] - Please make PDPageContentStream non-final -[PDFBOX-2841] - Make it easier to work with RadioButton Groups -[PDFBOX-2842] - Overhaul font substitution -[PDFBOX-2865] - Downgrade logging "Using last-resort fallback for x font" to warn in 2.0.0? 
-[PDFBOX-2870] - Use animal sniffer maven plugin to detect non java 6 api usage -[PDFBOX-2878] - Align annotation and form public API -[PDFBOX-2880] - Allow Type 1 embedding without AFM file -[PDFBOX-2882] - Improve performance when using scratch file -[PDFBOX-2883] - Unify memory handling -[PDFBOX-2888] - setAllSecurityToBeRemoved(true) before calling protect() should have no effect -[PDFBOX-2892] - Invisible signature annotation violates PDF/A -[PDFBOX-2893] - Simplify COSStream encoding and decoding -[PDFBOX-2894] - Remove COSStreamArray / SequenceRandomAccessRead -[PDFBOX-2905] - Replace PDFReader with PDFDebugger -[PDFBOX-2922] - Printing issues with landscape pages -[PDFBOX-2928] - Add numPages parameter of Book in Printing.printWithPaper example -[PDFBOX-2931] - Make PDFPrintable margin-aligning and centering optional -[PDFBOX-2933] - Drop ant build including .NET build support -[PDFBOX-2943] - PDType3Font.getWidthFromFont not supported -[PDFBOX-2945] - PDType1Font.getNameInFont(String) very slow when Unicode fallback is used -[PDFBOX-2962] - Handle TIFF predictor for bpc 2 and 4 / optimize existing predictor code -[PDFBOX-2973] - Actions shortage -[PDFBOX-2978] - Add support for grouped checkboxes -[PDFBOX-2997] - Make FontMapper into a singleton interface -[PDFBOX-3072] - Allow missing page type -[PDFBOX-3088] - Cache glyph table to optimize concurrent access -[PDFBOX-3103] - Slow performance when printing PDF (fix provided) -[PDFBOX-3104] - Font Cache is taking a lot of time -[PDFBOX-3115] - Fix high memory usage during signing -[PDFBOX-3121] - Buffer save(File) -[PDFBOX-3122] - IllegalArgumentException: dash lengths all zero -[PDFBOX-3131] - Reduce amount of intermediate data and objects to reduce memory footprint/complexity -[PDFBOX-3133] - PDFBox 2.0.0-RC2 and earlier 2.0.0 SNAPSHOT Versions print performance is poor with systems having low RAM < 3GB and lower number of fonts. 
-[PDFBOX-3137] - Reduce/remove dependency on commons.io in preflight/xmpbox -[PDFBOX-3158] - Add constructor with BufferedImage to PDVisibleSignDesigner -[PDFBOX-3161] - No glyph for U+0009 in font ArialUnicodeMS -[PDFBOX-3163] - PDImageXObject.createFromFile should relies on header bytes -[PDFBOX-3176] - Add a removeRegion method in PDFTextSTripperByArea class -[PDFBOX-3178] - Make PDFTextStreamEngine class public +[PDFBOX-5093] - Pass PDFRenderer to PDFPrintable constructor +[PDFBOX-5141] - Create tests for HelloWorld examples +[PDFBOX-5145] - Faster PDImageXObject.applyMask +[PDFBOX-5154] - Custom folder for fonts in FontMapper +[PDFBOX-5157] - allow to make timestamp only signature "LTV" +[PDFBOX-5164] - Create portable collection PDF +[PDFBOX-5177] - Optimize memory footprint of PDFObjectStreamParser +[PDFBOX-5183] - Add getter/setter for suppressDuplicateOverlappingText in PDFMarkedContentExtractor +[PDFBOX-5200] - Cache PageTree in PDFPrintable +[PDFBOX-5201] - Add Adobe Illustrator COSNames +[PDFBOX-5208] - Make constructors of CIDSystemInfo and PDPanoseClassification public -Feature +Wish -[PDFBOX-52] - DCTFilter is not implemented yet -[PDFBOX-149] - Update encryption algorithms -[PDFBOX-151] - Correct calculation of Type0Font size. -[PDFBOX-615] - shfill operator needs implementation -[PDFBOX-830] - Setting of logical page numbers -[PDFBOX-922] - True type PDFont subclass only supports WinAnsiEncoding (hardcoded!) -[PDFBOX-953] - PDFBox fails to ExtractText from Adobe Acrobat X 256-bit AES encrypted documents -[PDFBOX-1054] - DateConverter: allow for external adding of potential date parsing formats -[PDFBOX-1209] - Add insertSiblingBefore() to PDOutlineItem -[PDFBOX-1223] - Strange color issues with convertToImage method -[PDFBOX-1462] - Use file backed buffer for FlateFilter? 
-[PDFBOX-1494] - PDF box color distortion -[PDFBOX-1589] - Switch to java 1.6 as minimum requirement for PDFBox -[PDFBOX-1766] - [PATCH] Visible Signature using PDFbox -[PDFBOX-2211] - Create sample code for creating a PDF with shading -[PDFBOX-2276] - Remove Jempbox subproject -[PDFBOX-2400] - Add insertPage() method -[PDFBOX-2624] - "CIDSet entry is missing for the Composite Subset" when creating PDF/A-1b file with PDType0Font.load() -[PDFBOX-2673] - Add output path prefix param in PDFSplit/Splitter -[PDFBOX-2752] - Support TTC font files -[PDFBOX-2766] - Missing PDDocument.load() overload -[PDFBOX-2821] - Add PDDocument(boolean) constructor for creating new documents using scratch files -[PDFBOX-3074] - Mark transparency groups +[PDFBOX-5198] - When merging multiple pdf ua documents, Tags become nested Task -[PDFBOX-1600] - COSDocument and PDDocument declare throws IOException when they don't -[PDFBOX-1675] - Preflight : improve error information -[PDFBOX-1685] - Verify interpretation of rdf:about for PDF/A -[PDFBOX-1975] - Improve TestImageIOUtils unit tests to check image resolution and compression -[PDFBOX-2197] - Add sample how to import a page as PDFormXObject -[PDFBOX-2480] - Add information about Snapshots to download section -[PDFBOX-2576] - Improve code quality -[PDFBOX-2610] - Expand Isartor test for Bavaria test suite and other tests -[PDFBOX-2674] - Remove two unused methods from COSStream -[PDFBOX-2712] - Remove commented out lines of code -[PDFBOX-2762] - remove parseCOSStream() call from PDFStreamParser -[PDFBOX-2768] - Remove VisualSignatureParser -[PDFBOX-3011] - Find out why trunk CreateVisibleSignature example produces incorrect output pdf -[PDFBOX-3020] - Set libraries to current versions for RC -[PDFBOX-3040] - Move website to local build tool - -Test - -[PDFBOX-1584] - Add unit test for RandomAccessFileOutputStream -[PDFBOX-1673] - Tests with selection of files from Adobe Acrobat Engineering website -[PDFBOX-2369] - how to convert pdf to 
image - -Wish - -[PDFBOX-1187] - Cut dependency between pdfbox and jempbox -[PDFBOX-1224] - Angle units are not consistent -[PDFBOX-1450] - document how to encrypt with AES 256 with the release of 2.0 -[PDFBOX-1540] - Add XML output option to preflight -[PDFBOX-1590] - Unify logging between preflight and other modules -[PDFBOX-1769] - Fix crash on invalid xref -[PDFBOX-1946] - Running within an Applet has many AccessControlException 's -[PDFBOX-2011] - Please extend base class "Encoding" with 2 methods to access global name2char and char2name maps -[PDFBOX-2012] - Extend CMAPEncodingEntry API -[PDFBOX-2013] - Please extend PDTrueTypeFont API -[PDFBOX-2190] - Disable console logging for preflight Isartor tests -[PDFBOX-2209] - [PATCH] Restore shading API -[PDFBOX-2692] - Possibility to use our own and/or overwrite PageDrawer class -[PDFBOX-2738] - Make org.apache.pdfbox.pdmodel.PDDocument#getFontsToSubset public -[PDFBOX-2770] - Provide the sources along with SNAPSHOT releases +[PDFBOX-5133] - Failing testFlattenPDFBox2469Filled on Ubuntu +[PDFBOX-5184] - Add test for PDFMarkedContentExtractor class +[PDFBOX-5186] - Create test for CreateGradientShadingPDF Release Contents ---------------- @@ -1219,10 +70,10 @@ This release consists of a single source archive packaged as a zip file. The archive can be unpacked with the jar tool from your JDK installation. See the README.txt file for instructions on how to build this release. -The source archive is accompanied by SHA1 and MD5 checksums and a PGP -signature that you can use to verify the authenticity of your download. +The source archive is accompanied by a SHA512 checksum and a PGP signature +that you can use to verify the authenticity of your download. The public key used for the PGP signature can be found at -https://svn.apache.org/repos/asf/pdfbox/KEYS. +https://www.apache.org/dist/pdfbox/KEYS. About Apache PDFBox ------------------- @@ -1233,7 +84,7 @@ documents and the ability to extract content from documents. 
Apache PDFBox also includes several command line utilities. Apache PDFBox is published under the Apache License, Version 2.0. -For more information, visit http://pdfbox.apache.org/ +For more information, visit https://pdfbox.apache.org/ About The Apache Software Foundation ------------------------------------ @@ -1245,4 +96,4 @@ enables individual and commercial users to easily deploy Apache software; the Foundation's intellectual property framework limits the legal exposure of its 2,500+ contributors. -For more information, visit http://www.apache.org/ +For more information, visit https://www.apache.org/ diff --git a/app/pom.xml b/app/pom.xml index ec6a2de6bf5..2841cb2a13a 100644 --- a/app/pom.xml +++ b/app/pom.xml @@ -23,7 +23,7 @@ org.apache.pdfbox pdfbox-parent - 2.0.0-SNAPSHOT + 2.0.25-SNAPSHOT ../parent/pom.xml @@ -49,6 +49,11 @@ bcprov-jdk15on provided + + org.apache.pdfbox + jbig2-imageio + provided + @@ -61,9 +66,9 @@ org.apache.pdfbox.* true - *;scope=provided;inline=org/apache/**|org/bouncycastle/** + *;scope=provided;inline=org/apache/**|org/bouncycastle/**|META-INF/services/** ${project.url} - !junit.framework,!junit.textui,javax.*;resolution:=optional,org.apache.avalon.framework.logger;resolution:=optional,org.apache.log;resolution:=optional,org.apache.log4j;resolution:=optional,* + !junit.framework,!junit.textui,javax.*;resolution:=optional,org.apache.avalon.framework.logger;resolution:=optional,org.apache.log;resolution:=optional,* org.apache.pdfbox.tools.PDFBox diff --git a/app/src/main/appended-resources/META-INF/LICENSE b/app/src/main/appended-resources/META-INF/LICENSE index 57237248ea1..f581b421846 100644 --- a/app/src/main/appended-resources/META-INF/LICENSE +++ b/app/src/main/appended-resources/META-INF/LICENSE @@ -134,3 +134,33 @@ Glyphlist (http://www.adobe.com/devnet/opentype/archives/glyph.html) non-infringement of any third party rights regarding the Adobe materials. 
+Twelvemonkeys (https://github.com/haraldk/TwelveMonkeys/) + + Copyright (c) 2008-2016, Harald Kuhr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + o Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + o Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + o Neither the name "TwelveMonkeys" nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/app/src/main/appended-resources/META-INF/NOTICE b/app/src/main/appended-resources/META-INF/NOTICE index e8fea2ebf58..a0f9eac0893 100644 --- a/app/src/main/appended-resources/META-INF/NOTICE +++ b/app/src/main/appended-resources/META-INF/NOTICE @@ -1,2 +1,14 @@ Based on source code originally developed in the PDFBox and FontBox projects. 
Copyright (c) 2002-2007, www.pdfbox.org + +Includes the Adobe Glyph List +Copyright 1997, 1998, 2002, 2007, 2010 Adobe Systems Incorporated. + +Includes the Zapf Dingbats Glyph List +Copyright 2002, 2010 Adobe Systems Incorporated. + +Includes the Bidi Mirroring Glyph Property (BidiMirroring-8.0.0.txt) +Copyright 1991-2015 Unicode, Inc. + +Includes parts of TwelveMonkeys ImageIO +Copyright 2008-2016 Harald Kuhr diff --git a/debugger-app/README.md b/debugger-app/README.md new file mode 100644 index 00000000000..59db786bd31 --- /dev/null +++ b/debugger-app/README.md @@ -0,0 +1,29 @@ + + +# Apache PDFBox PDF Debugger App + +## Package for distribution on Windows, Linux and macOS + +To package the Apache PDFBox Debugger App for distribution on Windows, Linux +and macOS, [packr](https://github.com/libgdx/packr) provides a way to do so. + +The project's [Readme](https://github.com/libgdx/packr#readme) describes the steps needed +for the various platforms. + +**NOTE:** The Apache PDFBox project provides no support to create such packages. +This information is intended as a help for the interested developer.
diff --git a/debugger-app/pom.xml b/debugger-app/pom.xml index 1b6432bfb95..83476346f2e 100644 --- a/debugger-app/pom.xml +++ b/debugger-app/pom.xml @@ -23,7 +23,7 @@ org.apache.pdfbox pdfbox-parent - 2.0.0-SNAPSHOT + 2.0.25-SNAPSHOT ../parent/pom.xml @@ -49,6 +49,11 @@ bcprov-jdk15on provided + + org.apache.pdfbox + jbig2-imageio + provided + @@ -61,9 +66,9 @@ org.apache.pdfbox.* true - *;scope=provided;inline=org/apache/**|org/bouncycastle/** + *;scope=provided;inline=org/apache/**|org/bouncycastle/**|META-INF/services/** ${project.url} - !junit.framework,!junit.textui,javax.*;resolution:=optional,org.apache.avalon.framework.logger;resolution:=optional,org.apache.log;resolution:=optional,org.apache.log4j;resolution:=optional,* + !junit.framework,!junit.textui,javax.*;resolution:=optional,org.apache.avalon.framework.logger;resolution:=optional,org.apache.log;resolution:=optional,* org.apache.pdfbox.debugger.PDFDebugger diff --git a/debugger-app/src/main/appended-resources/META-INF/LICENSE b/debugger-app/src/main/appended-resources/META-INF/LICENSE index 57237248ea1..f581b421846 100644 --- a/debugger-app/src/main/appended-resources/META-INF/LICENSE +++ b/debugger-app/src/main/appended-resources/META-INF/LICENSE @@ -134,3 +134,33 @@ Glyphlist (http://www.adobe.com/devnet/opentype/archives/glyph.html) non-infringement of any third party rights regarding the Adobe materials. +Twelvemonkeys (https://github.com/haraldk/TwelveMonkeys/) + + Copyright (c) 2008-2016, Harald Kuhr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + o Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + o Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + o Neither the name "TwelveMonkeys" nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/debugger-app/src/main/appended-resources/META-INF/NOTICE b/debugger-app/src/main/appended-resources/META-INF/NOTICE index e8fea2ebf58..a0f9eac0893 100644 --- a/debugger-app/src/main/appended-resources/META-INF/NOTICE +++ b/debugger-app/src/main/appended-resources/META-INF/NOTICE @@ -1,2 +1,14 @@ Based on source code originally developed in the PDFBox and FontBox projects. Copyright (c) 2002-2007, www.pdfbox.org + +Includes the Adobe Glyph List +Copyright 1997, 1998, 2002, 2007, 2010 Adobe Systems Incorporated. + +Includes the Zapf Dingbats Glyph List +Copyright 2002, 2010 Adobe Systems Incorporated. + +Includes the Bidi Mirroring Glyph Property (BidiMirroring-8.0.0.txt) +Copyright 1991-2015 Unicode, Inc. 
+ +Includes parts of TwelveMonkeys ImageIO +Copyright 2008-2016 Harald Kuhr diff --git a/debugger/pom.xml b/debugger/pom.xml index 35cf888b2a6..a0588121ffd 100644 --- a/debugger/pom.xml +++ b/debugger/pom.xml @@ -23,7 +23,7 @@ org.apache.pdfbox pdfbox-parent - 2.0.0-SNAPSHOT + 2.0.25-SNAPSHOT ../parent/pom.xml @@ -48,14 +48,14 @@ pdfbox ${project.version} - - com.levigo.jbig2 - levigo-jbig2-imageio + org.apache.pdfbox + jbig2-imageio test + com.github.jai-imageio jai-imageio-core @@ -75,6 +75,19 @@ true + + + org.apache.maven.plugins + maven-jar-plugin + + + + org.apache.pdfbox.debugger + + + + + diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/PDFDebugger.java b/debugger/src/main/java/org/apache/pdfbox/debugger/PDFDebugger.java index 11bc176ab45..e64a363594c 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/PDFDebugger.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/PDFDebugger.java @@ -18,8 +18,10 @@ import java.awt.BorderLayout; import java.awt.Component; +import java.awt.Cursor; import java.awt.Dimension; import java.awt.FileDialog; +import java.awt.Frame; import java.awt.Toolkit; import java.awt.datatransfer.DataFlavor; import java.awt.datatransfer.Transferable; @@ -31,7 +33,6 @@ import java.awt.event.WindowEvent; import java.awt.print.PrinterException; import java.awt.print.PrinterJob; -import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FilenameFilter; import java.io.IOException; @@ -42,9 +43,14 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import javax.imageio.spi.IIORegistry; +import javax.print.attribute.HashPrintRequestAttributeSet; +import javax.print.attribute.PrintRequestAttributeSet; +import javax.print.attribute.standard.Sides; import javax.swing.AbstractAction; import javax.swing.Action; +import javax.swing.JCheckBoxMenuItem; import javax.swing.JComponent; import javax.swing.JFrame; import javax.swing.JLabel; @@ -90,17 +96,30 @@ import 
org.apache.pdfbox.debugger.ui.ErrorDialog; import org.apache.pdfbox.debugger.ui.ExtensionFileFilter; import org.apache.pdfbox.debugger.ui.FileOpenSaveDialog; +import org.apache.pdfbox.debugger.ui.ImageTypeMenu; +import org.apache.pdfbox.debugger.ui.LogDialog; import org.apache.pdfbox.debugger.ui.MapEntry; import org.apache.pdfbox.debugger.ui.OSXAdapter; import org.apache.pdfbox.debugger.ui.PDFTreeCellRenderer; import org.apache.pdfbox.debugger.ui.PDFTreeModel; import org.apache.pdfbox.debugger.ui.PageEntry; +import org.apache.pdfbox.debugger.ui.ReaderBottomPanel; import org.apache.pdfbox.debugger.ui.RecentFiles; +import org.apache.pdfbox.debugger.ui.RenderDestinationMenu; import org.apache.pdfbox.debugger.ui.RotationMenu; import org.apache.pdfbox.debugger.ui.Tree; +import org.apache.pdfbox.debugger.ui.WindowPrefs; import org.apache.pdfbox.debugger.ui.ZoomMenu; +import org.apache.pdfbox.filter.FilterFactory; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.common.PDPageLabels; +import org.apache.pdfbox.pdmodel.encryption.AccessPermission; import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceCMYK; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.interactive.viewerpreferences.PDViewerPreferences; import org.apache.pdfbox.printing.PDFPageable; /** @@ -110,6 +129,7 @@ * @author Ben Litchfield * @author Khyrul Bashar */ +@SuppressWarnings({"serial","squid:MaximumInheritanceDepth","squid:S1948"}) public class PDFDebugger extends JFrame { private static final Set SPECIALCOLORSPACES = @@ -119,6 +139,7 @@ public class PDFDebugger extends JFrame new HashSet(Arrays.asList(COSName.ICCBASED, COSName.PATTERN, COSName.CALGRAY, COSName.CALRGB, COSName.LAB)); + @SuppressWarnings({"squid:S2068"}) private static final String PASSWORD = "-password"; 
private static final String VIEW_STRUCTURE = "-viewstructure"; @@ -127,6 +148,7 @@ public class PDFDebugger extends JFrame private TreeStatusPane statusPane; private RecentFiles recentFiles; + private WindowPrefs windowPrefs; private boolean isPageMode; private PDDocument document; @@ -135,10 +157,10 @@ public class PDFDebugger extends JFrame private static final String OS_NAME = System.getProperty("os.name").toLowerCase(); private static final boolean IS_MAC_OS = OS_NAME.startsWith("mac os x"); - private JScrollPane jScrollPane1; - private JScrollPane jScrollPane2; - private javax.swing.JSplitPane jSplitPane1; - private javax.swing.JTextPane jTextPane1; + private JScrollPane jScrollPaneRight; + private javax.swing.JSplitPane jSplitPane; + private javax.swing.JTextPane jTextPane; + private ReaderBottomPanel statusBar; private Tree tree; private final JPanel documentPanel = new JPanel(); @@ -147,6 +169,7 @@ public class PDFDebugger extends JFrame private JMenuItem saveMenuItem; private JMenu recentFilesMenu; private JMenuItem printMenuItem; + private JMenuItem reopenMenuItem; // edit > find menu private JMenu findMenu; @@ -157,6 +180,9 @@ public class PDFDebugger extends JFrame // view menu private JMenuItem viewModeItem; + public static JCheckBoxMenuItem allowSubsampling; + public static JCheckBoxMenuItem repairAcroFormMenuItem; + /** * Constructor. 
*/ @@ -179,16 +205,16 @@ public PDFDebugger(boolean viewPages) */ private void initComponents() { - jSplitPane1 = new javax.swing.JSplitPane(); - jScrollPane1 = new JScrollPane(); + jSplitPane = new javax.swing.JSplitPane(); + JScrollPane jScrollPaneLeft = new JScrollPane(); tree = new Tree(this); - jScrollPane2 = new JScrollPane(); - jTextPane1 = new javax.swing.JTextPane(); + jScrollPaneRight = new JScrollPane(); + jTextPane = new javax.swing.JTextPane(); tree.setCellRenderer(new PDFTreeCellRenderer()); tree.setModel(null); - setTitle("PDFBox Debugger"); + setTitle("Apache PDFBox Debugger"); addWindowListener(new java.awt.event.WindowAdapter() { @@ -202,12 +228,14 @@ public void windowOpened(WindowEvent windowEvent) @Override public void windowClosing(WindowEvent evt) { - exitForm(evt); + exitMenuItemActionPerformed(null); } }); - - jScrollPane1.setBorder(new BevelBorder(BevelBorder.RAISED)); - jScrollPane1.setPreferredSize(new Dimension(300, 500)); + + windowPrefs = new WindowPrefs(this.getClass()); + + jScrollPaneLeft.setBorder(new BevelBorder(BevelBorder.RAISED)); + jSplitPane.setDividerLocation(windowPrefs.getDividerLocation()); tree.addTreeSelectionListener(new TreeSelectionListener() { @Override @@ -217,15 +245,14 @@ public void valueChanged(TreeSelectionEvent evt) } }); - jScrollPane1.setViewportView(tree); + jScrollPaneLeft.setViewportView(tree); - jSplitPane1.setRightComponent(jScrollPane2); - jSplitPane1.setDividerSize(3); - - jScrollPane2.setPreferredSize(new Dimension(300, 500)); - jScrollPane2.setViewportView(jTextPane1); + jSplitPane.setRightComponent(jScrollPaneRight); + jSplitPane.setDividerSize(3); - jSplitPane1.setLeftComponent(jScrollPane1); + jScrollPaneRight.setViewportView(jTextPane); + + jSplitPane.setLeftComponent(jScrollPaneLeft); JScrollPane documentScroller = new JScrollPane(); documentScroller.setViewportView(documentPanel); @@ -235,7 +262,10 @@ public void valueChanged(TreeSelectionEvent evt) statusPane.getPanel().setPreferredSize(new 
Dimension(300, 25)); getContentPane().add(statusPane.getPanel(), BorderLayout.PAGE_START); - getContentPane().add(jSplitPane1, BorderLayout.CENTER); + getContentPane().add(jSplitPane, BorderLayout.CENTER); + + statusBar = new ReaderBottomPanel(); + getContentPane().add(statusBar, BorderLayout.SOUTH); // create menus JMenuBar menuBar = new JMenuBar(); @@ -244,8 +274,8 @@ public void valueChanged(TreeSelectionEvent evt) menuBar.add(createViewMenu()); setJMenuBar(menuBar); - Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize(); - setBounds((screenSize.width-700)/2, (screenSize.height-600)/2, 700, 600); + setExtendedState(windowPrefs.getExtendedState()); + setBounds(windowPrefs.getBounds()); // drag and drop to open files setTransferHandler(new TransferHandler() @@ -266,19 +296,30 @@ public boolean importData(TransferSupport transferSupport) List files = (List) transferable.getTransferData( DataFlavor.javaFileListFlavor); readPDFFile(files.get(0), ""); - return true; } catch (IOException e) { - throw new RuntimeException(e); + new ErrorDialog(e).setVisible(true); } catch (UnsupportedFlavorException e) { throw new RuntimeException(e); } + return true; } }); + initGlobalEventHandlers(); + } + + /** + * Initialize application global event handlers. Protected to allow + * subclasses to override this method if they don't want the global event + * handler overridden. 
+ */ + @SuppressWarnings("WeakerAccess") + protected void initGlobalEventHandlers() + { // Mac OS X file open/quit handler if (IS_MAC_OS) { @@ -314,6 +355,7 @@ public void actionPerformed(ActionEvent evt) JMenu fileMenu = new JMenu("File"); fileMenu.add(openMenuItem); + fileMenu.setMnemonic('F'); JMenuItem openUrlMenuItem = new JMenuItem("Open URL..."); openUrlMenuItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_U, SHORCUT_KEY_MASK)); @@ -338,6 +380,33 @@ public void actionPerformed(ActionEvent evt) } }); fileMenu.add(openUrlMenuItem); + + reopenMenuItem = new JMenuItem("Reopen"); + reopenMenuItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_R, SHORCUT_KEY_MASK)); + reopenMenuItem.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent evt) + { + try + { + if (currentFilePath.startsWith("http")) + { + readPDFurl(currentFilePath, ""); + } + else + { + readPDFFile(currentFilePath, ""); + } + } + catch (IOException e) + { + new ErrorDialog(e).setVisible(true); + } + } + }); + reopenMenuItem.setEnabled(false); + fileMenu.add(reopenMenuItem); try { @@ -354,6 +423,7 @@ public void actionPerformed(ActionEvent evt) fileMenu.add(recentFilesMenu); printMenuItem = new JMenuItem("Print"); + printMenuItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_P, SHORCUT_KEY_MASK)); printMenuItem.setEnabled(false); printMenuItem.addActionListener(new ActionListener() { @@ -364,25 +434,22 @@ public void actionPerformed(ActionEvent evt) } }); - if (!IS_MAC_OS) - { - fileMenu.addSeparator(); - fileMenu.add(printMenuItem); - } + fileMenu.addSeparator(); + fileMenu.add(printMenuItem); - JMenuItem exitMenuItem = new JMenuItem("Exit"); - exitMenuItem.setAccelerator(KeyStroke.getKeyStroke("alt F4")); - exitMenuItem.addActionListener(new ActionListener() + if (!IS_MAC_OS) { - @Override - public void actionPerformed(ActionEvent evt) + JMenuItem exitMenuItem = new JMenuItem("Exit"); + exitMenuItem.setAccelerator(KeyStroke.getKeyStroke("alt 
F4")); + exitMenuItem.addActionListener(new ActionListener() { - exitMenuItemActionPerformed(evt); - } - }); + @Override + public void actionPerformed(ActionEvent evt) + { + exitMenuItemActionPerformed(evt); + } + }); - if (!IS_MAC_OS) - { fileMenu.addSeparator(); fileMenu.add(exitMenuItem); } @@ -393,6 +460,7 @@ public void actionPerformed(ActionEvent evt) private JMenu createEditMenu() { JMenu editMenu = new JMenu("Edit"); + editMenu.setMnemonic('E'); JMenuItem cutMenuItem = new JMenuItem("Cut"); cutMenuItem.setEnabled(false); @@ -418,6 +486,7 @@ private JMenu createEditMenu() private JMenu createViewMenu() { JMenu viewMenu = new JMenu("View"); + viewMenu.setMnemonic('V'); if (isPageMode) { viewModeItem = new JMenuItem("Show Internal Structure"); @@ -456,7 +525,27 @@ public void actionPerformed(ActionEvent actionEvent) RotationMenu rotationMenu = RotationMenu.getInstance(); rotationMenu.setEnableMenu(false); viewMenu.add(rotationMenu.getMenu()); + + ImageTypeMenu imageTypeMenu = ImageTypeMenu.getInstance(); + imageTypeMenu.setEnableMenu(false); + viewMenu.add(imageTypeMenu.getMenu()); + + RenderDestinationMenu renderDestinationMenu = RenderDestinationMenu.getInstance(); + renderDestinationMenu.setEnableMenu(false); + viewMenu.add(renderDestinationMenu.getMenu()); + viewMenu.addSeparator(); + + allowSubsampling = new JCheckBoxMenuItem("Allow subsampling"); + allowSubsampling.setEnabled(false); + viewMenu.add(allowSubsampling); + + viewMenu.addSeparator(); + + repairAcroFormMenuItem = new JCheckBoxMenuItem("Repair AcroForm"); + repairAcroFormMenuItem.setEnabled(false); + viewMenu.add(repairAcroFormMenuItem); + return viewMenu; } @@ -507,7 +596,7 @@ public JMenu getFindMenu() } /** - * Returns the Edit > Find > Find menu item. + * Returns the Edit > Find > Find menu item. */ public JMenuItem getFindMenuItem() { @@ -515,7 +604,7 @@ public JMenuItem getFindMenuItem() } /** - * Returns the Edit > Find > Find Next menu item. 
+ * Returns the Edit > Find > Find Next menu item. */ public JMenuItem getFindNextMenuItem() { @@ -523,7 +612,7 @@ public JMenuItem getFindNextMenuItem() } /** - * Returns the Edit > Find > Find Previous menu item. + * Returns the Edit > Find > Find Previous menu item. */ public JMenuItem getFindPreviousMenuItem() { @@ -563,15 +652,15 @@ private void openMenuItemActionPerformed(ActionEvent evt) openDialog.setFilenameFilter(new FilenameFilter() { @Override - public boolean accept(File file, String s) + public boolean accept(File dir, String name) { - return file.getName().toLowerCase().endsWith(".pdf"); + return name.toLowerCase().endsWith(".pdf"); } }); openDialog.setVisible(true); if (openDialog.getFile() != null) { - readPDFFile(openDialog.getFile(), ""); + readPDFFile(new File(openDialog.getDirectory(),openDialog.getFile()), ""); } } else @@ -602,6 +691,8 @@ private void jTree1ValueChanged(TreeSelectionEvent evt) { Object selectedNode = path.getLastPathComponent(); + statusBar.getStatusLabel().setText(""); + if (isPage(selectedNode)) { showPage(selectedNode); @@ -635,12 +726,12 @@ && isFlagNode(selectedNode, path.getParentPath().getLastPathComponent())) showString(selectedNode); return; } - if (jSplitPane1.getRightComponent() == null - || !jSplitPane1.getRightComponent().equals(jScrollPane2)) + if (jSplitPane.getRightComponent() == null + || !jSplitPane.getRightComponent().equals(jScrollPaneRight)) { - replaceRightComponent(jScrollPane2); + replaceRightComponent(jScrollPaneRight); } - jTextPane1.setText(convertToString(selectedNode)); + jTextPane.setText(convertToString(selectedNode)); } catch (Exception e) { @@ -711,6 +802,7 @@ private boolean isFlagNode(Object selectedNode, Object parentNode) (COSName.F.equals(key) && isAnnot(parentNode)) || COSName.FF.equals(key) || COSName.PANOSE.equals(key) || + COSName.SIG_FLAGS.equals(key) || (COSName.P.equals(key) && isEncrypt(parentNode)); } return false; @@ -721,7 +813,7 @@ private boolean isEncrypt(Object obj) if (obj 
instanceof MapEntry) { MapEntry entry = (MapEntry) obj; - return (COSName.ENCRYPT.equals(entry.getKey()) && entry.getValue() instanceof COSDictionary); + return COSName.ENCRYPT.equals(entry.getKey()) && entry.getValue() instanceof COSDictionary; } return false; } @@ -730,16 +822,14 @@ private boolean isFontDescriptor(Object obj) { Object underneathObject = getUnderneathObject(obj); return underneathObject instanceof COSDictionary && - ((COSDictionary) underneathObject).containsKey(COSName.TYPE) && - ((COSDictionary) underneathObject).getCOSName(COSName.TYPE).equals(COSName.FONT_DESC); + COSName.FONT_DESC.equals(((COSDictionary) underneathObject).getCOSName(COSName.TYPE)); } private boolean isAnnot(Object obj) { Object underneathObject = getUnderneathObject(obj); return underneathObject instanceof COSDictionary && - ((COSDictionary) underneathObject).containsKey(COSName.TYPE) && - ((COSDictionary) underneathObject).getCOSName(COSName.TYPE).equals(COSName.ANNOT); + COSName.ANNOT.equals(((COSDictionary) underneathObject).getCOSName(COSName.TYPE)); } private boolean isStream(Object selectedNode) @@ -757,26 +847,23 @@ private boolean isFont(Object selectedNode) selectedNode = getUnderneathObject(selectedNode); if (selectedNode instanceof COSDictionary) { - COSDictionary dic = (COSDictionary)selectedNode; - return dic.containsKey(COSName.TYPE) && - dic.getCOSName(COSName.TYPE).equals(COSName.FONT) && - !isCIDFont(dic); + COSDictionary dic = (COSDictionary) selectedNode; + return COSName.FONT.equals(dic.getCOSName(COSName.TYPE)) && !isCIDFont(dic); } return false; } private boolean isCIDFont(COSDictionary dic) { - return dic.containsKey(COSName.SUBTYPE) && - (dic.getCOSName(COSName.SUBTYPE).equals(COSName.CID_FONT_TYPE0) - || dic.getCOSName(COSName.SUBTYPE).equals(COSName.CID_FONT_TYPE2)); + return COSName.CID_FONT_TYPE0.equals(dic.getCOSName(COSName.SUBTYPE)) || + COSName.CID_FONT_TYPE2.equals(dic.getCOSName(COSName.SUBTYPE)); } /** * Show a Panel describing color spaces 
in more detail and interactive way. * @param csNode the special color space containing node. */ - private void showColorPane(Object csNode) + private void showColorPane(Object csNode) throws IOException { csNode = getUnderneathObject(csNode); @@ -824,7 +911,7 @@ private void showPage(Object selectedNode) COSBase typeItem = page.getItem(COSName.TYPE); if (COSName.PAGE.equals(typeItem)) { - PagePane pagePane = new PagePane(document, page); + PagePane pagePane = new PagePane(document, page, statusBar.getStatusLabel()); replaceRightComponent(new JScrollPane(pagePane.getPanel())); } } @@ -836,7 +923,9 @@ private void showFlagPane(Object parentNode, Object selectedNode) { selectedNode = ((MapEntry)selectedNode).getKey(); selectedNode = getUnderneathObject(selectedNode); - FlagBitsPane flagBitsPane = new FlagBitsPane((COSDictionary) parentNode, (COSName) selectedNode); + FlagBitsPane flagBitsPane = new FlagBitsPane(document, + (COSDictionary) parentNode, + (COSName) selectedNode); replaceRightComponent(flagBitsPane.getPane()); } } @@ -865,7 +954,8 @@ else if (COSName.CONTENTS.equals(parentKey) || COSName.CHAR_PROCS.equals(parentK isContentStream = true; } else if (COSName.FORM.equals(stream.getCOSName(COSName.SUBTYPE)) || - COSName.PATTERN.equals(stream.getCOSName(COSName.TYPE))) + COSName.PATTERN.equals(stream.getCOSName(COSName.TYPE)) || + stream.getInt(COSName.PATTERN_TYPE) == 1) { if (stream.containsKey(COSName.RESOURCES)) { @@ -875,7 +965,6 @@ else if (COSName.FORM.equals(stream.getCOSName(COSName.SUBTYPE)) || } else if (COSName.THUMB.equals(key)) { - resourcesDic = null; isThumb = true; } else if (COSName.IMAGE.equals((stream).getCOSName(COSName.SUBTYPE))) @@ -898,7 +987,7 @@ private void showFont(Object selectedNode, TreePath path) if (pane == null) { // unsupported font type - replaceRightComponent(jScrollPane2); + replaceRightComponent(jScrollPaneRight); return; } replaceRightComponent(pane); @@ -907,9 +996,9 @@ private void showFont(Object selectedNode, TreePath 
path) // replace the right component while keeping divider position private void replaceRightComponent(Component pane) { - int div = jSplitPane1.getDividerLocation(); - jSplitPane1.setRightComponent(pane); - jSplitPane1.setDividerLocation(div); + int div = jSplitPane.getDividerLocation(); + jSplitPane.setRightComponent(pane); + jSplitPane.setDividerLocation(div); } private void showString(Object selectedNode) @@ -988,23 +1077,21 @@ else if( selectedNode instanceof COSString ) } else if( selectedNode instanceof COSStream ) { + COSStream stream = (COSStream) selectedNode; + InputStream in = null; try { - COSStream stream = (COSStream)selectedNode; - InputStream ioStream = stream.createInputStream(); - ByteArrayOutputStream byteArray = new ByteArrayOutputStream(); - byte[] buffer = new byte[1024]; - int amountRead; - while( (amountRead = ioStream.read( buffer, 0, buffer.length ) ) != -1 ) - { - byteArray.write( buffer, 0, amountRead ); - } - data = byteArray.toString(); + in = stream.createInputStream(); + data = new String(IOUtils.toByteArray(in)); } - catch( IOException e ) + catch (IOException e) { throw new RuntimeException(e); } + finally + { + IOUtils.closeQuietly(in); + } } else if( selectedNode instanceof MapEntry ) { @@ -1017,7 +1104,7 @@ else if( selectedNode instanceof ArrayEntry ) return data; } - private void exitMenuItemActionPerformed(ActionEvent evt) + private void exitMenuItemActionPerformed(ActionEvent ignored) { if( document != null ) { @@ -1035,51 +1122,75 @@ private void exitMenuItemActionPerformed(ActionEvent evt) throw new RuntimeException(e); } } + windowPrefs.setExtendedState(getExtendedState()); + this.setExtendedState(Frame.NORMAL); + windowPrefs.setBounds(getBounds()); + windowPrefs.setDividerLocation(jSplitPane.getDividerLocation()); + performApplicationExit(); + } + + /** + * Exit the application after the window is closed. This is protected to let + * subclasses override the behavior. 
+ */ + @SuppressWarnings("WeakerAccess") + protected void performApplicationExit() + { System.exit(0); } private void printMenuItemActionPerformed(ActionEvent evt) { - if( document != null ) + if (document == null) { - try - { - PrinterJob job = PrinterJob.getPrinterJob(); - job.setPageable(new PDFPageable(document)); - if (job.printDialog()) - { - job.print(); - } - } - catch (PrinterException e) - { - throw new RuntimeException(e); - } + return; } - } - - /** - * Exit the Application. - */ - private void exitForm(WindowEvent evt) - { - if( document != null ) + AccessPermission ap = document.getCurrentAccessPermission(); + if (!ap.canPrint()) { - try + JOptionPane.showMessageDialog(this, "You do not have permission to print"); + return; + } + + try + { + PrinterJob job = PrinterJob.getPrinterJob(); + job.setPageable(new PDFPageable(document)); + PrintRequestAttributeSet pras = new HashPrintRequestAttributeSet(); + PDViewerPreferences vp = document.getDocumentCatalog().getViewerPreferences(); + if (vp != null && vp.getDuplex() != null) { - document.close(); - if (!currentFilePath.startsWith("http")) + String dp = vp.getDuplex(); + if (PDViewerPreferences.DUPLEX.DuplexFlipLongEdge.toString().equals(dp)) { - recentFiles.addFile(currentFilePath); + pras.add(Sides.TWO_SIDED_LONG_EDGE); + } + else if (PDViewerPreferences.DUPLEX.DuplexFlipShortEdge.toString().equals(dp)) + { + pras.add(Sides.TWO_SIDED_SHORT_EDGE); + } + else if (PDViewerPreferences.DUPLEX.Simplex.toString().equals(dp)) + { + pras.add(Sides.ONE_SIDED); } - recentFiles.close(); } - catch( IOException e ) + if (job.printDialog(pras)) { - throw new RuntimeException(e); + setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR)); + try + { + job.print(pras); + } + finally + { + setCursor(Cursor.getDefaultCursor()); + } } } - System.exit(0); + catch (PrinterException e) + { + throw new RuntimeException(e); + } } /** @@ -1091,8 +1202,11 @@ private void exitForm(WindowEvent evt) public static void main(String[] 
args) throws Exception { UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName()); - System.setProperty("apple.laf.useScreenMenuBar", "true"); - + if (System.getProperty("apple.laf.useScreenMenuBar") == null) + { + System.setProperty("apple.laf.useScreenMenuBar", "true"); + } + // handle uncaught exceptions Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { @@ -1102,9 +1216,10 @@ public void uncaughtException(Thread thread, Throwable throwable) new ErrorDialog(throwable).setVisible(true); } }); - + // open file, if any String filename = null; + @SuppressWarnings({"squid:S2068"}) String password = ""; boolean viewPages = true; @@ -1129,8 +1244,25 @@ else if( args[i].equals(VIEW_STRUCTURE) ) } } final PDFDebugger viewer = new PDFDebugger(viewPages); - - + + // use our custom logger + // this works only if there is no "LogFactory.getLog()" in this class, + // and if there are no methods that call logging, even invisible + // use reduced file from PDFBOX-3653 to see logging + LogDialog.init(viewer, viewer.statusBar.getLogLabel()); + System.setProperty("org.apache.commons.logging.Log", "org.apache.pdfbox.debugger.ui.DebugLog"); + + // trigger premature initializations for more accurate rendering benchmarks + // See discussion in PDFBOX-3988 + if (PDType1Font.COURIER.isStandard14()) + { + // Yes this is always true + PDDeviceCMYK.INSTANCE.toRGB(new float[] { 0, 0, 0, 0} ); + PDDeviceRGB.INSTANCE.toRGB(new float[] { 0, 0, 0 } ); + IIORegistry.getDefaultInstance(); + FilterFactory.INSTANCE.getFilter(COSName.FLATE_DECODE); + } + if (filename != null) { File file = new File(filename); @@ -1148,7 +1280,7 @@ private void readPDFFile(String filePath, String password) throws IOException readPDFFile(file, password); } - private void readPDFFile(File file, String password) throws IOException + private void readPDFFile(final File file, String password) throws IOException { if( document != null ) { @@ -1160,7 +1292,18 @@ private void 
readPDFFile(File file, String password) throws IOException } currentFilePath = file.getPath(); recentFiles.removeFile(file.getPath()); - parseDocument( file, password ); + LogDialog.instance().clear(); + DocumentOpener documentOpener = new DocumentOpener(password) + { + @Override + PDDocument open() throws IOException + { + return PDDocument.load(file, password); + } + }; + document = documentOpener.parse(); + printMenuItem.setEnabled(true); + reopenMenuItem.setEnabled(true); initTree(); @@ -1176,7 +1319,7 @@ private void readPDFFile(File file, String password) throws IOException addRecentFileItems(); } - private void readPDFurl(String urlString, String password) throws IOException + private void readPDFurl(final String urlString, String password) throws IOException { if (document != null) { @@ -1187,8 +1330,18 @@ private void readPDFurl(String urlString, String password) throws IOException } } currentFilePath = urlString; - URL url = new URL(urlString); - document = PDDocument.load(url.openStream(), password); + LogDialog.instance().clear(); + DocumentOpener documentOpener = new DocumentOpener(password) + { + @Override + PDDocument open() throws IOException + { + return PDDocument.load(new URL(urlString).openStream(), password); + } + }; + document = documentOpener.parse(); + printMenuItem.setEnabled(true); + reopenMenuItem.setEnabled(true); initTree(); @@ -1212,6 +1365,10 @@ private void initTree() { File file = new File(currentFilePath); DocumentEntry documentEntry = new DocumentEntry(document, file.getName()); + ZoomMenu.getInstance().resetZoom(); + RotationMenu.getInstance().setRotationSelection(RotationMenu.ROTATE_0_DEGREES); + ImageTypeMenu.getInstance().setImageTypeSelection(ImageTypeMenu.IMAGETYPE_RGB); + RenderDestinationMenu.getInstance().setRenderDestinationSelection(RenderDestinationMenu.RENDER_DESTINATION_EXPORT); tree.setModel(new PDFTreeModel(documentEntry)); // Root/Pages/Kids/[0] is not always the first page, so use the first row instead: 
tree.setSelectionPath(tree.getPathForRow(1)); @@ -1222,44 +1379,65 @@ private void initTree() tree.setSelectionPath(treeStatus.getPathForString("Root")); } } - + /** - * This will parse a document. - * - * @param file The file addressing the document. - * - * @throws IOException If there is an error parsing the document. + * Internal class to avoid double code in password entry loop. */ - private void parseDocument( File file, String password )throws IOException + abstract static class DocumentOpener { - while (true) + String password; + + DocumentOpener(String password) { - try - { - document = PDDocument.load(file, password); - } - catch (InvalidPasswordException ipe) + this.password = password; + } + + /** + * Override to load the actual input type (File, URL, stream), don't call it directly! + * + * @return the PDDocument instance + * @throws IOException Cannot read document + */ + abstract PDDocument open() throws IOException; + + /** + * Call this! + * + * @return the PDDocument instance + * @throws IOException Cannot read document + */ + final PDDocument parse() throws IOException + { + while (true) { - // https://stackoverflow.com/questions/8881213/joptionpane-to-get-password - JPanel panel = new JPanel(); - JLabel label = new JLabel("Password:"); - JPasswordField pass = new JPasswordField(10); - panel.add(label); - panel.add(pass); - String[] options = new String[] {"OK", "Cancel"}; - int option = JOptionPane.showOptionDialog(null, panel, "Enter password", - JOptionPane.NO_OPTION, JOptionPane.PLAIN_MESSAGE, - null, options, ""); - if (option == 0) + try { - password = new String(pass.getPassword()); - continue; + return open(); + } + catch (InvalidPasswordException ipe) + { + // https://stackoverflow.com/questions/8881213/joptionpane-to-get-password + JPanel panel = new JPanel(); + JLabel label = new JLabel("Password:"); + JPasswordField pass = new JPasswordField(10); + panel.add(label); + panel.add(pass); + String[] options = new String[] + { + "OK", 
"Cancel" + }; + int option = JOptionPane.showOptionDialog(null, panel, "Enter password", + JOptionPane.NO_OPTION, JOptionPane.PLAIN_MESSAGE, + null, options, ""); + if (option == 0) + { + password = new String(pass.getPassword()); + continue; + } + throw ipe; } - throw ipe; } - break; - } - printMenuItem.setEnabled(true); + } } private void addRecentFileItems() @@ -1311,4 +1489,33 @@ private static void usage() System.err.println(message); System.exit(1); } + + /** + * Convenience method to get the page label if available. + * + * @param document + * @param pageIndex 0-based page number. + * @return a page label or null if not available. + */ + public static String getPageLabel(PDDocument document, int pageIndex) + { + PDPageLabels pageLabels; + try + { + pageLabels = document.getDocumentCatalog().getPageLabels(); + } + catch (IOException ex) + { + return ex.getMessage(); + } + if (pageLabels != null) + { + String[] labels = pageLabels.getLabelsByPageIndices(); + if (labels[pageIndex] != null) + { + return labels[pageIndex]; + } + } + return null; + } } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSArrayBased.java b/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSArrayBased.java index b68d0cebe0f..b0072df2cc6 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSArrayBased.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSArrayBased.java @@ -19,12 +19,14 @@ import java.awt.Component; import java.awt.Dimension; import java.awt.Font; +import java.awt.color.ColorSpace; import java.io.IOException; import javax.swing.BoxLayout; import javax.swing.JLabel; import javax.swing.JPanel; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDICCBased; import org.apache.pdfbox.pdmodel.graphics.color.PDPattern; /** @@ -84,6 +86,44 @@ private void initUI() colorCountLabel.setFont(new 
Font(Font.MONOSPACED, Font.BOLD, 20)); panel.add(colorCountLabel); } + + if (colorSpace instanceof PDICCBased) + { + PDICCBased icc = (PDICCBased) colorSpace; + int colorSpaceType = icc.getColorSpaceType(); + String cs; + switch (colorSpaceType) + { + case ColorSpace.CS_LINEAR_RGB: + cs = "linear RGB"; + break; + case ColorSpace.CS_CIEXYZ: + cs = "CIEXYZ"; + break; + case ColorSpace.CS_GRAY: + cs = "linear gray"; + break; + case ColorSpace.CS_sRGB: + cs = "sRGB"; + break; + case ColorSpace.TYPE_RGB: + cs = "RGB"; + break; + case ColorSpace.TYPE_GRAY: + cs = "gray"; + break; + case ColorSpace.TYPE_CMYK: + cs = "CMYK"; + break; + default: + cs = "type " + colorSpaceType; + break; + } + JLabel otherLabel = new JLabel("Colorspace type: " + cs); + otherLabel.setAlignmentX(Component.CENTER_ALIGNMENT); + otherLabel.setFont(new Font(Font.MONOSPACED, Font.BOLD, 20)); + panel.add(otherLabel); + } } /** diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSDeviceN.java b/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSDeviceN.java index 23f1a4e46ff..3826d1a0516 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSDeviceN.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSDeviceN.java @@ -31,36 +31,27 @@ import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceN; - -/** - *@author Khyrul Bashar. - */ - /** * A class that provides the necessary UI and functionalities to show the DeviceN color space. + * + * @author Khyrul Bashar. 
+ * */ public class CSDeviceN { - private PDDeviceN deviceN; + private final PDDeviceN deviceN; private JPanel panel; /** * Constructor * - * @param array COSArray instance that holds DeviceN color space + * @param array COSArray instance that holds the DeviceN color space */ - public CSDeviceN(COSArray array) + public CSDeviceN(COSArray array) throws IOException { - try - { - deviceN = new PDDeviceN(array); - DeviceNColorant[] colorants = getColorantData(); - initUI(colorants); - } - catch (IOException e) - { - throw new RuntimeException(e); - } + deviceN = new PDDeviceN(array); + DeviceNColorant[] colorants = getColorantData(); + initUI(colorants); } /** diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSIndexed.java b/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSIndexed.java index 32edf8b0686..9251ae0db34 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSIndexed.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSIndexed.java @@ -33,41 +33,33 @@ import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.pdmodel.graphics.color.PDIndexed; -/** - * @author Khyrul Bashar. - */ - /** * A class that provides the necessary UI and functionalities to show the Indexed colorspace. + * + * @author Khyrul Bashar. */ public class CSIndexed { - private PDIndexed indexed; + private final PDIndexed indexed; private JPanel panel; - private int colorCount; + private final int colorCount; /** * Constructor. - * @param array COSArray instance for Indexed Colorspace. + * @param array COSArray instance for Indexed color space. 
+ * @throws java.io.IOException */ - public CSIndexed(COSArray array) + public CSIndexed(COSArray array) throws IOException { - try - { - indexed = new PDIndexed(array); - colorCount = getHival(array); - initUI(getColorantData()); - } - catch (IOException e) - { - throw new RuntimeException(e); - } + indexed = new PDIndexed(array); + colorCount = getHival(array) + 1; + initUI(getColorantData()); } /** * Parses the colorant data from the array and return. * - * @return + * @return the colorant data */ private IndexedColorant[] getColorantData() { diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSSeparation.java b/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSSeparation.java index 7eb210a37be..5e613494a1d 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSSeparation.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/CSSeparation.java @@ -40,12 +40,10 @@ import org.apache.pdfbox.pdmodel.graphics.color.PDSeparation; /** + * A class that provides the necessary UI and functionalities to show the Separation color space. + * * @author Khyrul Bashar. */ - -/** - *A class that provides the necessary UI and functionalities to show the Separation color space. - */ public class CSSeparation implements ChangeListener, ActionListener { private JSlider slider; @@ -53,23 +51,19 @@ public class CSSeparation implements ChangeListener, ActionListener private JLabel colorBar; private JPanel panel; - private PDSeparation separation; + private final PDSeparation separation; private float tintValue = 1; /** * Constructor - * @param array COSArray instance of the separation color space. + * + * @param array COSArray instance of the Separation color space. 
+ * + * @throws java.io.IOException */ - public CSSeparation(COSArray array) + public CSSeparation(COSArray array) throws IOException { - try - { - separation = new PDSeparation(array); - } - catch (IOException e) - { - throw new RuntimeException(e); - } + separation = new PDSeparation(array); initUI(); initValues(); } @@ -77,7 +71,7 @@ public CSSeparation(COSArray array) /** * initialize all the UI elements and arrange them. */ - private void initUI() + private void initUI() throws IOException { Font boldFont = new Font(Font.MONOSPACED, Font.BOLD, 20); @@ -90,7 +84,8 @@ private void initUI() slider.setMajorTickSpacing(50); slider.setPaintTicks(true); - Dictionary labelTable = new Hashtable(); + @SuppressWarnings({"squid:S1149"}) + Dictionary labelTable = new Hashtable(); JLabel lightest = new JLabel("lightest"); lightest.setFont(new Font(Font.MONOSPACED, Font.BOLD, 10)); JLabel darkest = new JLabel("darkest"); @@ -204,10 +199,17 @@ public JPanel getPanel() @Override public void stateChanged(ChangeEvent changeEvent) { - int value = slider.getValue(); - tintValue = getFloatRepresentation(value); - tintField.setText(Float.toString(tintValue)); + int value = slider.getValue(); + tintValue = getFloatRepresentation(value); + tintField.setText(Float.toString(tintValue)); + try + { updateColorBar(); + } + catch (IOException ex) + { + tintField.setText(ex.getMessage()); + } } /** @@ -228,36 +230,26 @@ public void actionPerformed(ActionEvent actionEvent) { tintField.setText(Float.toString(tintValue)); } + catch (IOException ex) + { + tintField.setText(ex.getMessage()); + } } - private void updateColorBar() + private void updateColorBar() throws IOException { - try - { - float[] rgbValues = separation.toRGB(new float[] {tintValue}); - colorBar.setBackground(new Color(rgbValues[0], rgbValues[1], rgbValues[2])); - } - catch (IOException e) - { - throw new RuntimeException(e); - } + float[] rgbValues = separation.toRGB(new float[] {tintValue}); + colorBar.setBackground(new 
Color(rgbValues[0], rgbValues[1], rgbValues[2])); } /** * Set a little border around colorbar. color of the border is the darkest of the colorant. */ - private void setColorBarBorder() + private void setColorBarBorder() throws IOException { - try - { - float[] rgbValues = separation.toRGB(new float[] {1}); - Color darkest= new Color(rgbValues[0], rgbValues[1], rgbValues[2]); - colorBar.setBorder(new BevelBorder(BevelBorder.LOWERED, darkest, darkest)); - } - catch (IOException e) - { - throw new RuntimeException(e); - } + float[] rgbValues = separation.toRGB(new float[] {1}); + Color darkest= new Color(rgbValues[0], rgbValues[1], rgbValues[2]); + colorBar.setBorder(new BevelBorder(BevelBorder.LOWERED, darkest, darkest)); } private float getFloatRepresentation(int value) diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/DeviceNTableModel.java b/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/DeviceNTableModel.java index 346ccb274bb..5fb66e188f5 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/DeviceNTableModel.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/DeviceNTableModel.java @@ -26,6 +26,7 @@ /** * This the table model for showing DeviceN color space which extends AbstractTableModel. */ +@SuppressWarnings({"serial","squid:S1948"}) public class DeviceNTableModel extends AbstractTableModel { private static final String[] COLUMNNAMES = new String[] { "Colorant", "Maximum", "Minimum"}; diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/IndexedTableModel.java b/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/IndexedTableModel.java index fd32f42b4f1..b9a272a83a8 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/IndexedTableModel.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/colorpane/IndexedTableModel.java @@ -26,6 +26,7 @@ /** * This the table model for showing Indexed color space which extends AbstractTableModel. 
*/ +@SuppressWarnings({"serial","squid:S1948"}) public class IndexedTableModel extends AbstractTableModel { diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/AnnotFlag.java b/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/AnnotFlag.java index 24da4bc1a1d..f6bcb5b8ecc 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/AnnotFlag.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/AnnotFlag.java @@ -67,7 +67,7 @@ Object[][] getFlagBits() new Object[]{7, "ReadOnly", annotation.isReadOnly()}, new Object[]{8, "Locked", annotation.isLocked()}, new Object[]{9, "ToggleNoView", annotation.isToggleNoView()}, - new Object[]{10, "LockedContents", annotation.isLocked()} + new Object[]{10, "LockedContents", annotation.isLockedContents()} }; } } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/FieldFlag.java b/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/FieldFlag.java index c835308331f..bf45d45dfff 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/FieldFlag.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/FieldFlag.java @@ -53,7 +53,7 @@ else if (COSName.CH.equals(fieldType)) { return "Choice field flag"; } - return null; + return "Field flag"; } @Override @@ -80,7 +80,7 @@ else if (COSName.CH.equals(fieldType)) { return getChoiceFieldFlagBits(flagValue); } - return null; + return getFieldFlagBits(flagValue); } private Object[][] getTextFieldFlagBits(final int flagValue) @@ -127,6 +127,15 @@ private Object[][] getChoiceFieldFlagBits(final int flagValue) }; } + private Object[][] getFieldFlagBits(final int flagValue) + { + return new Object[][]{ + new Object[]{1, "ReadOnly", isFlagBitSet(flagValue, 1)}, + new Object[]{2, "Required", isFlagBitSet(flagValue, 2)}, + new Object[]{3, "NoExport", isFlagBitSet(flagValue, 3)} + }; + } + /** * Check the corresponding flag bit if set or not * @param flagValue 
the flag integer diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/FlagBitsPane.java b/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/FlagBitsPane.java index 7b8520c4460..42279080a4a 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/FlagBitsPane.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/FlagBitsPane.java @@ -20,6 +20,7 @@ import javax.swing.JPanel; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; /** * @author Khyrul Bashar @@ -29,14 +30,16 @@ public class FlagBitsPane { private FlagBitsPaneView view; + private final PDDocument document; /** * Constructor. * @param dictionary COSDictionary instance. * @param flagType COSName instance. */ - public FlagBitsPane(final COSDictionary dictionary, COSName flagType) + public FlagBitsPane(PDDocument document, final COSDictionary dictionary, COSName flagType) { + this.document = document; createPane(dictionary, flagType); } @@ -77,6 +80,12 @@ private void createPane(final COSDictionary dictionary, final COSName flagType) view = new FlagBitsPaneView( flag.getFlagType(), flag.getFlagValue(), flag.getFlagBits(), flag.getColumnNames()); } + if (COSName.SIG_FLAGS.equals(flagType)) + { + flag = new SigFlag(document, dictionary); + view = new FlagBitsPaneView( + flag.getFlagType(), flag.getFlagValue(), flag.getFlagBits(), flag.getColumnNames()); + } } /** diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/FlagBitsPaneView.java b/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/FlagBitsPaneView.java index 6d9b69c01b9..62c2523ed23 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/FlagBitsPaneView.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/FlagBitsPaneView.java @@ -35,11 +35,11 @@ */ class FlagBitsPaneView { - final private JPanel panel; - final private String 
flagHeader; - final private String flagValue; - final private Object[][] tableData; - final private String[] columnNames; + private final JPanel panel; + private final String flagHeader; + private final String flagValue; + private final Object[][] tableData; + private final String[] columnNames; /** * Constructor diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/PanoseFlag.java b/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/PanoseFlag.java index ffb24e9f058..256a60da2b8 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/PanoseFlag.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/PanoseFlag.java @@ -257,11 +257,7 @@ private String getXHeightValue(int index) public final byte[] getPanoseBytes(COSDictionary style) { - if (style != null) - { - COSString panose = (COSString)style.getDictionaryObject(COSName.PANOSE); - return panose.getBytes(); - } - return null; + COSString panose = (COSString)style.getDictionaryObject(COSName.PANOSE); + return panose.getBytes(); } } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/SigFlag.java b/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/SigFlag.java new file mode 100644 index 00000000000..4df39c81945 --- /dev/null +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/flagbitspane/SigFlag.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.debugger.flagbitspane; + +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; + +/** + * @author Tilman Hausherr + * + * A class that provides signature flag bits. + */ +public class SigFlag extends Flag +{ + private final PDDocument document; + private final COSDictionary acroFormDictionary; + + /** + * Constructor + * + * @param acroFormDictionary COSDictionary instance. + */ + SigFlag(PDDocument document, COSDictionary acroFormDictionary) + { + this.document = document; + this.acroFormDictionary = acroFormDictionary; + } + + @Override + String getFlagType() + { + return "Signature flag"; + } + + @Override + String getFlagValue() + { + return "Flag value: " + acroFormDictionary.getInt(COSName.SIG_FLAGS); + } + + @Override + Object[][] getFlagBits() + { + PDAcroForm acroForm = new PDAcroForm(document, acroFormDictionary); + return new Object[][]{ + new Object[]{1, "SignaturesExist", acroForm.isSignaturesExist()}, + new Object[]{2, "AppendOnly", acroForm.isAppendOnly()}, + }; + } +} diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/FontEncodingPaneController.java b/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/FontEncodingPaneController.java index 0891fba8f6f..0f5a8b8fba9 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/FontEncodingPaneController.java +++ 
b/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/FontEncodingPaneController.java @@ -16,19 +16,49 @@ package org.apache.pdfbox.debugger.fontencodingpane; +import java.awt.geom.GeneralPath; +import java.awt.geom.Rectangle2D; import java.io.IOException; import javax.swing.JPanel; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDResources; -import org.apache.pdfbox.pdmodel.font.PDCIDFontType2; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDSimpleFont; import org.apache.pdfbox.pdmodel.font.PDType0Font; +import org.apache.pdfbox.pdmodel.font.PDType3Font; -interface FontPane +abstract class FontPane { - JPanel getPanel(); + abstract JPanel getPanel(); + + /** + * Calculate vertical bounds common to all rendered glyphs. + * + * @param tableData + * @param glyphIndex the table index that has glyphs. + * @return an array with two elements: min lower bound (but max 0), and max upper bound (but min + * 0). 
+ */ + double[] getYBounds(Object[][] tableData, int glyphIndex) + { + double minY = 0; + double maxY = 0; + for (Object[] aTableData : tableData) + { + GeneralPath path = (GeneralPath) aTableData[glyphIndex]; + Rectangle2D bounds2D = path.getBounds2D(); + if (bounds2D.isEmpty()) + { + continue; + } + minY = Math.min(minY, bounds2D.getMinY()); + maxY = Math.max(maxY, bounds2D.getMaxY()); + } + return new double[]{minY, maxY}; + } } /** @@ -39,6 +69,8 @@ interface FontPane */ public class FontEncodingPaneController { + private static final Log LOG = LogFactory.getLog(FontEncodingPaneController.class); + private FontPane fontPane; /** @@ -52,19 +84,22 @@ public FontEncodingPaneController(COSName fontName, COSDictionary dictionary) try { PDFont font = resources.getFont(fontName); - if (font instanceof PDSimpleFont) + if (font instanceof PDType3Font) + { + fontPane = new Type3Font((PDType3Font) font, resources); + } + else if (font instanceof PDSimpleFont) { fontPane = new SimpleFont((PDSimpleFont) font); } - else if (font instanceof PDType0Font - && ((PDType0Font) font).getDescendantFont() instanceof PDCIDFontType2) + else if (font instanceof PDType0Font) { - fontPane = new Type0Font((PDCIDFontType2) ((PDType0Font) font).getDescendantFont(), font); + fontPane = new Type0Font(((PDType0Font) font).getDescendantFont(), (PDType0Font) font); } } catch (IOException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/FontEncodingView.java b/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/FontEncodingView.java index 48fbf6b167e..13da1fb7053 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/FontEncodingView.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/FontEncodingView.java @@ -20,8 +20,16 @@ import java.awt.Component; import java.awt.Dimension; import java.awt.Font; +import java.awt.Graphics2D; +import 
java.awt.GraphicsEnvironment; import java.awt.GridBagConstraints; import java.awt.GridBagLayout; +import java.awt.Rectangle; +import java.awt.RenderingHints; +import java.awt.geom.AffineTransform; +import java.awt.geom.GeneralPath; +import java.awt.geom.Rectangle2D; +import java.awt.image.BufferedImage; import java.util.Iterator; import java.util.Map; import javax.swing.JLabel; @@ -30,25 +38,31 @@ import javax.swing.JTable; import javax.swing.SwingConstants; import javax.swing.table.TableCellRenderer; +import org.apache.pdfbox.debugger.ui.HighResolutionImageIcon; /** * @author Khyrul Bashar + * @author Tilman Hausherr * A class that creates the UI for font encoding pane. */ class FontEncodingView { private JPanel panel; + private static final AffineTransform DEFAULT_TRANSFORM = GraphicsEnvironment.getLocalGraphicsEnvironment(). + getDefaultScreenDevice().getDefaultConfiguration().getDefaultTransform(); + /** * Constructor. * @param tableData Object[][] instance as table data. * @param headerAttributes Map instance which contains info for showing in header * panel. Here keys will be info type. * @param columnNames String array containing the columns name. + * @param yBounds min low and max high bound of all glyphs. 
*/ - FontEncodingView(Object[][] tableData, Map headerAttributes, String[] columnNames) + FontEncodingView(Object[][] tableData, Map headerAttributes, String[] columnNames, double[] yBounds) { - createView(getHeaderPanel(headerAttributes), getTable(tableData, columnNames)); + createView(getHeaderPanel(headerAttributes), getTable(tableData, columnNames, yBounds)); } private void createView(JPanel headerPanel, JTable table) @@ -78,11 +92,11 @@ private void createView(JPanel headerPanel, JTable table) panel.add(scrollPane, gbc); } - private JTable getTable(Object[][] tableData, String[] columnNames) + private JTable getTable(Object[][] tableData, String[] columnNames, double[] yBounds) { JTable table = new JTable(tableData, columnNames); table.setRowHeight(40); - table.setDefaultRenderer(Object.class, new GlyphCellRenderer()); + table.setDefaultRenderer(Object.class, new GlyphCellRenderer(yBounds)); return table; } @@ -95,7 +109,6 @@ private JPanel getHeaderPanel(Map attributes) Iterator keys = attributes.keySet().iterator(); int row = 0; while (keys.hasNext()) - { String key = keys.next(); JLabel encodingNameLabel = new JLabel(key + ": " + attributes.get(key)); @@ -108,7 +121,6 @@ private JPanel getHeaderPanel(Map attributes) gbc.anchor = GridBagConstraints.LINE_START; headerPanel.add(encodingNameLabel, gbc); - } } return headerPanel; @@ -121,14 +133,68 @@ JPanel getPanel() private static final class GlyphCellRenderer implements TableCellRenderer { + private final double[] yBounds; + + private GlyphCellRenderer(double[] yBounds) + { + this.yBounds = yBounds; + } @Override - public Component getTableCellRendererComponent(JTable jTable, Object o, boolean b, boolean b1, int i, int i1) + public Component getTableCellRendererComponent(JTable jTable, Object o, boolean b, boolean b1, int row, int col) { + if (o instanceof GeneralPath) + { + GeneralPath path = (GeneralPath) o; + Rectangle2D bounds2D = path.getBounds2D(); + if (bounds2D.isEmpty()) + { + JLabel label = new 
JLabel(SimpleFont.NO_GLYPH, SwingConstants.CENTER); + label.setFont(new Font(Font.SANS_SERIF, Font.PLAIN, 25)); + label.setForeground(Color.RED); + return label; + } + Rectangle cellRect = jTable.getCellRect(row, col, false); + BufferedImage bim = renderGlyph(path, bounds2D, cellRect); + return new JLabel(new HighResolutionImageIcon( + bim, + (int) Math.ceil(bim.getWidth() / DEFAULT_TRANSFORM.getScaleX()), + (int) Math.ceil(bim.getHeight() / DEFAULT_TRANSFORM.getScaleY())), + SwingConstants.CENTER); + } + if (o instanceof BufferedImage) + { + Rectangle cellRect = jTable.getCellRect(row, col, false); + BufferedImage glyphImage = (BufferedImage) o; + BufferedImage cellImage = new BufferedImage( + (int) (cellRect.getWidth() * DEFAULT_TRANSFORM.getScaleX()), + (int) (cellRect.getHeight() * DEFAULT_TRANSFORM.getScaleY()), + BufferedImage.TYPE_INT_RGB); + Graphics2D g = (Graphics2D) cellImage.getGraphics(); + g.setBackground(Color.white); + g.clearRect(0, 0, cellImage.getWidth(), cellImage.getHeight()); + + double scale = 1 / (glyphImage.getHeight() / cellRect.getHeight()); + + // horizontal center + g.translate((cellRect.getWidth() - glyphImage.getWidth() * scale) / 2 * DEFAULT_TRANSFORM.getScaleX(), 0); + + // scale from the glyph to the cell + g.scale(scale * DEFAULT_TRANSFORM.getScaleX(), scale * DEFAULT_TRANSFORM.getScaleY()); + + g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); + g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY); + g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); + g.drawImage(glyphImage, 0, 0, null); + g.dispose(); + return new JLabel(new HighResolutionImageIcon( + cellImage, + (int) Math.ceil(cellImage.getWidth() / DEFAULT_TRANSFORM.getScaleX()), + (int) Math.ceil(cellImage.getHeight() / DEFAULT_TRANSFORM.getScaleY()))); + } if (o != null) { - JLabel label = new JLabel(o.toString()); - 
label.setHorizontalAlignment(SwingConstants.CENTER); + JLabel label = new JLabel(o.toString(), SwingConstants.CENTER); label.setFont(new Font(Font.SANS_SERIF, Font.PLAIN, 25)); if (SimpleFont.NO_GLYPH.equals(o) || ".notdef".equals(o)) { @@ -139,7 +205,39 @@ public Component getTableCellRendererComponent(JTable jTable, Object o, boolean } return new JLabel(); } + + private BufferedImage renderGlyph(GeneralPath path, Rectangle2D bounds2D, Rectangle cellRect) + { + BufferedImage bim = new BufferedImage( + (int) (cellRect.getWidth() * DEFAULT_TRANSFORM.getScaleX()), + (int) (cellRect.getHeight() * DEFAULT_TRANSFORM.getScaleY()), + BufferedImage.TYPE_INT_RGB); + Graphics2D g = (Graphics2D) bim.getGraphics(); + g.setBackground(Color.white); + g.clearRect(0, 0, bim.getWidth(), bim.getHeight()); + + double scale = 1 / ((yBounds[1] - yBounds[0]) / cellRect.getHeight()); + + // flip + g.scale(1, -1); + g.translate(0, -bim.getHeight()); + + // horizontal center + g.translate((cellRect.getWidth() - bounds2D.getWidth() * scale) / 2 * DEFAULT_TRANSFORM.getScaleX(), 0); + + // scale from the glyph to the cell + g.scale(scale * DEFAULT_TRANSFORM.getScaleX(), scale * DEFAULT_TRANSFORM.getScaleY()); + + // Adjust for negative y min bound + g.translate(0, -yBounds[0]); + + g.setColor(Color.black); + g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); + g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY); + g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); + g.fill(path); + g.dispose(); + return bim; + } } } - - diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/SimpleFont.java b/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/SimpleFont.java index 2418b9fe204..768de686ff4 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/SimpleFont.java +++ 
b/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/SimpleFont.java @@ -16,65 +16,96 @@ package org.apache.pdfbox.debugger.fontencodingpane; +import java.awt.geom.GeneralPath; import java.io.IOException; import java.util.LinkedHashMap; import java.util.Map; import javax.swing.JPanel; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.pdmodel.font.PDSimpleFont; +import org.apache.pdfbox.pdmodel.font.PDVectorFont; /** * @author Khyrul Bashar * A class that shows the glyph table along with unicode characters for SimpleFont. */ -class SimpleFont implements FontPane +class SimpleFont extends FontPane { - public static final String NO_GLYPH = "No glyph"; - private int totalAvailableGlyph = 0; + private static final Log LOG = LogFactory.getLog(SimpleFont.class); + + public static final String NO_GLYPH = "None"; private final FontEncodingView view; + private int totalAvailableGlyph = 0; /** * Constructor. - * @param font PDSimpleFont instance. + * @param font PDSimpleFont instance, but not a type 3 font. * @throws IOException If fails to parse unicode characters. 
*/ SimpleFont(PDSimpleFont font) throws IOException { Object[][] tableData = getGlyphs(font); + + double[] yBounds = getYBounds(tableData, 3); Map attributes = new LinkedHashMap(); - attributes.put("Encoding", getEncodingName(font)); attributes.put("Font", font.getName()); - attributes.put("Glyph count", Integer.toString(totalAvailableGlyph)); + attributes.put("Encoding", getEncodingName(font)); + attributes.put("Glyphs", Integer.toString(totalAvailableGlyph)); + attributes.put("Standard 14", Boolean.toString(font.isStandard14())); - view = new FontEncodingView(tableData, attributes, new String[] {"Code", "Glyph Name","Unicode Character"}); + view = new FontEncodingView(tableData, attributes, + new String[] {"Code", "Glyph Name", "Unicode Character", "Glyph"}, yBounds); } private Object[][] getGlyphs(PDSimpleFont font) throws IOException { - Object[][] glyphs = new Object[256][3]; + Object[][] glyphs = new Object[256][4]; for (int index = 0; index <= 255; index++) { glyphs[index][0] = index; - if (font.getEncoding().contains(index)) + if (font.getEncoding().contains(index) || font.toUnicode(index) != null) { - glyphs[index][1] = font.getEncoding().getName(index); + String glyphName = font.getEncoding().getName(index); + glyphs[index][1] = glyphName; glyphs[index][2] = font.toUnicode(index); + try + { + if (font instanceof PDVectorFont) + { + // using names didn't work with the file from PDFBOX-3445 + glyphs[index][3] = ((PDVectorFont) font).getPath(index); + } + else + { + // type 1 font isn't a vector font in 2.0 + glyphs[index][3] = font.getPath(glyphName); + } + } + catch (IOException ex) + { + LOG.error("Couldn't render code " + index + " ('" + glyphName + "') of font " + + font.getName(), ex); + glyphs[index][3] = new GeneralPath(); + } totalAvailableGlyph++; } else { glyphs[index][1] = NO_GLYPH; glyphs[index][2] = NO_GLYPH; + glyphs[index][3] = font.getPath(".notdef"); } } return glyphs; } - private String getEncodingName(PDSimpleFont font) { - return 
font.getEncoding().getClass().getSimpleName(); + return font.getEncoding().getClass().getSimpleName() + " / " + font.getEncoding().getEncodingName(); } @Override diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/Type0Font.java b/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/Type0Font.java index e773660855f..d532c7c750c 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/Type0Font.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/Type0Font.java @@ -16,49 +16,99 @@ package org.apache.pdfbox.debugger.fontencodingpane; -import java.awt.Dimension; -import java.io.IOException; -import java.io.InputStream; -import java.util.LinkedHashMap; -import java.util.Map; -import javax.swing.JPanel; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.io.IOUtils; -import org.apache.pdfbox.pdmodel.font.PDCIDFontType2; +import org.apache.pdfbox.pdmodel.font.PDCIDFont; import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType0Font; + +import javax.swing.JPanel; +import java.awt.Dimension; +import java.awt.geom.GeneralPath; +import java.io.IOException; +import java.io.InputStream; +import java.util.LinkedHashMap; +import java.util.Map; /** * @author Khyrul Bashar * A class that shows the CIDToGID table along with unicode characters for Type0Fonts when descendent * font is of type PDCIDFontType2. */ -class Type0Font implements FontPane +class Type0Font extends FontPane { - private FontEncodingView view; - + public static final String NO_GLYPH = "No glyph"; + private final FontEncodingView view; + private int totalAvailableGlyph = 0; + /** * Constructor. * @param descendantFont PDCIDFontType2 instance. * @param parentFont PDFont instance. * @throws IOException If fails to parse cidtogid map. 
*/ - Type0Font(PDCIDFontType2 descendantFont, PDFont parentFont) throws IOException + Type0Font(PDCIDFont descendantFont, PDType0Font parentFont) throws IOException { Object[][] cidtogid = readCIDToGIDMap(descendantFont, parentFont); if (cidtogid != null) { Map attributes = new LinkedHashMap(); attributes.put("Font", descendantFont.getName()); - attributes.put("CID count", Integer.toString(cidtogid.length)); + attributes.put("CIDs", Integer.toString(cidtogid.length)); + + view = new FontEncodingView(cidtogid, attributes, + new String[]{"CID", "GID", "Unicode Character", "Glyph"}, getYBounds(cidtogid, 3)); + } + else + { + Object[][] tab = readMap(descendantFont, parentFont); + Map attributes = new LinkedHashMap(); + attributes.put("Font", descendantFont.getName()); + attributes.put("CIDs", Integer.toString(tab.length)); + attributes.put("Glyphs", Integer.toString(totalAvailableGlyph)); + attributes.put("Standard 14", Boolean.toString(parentFont.isStandard14())); - view = new FontEncodingView(cidtogid, attributes, new String[]{"CID", "GID", "Unicode Character"}); + view = new FontEncodingView(tab, attributes, + new String[]{"Code", "CID", "GID", "Unicode Character", "Glyph"}, getYBounds(tab, 4)); + } + } + + private Object[][] readMap(PDCIDFont descendantFont, PDType0Font parentFont) throws IOException + { + int codes = 0; + for (int code = 0; code < 65535; ++code) + { + if (descendantFont.hasGlyph(code)) + { + ++codes; + } + } + Object[][] tab = new Object[codes][5]; + int index = 0; + for (int code = 0; code < 65535; ++code) + { + if (descendantFont.hasGlyph(code)) + { + tab[index][0] = code; + tab[index][1] = descendantFont.codeToCID(code); + tab[index][2] = descendantFont.codeToGID(code); + tab[index][3] = parentFont.toUnicode(code); + GeneralPath path = descendantFont.getPath(code); + tab[index][4] = path; + if (!path.getBounds2D().isEmpty()) + { + ++totalAvailableGlyph; + } + ++index; + } } + return tab; } - private Object[][] readCIDToGIDMap(PDCIDFontType2 
font, PDFont parentFont) throws IOException + private Object[][] readCIDToGIDMap(PDCIDFont font, PDFont parentFont) throws IOException { Object[][] cid2gid = null; COSDictionary dict = font.getCOSObject(); @@ -71,7 +121,7 @@ private Object[][] readCIDToGIDMap(PDCIDFontType2 font, PDFont parentFont) throw byte[] mapAsBytes = IOUtils.toByteArray(is); IOUtils.closeQuietly(is); int numberOfInts = mapAsBytes.length / 2; - cid2gid = new Object[numberOfInts][3]; + cid2gid = new Object[numberOfInts][4]; int offset = 0; for (int index = 0; index < numberOfInts; index++) { @@ -82,14 +132,18 @@ private Object[][] readCIDToGIDMap(PDCIDFontType2 font, PDFont parentFont) throw { cid2gid[index][2] = parentFont.toUnicode(index); } + GeneralPath path = font.getPath(index); + cid2gid[index][3] = path; + if (!path.getBounds2D().isEmpty()) + { + ++totalAvailableGlyph; + } offset += 2; } } return cid2gid; } - - @Override public JPanel getPanel() { @@ -101,4 +155,4 @@ public JPanel getPanel() panel.setPreferredSize(new Dimension(300, 500)); return panel; } -} \ No newline at end of file +} diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/Type3Font.java b/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/Type3Font.java new file mode 100644 index 00000000000..90e679aa200 --- /dev/null +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/fontencodingpane/Type3Font.java @@ -0,0 +1,205 @@ +/* + * Copyright 2015 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.debugger.fontencodingpane; + +import java.awt.image.BufferedImage; +import java.io.IOException; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; +import javax.swing.JPanel; + +import org.apache.fontbox.util.BoundingBox; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDType3CharProc; +import org.apache.pdfbox.pdmodel.font.PDType3Font; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.util.Charsets; +import org.apache.pdfbox.util.Matrix; + +/** + * @author Khyrul Bashar + * @author Tilman Hausherr + * + * A class that shows the glyph table along with unicode characters for PDType3Font. + */ +class Type3Font extends FontPane +{ + public static final String NO_GLYPH = "No glyph"; + private final FontEncodingView view; + private int totalAvailableGlyph = 0; + private PDRectangle fontBBox; + private final PDResources resources; + + /** + * Constructor. + * @param font PDSimpleFont instance. + * @throws IOException If fails to parse unicode characters. 
+ */ + Type3Font(PDType3Font font, PDResources resources) throws IOException + { + this.resources = resources; + + calcBBox(font); + + Object[][] tableData = getGlyphs(font); + + Map attributes = new LinkedHashMap(); + attributes.put("Font", font.getName()); + attributes.put("Encoding", getEncodingName(font)); + attributes.put("Glyphs", Integer.toString(totalAvailableGlyph)); + + view = new FontEncodingView(tableData, attributes, + new String[] {"Code", "Glyph Name", "Unicode Character", "Glyph"}, null); + } + + private void calcBBox(PDType3Font font) throws IOException + { + double minX = 0; + double maxX = 0; + double minY = 0; + double maxY = 0; + for (int index = 0; index <= 255; ++index) + { + PDType3CharProc charProc = font.getCharProc(index); + if (charProc == null) + { + continue; + } + PDRectangle glyphBBox = charProc.getGlyphBBox(); + if (glyphBBox == null) + { + continue; + } + minX = Math.min(minX, glyphBBox.getLowerLeftX()); + maxX = Math.max(maxX, glyphBBox.getUpperRightX()); + minY = Math.min(minY, glyphBBox.getLowerLeftY()); + maxY = Math.max(maxY, glyphBBox.getUpperRightY()); + } + fontBBox = new PDRectangle((float) minX, (float) minY, (float) (maxX - minX), (float) (maxY - minY)); + if (fontBBox.getWidth() <= 0 || fontBBox.getHeight() <= 0) + { + // less reliable, but good as a fallback solution for PDF.js issue 10717 + BoundingBox boundingBox = font.getBoundingBox(); + fontBBox = new PDRectangle(boundingBox.getLowerLeftX(), + boundingBox.getLowerLeftY(), + boundingBox.getWidth(), + boundingBox.getHeight()); + } + } + + private Object[][] getGlyphs(PDType3Font font) throws IOException + { + boolean isEmpty = fontBBox.toGeneralPath().getBounds2D().isEmpty(); + Object[][] glyphs = new Object[256][4]; + + // map needed to lessen memory footprint for files with duplicates + // e.g. 
PDF.js issue 10717 + Map map = new HashMap(); + + for (int index = 0; index <= 255; index++) + { + glyphs[index][0] = index; + if (font.getEncoding().contains(index) || font.toUnicode(index) != null) + { + String name = font.getEncoding().getName(index); + glyphs[index][1] = name; + glyphs[index][2] = font.toUnicode(index); + if (isEmpty) + { + glyphs[index][3] = NO_GLYPH; + } + else if (map.containsKey(name)) + { + glyphs[index][3] = map.get(name); + } + else + { + BufferedImage image = renderType3Glyph(font, index); + map.put(name, image); + glyphs[index][3] = image; + } + totalAvailableGlyph++; + } + else + { + glyphs[index][1] = NO_GLYPH; + glyphs[index][2] = NO_GLYPH; + glyphs[index][3] = NO_GLYPH; + } + } + return glyphs; + } + + // Kind of overkill to create a PDF for one glyph, but there is no better way at this time. + // Isn't called if no bounds are available + private BufferedImage renderType3Glyph(PDType3Font font, int index) throws IOException + { + PDDocument doc = new PDDocument(); + int scale = 1; + if (fontBBox.getWidth() < 72 || fontBBox.getHeight() < 72) + { + // e.g. 
T4 font of PDFBOX-2959 + scale = (int) (72 / Math.min(fontBBox.getWidth(), fontBBox.getHeight())); + } + PDPage page = new PDPage(new PDRectangle(fontBBox.getWidth() * scale, fontBBox.getHeight() * scale)); + page.setResources(resources); + + PDPageContentStream cs = new PDPageContentStream(doc, page, PDPageContentStream.AppendMode.APPEND, false); + try + { + // any changes here must be done carefully and each file must be tested again + // just inverting didn't work with + // https://www.treasury.gov/ofac/downloads/sdnlist.pdf (has rotated matrix) + // also PDFBOX-4228-type3.pdf (identity matrix) + // Root/Pages/Kids/[0]/Resources/XObject/X1/Resources/XObject/X3/Resources/Font/F10 + // PDFBOX-1794-vattenfall.pdf (scale 0.001) + float scalingFactorX = font.getFontMatrix().getScalingFactorX(); + float scalingFactorY = font.getFontMatrix().getScalingFactorY(); + float translateX = scalingFactorX > 0 ? -fontBBox.getLowerLeftX() : fontBBox.getUpperRightX(); + float translateY = scalingFactorY > 0 ? 
-fontBBox.getLowerLeftY() : fontBBox.getUpperRightY(); + cs.transform(Matrix.getTranslateInstance(translateX * scale, translateY * scale)); + cs.beginText(); + cs.setFont(font, scale / Math.min(Math.abs(scalingFactorX), Math.abs(scalingFactorY))); + // can't use showText() because there's no guarantee we have the unicode + cs.appendRawCommands(String.format("<%02X> Tj%n", index).getBytes(Charsets.ISO_8859_1)); + cs.endText(); + cs.close(); + doc.addPage(page); + return new PDFRenderer(doc).renderImage(0); + } + finally + { + doc.close(); + } + } + + private String getEncodingName(PDType3Font font) + { + return font.getEncoding().getClass().getSimpleName() + " / " + font.getEncoding().getEncodingName(); + } + + @Override + public JPanel getPanel() + { + return view.getPanel(); + } +} diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/ASCIIPane.java b/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/ASCIIPane.java index ce95f1bc616..ced49920f2e 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/ASCIIPane.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/ASCIIPane.java @@ -30,6 +30,7 @@ * This class shows corresponding ASCII characters for bytes. For every 16 byte there is one line. * This paints the only visible contents at one time. */ +@SuppressWarnings({"serial","squid:S1948"}) class ASCIIPane extends JComponent implements HexModelChangeListener { private final HexModel model; diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/HexEditor.java b/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/HexEditor.java index f5fe52f6dcc..b054e32f09a 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/HexEditor.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/HexEditor.java @@ -43,8 +43,9 @@ /** * @author Khyrul Bashar * - * This class hosts all the UI components of Hex view and cordinate among them. 
+ * This class hosts all the UI components of Hex view and coordinates among them. */ +@SuppressWarnings({"serial","squid:S1948"}) class HexEditor extends JPanel implements SelectionChangeListener { private final HexModel model; @@ -135,6 +136,7 @@ private JScrollPane getScrollPane() @Override public void actionPerformed(ActionEvent actionEvent) { + // do nothing } }; diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/HexModel.java b/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/HexModel.java index bb770c78161..677f5e90ff4 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/HexModel.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/HexModel.java @@ -23,7 +23,7 @@ /** * @author Khyrul Bashar * - * A class that acts as a model for the hex viewer. It holds the data and provide the data as ncessary. + * A class that acts as a model for the hex viewer. It holds the data and provide the data as necessary. * It'll let listen for any underlying data changes. */ class HexModel implements HexChangeListener diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/HexPane.java b/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/HexPane.java index af8fd4d0625..ecdd98f79af 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/HexPane.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/hexviewer/HexPane.java @@ -40,6 +40,7 @@ * HexPane shows the byte in a Grid table where every row has 16 bytes. It only draws bytes those are * only visible at a given time. 
*/ +@SuppressWarnings("squid:S1948") class HexPane extends JPanel implements KeyListener, MouseListener, MouseMotionListener, HexModelChangeListener { private final HexModel model; @@ -170,15 +171,15 @@ private void setDefault(Graphics g) */ private int getIndexForPoint(Point point) { - if (point.x <= 20 || point.x >= (16 * HexView.CHAR_WIDTH)+20 ) + if (point.x <= 20 || point.x >= (16 * HexView.CHAR_WIDTH) + 20) { return -1; } int y = point.y; - int lineNumber = (y+ (HexView.CHAR_HEIGHT -(y % HexView.CHAR_HEIGHT)))/ HexView.CHAR_HEIGHT; + int lineNumber = (y + (HexView.CHAR_HEIGHT - (y % HexView.CHAR_HEIGHT))) / HexView.CHAR_HEIGHT; int x = point.x - 20; - int elementNumber = (x / HexView.CHAR_WIDTH); - return (lineNumber-1) * 16 + elementNumber; + int elementNumber = x / HexView.CHAR_WIDTH; + return (lineNumber - 1) * 16 + elementNumber; } /** @@ -306,7 +307,7 @@ public void keyPressed(KeyEvent keyEvent) @Override public void keyReleased(KeyEvent keyEvent) { - + // do nothing } @Override @@ -324,36 +325,37 @@ public void mouseClicked(MouseEvent mouseEvent) @Override public void mousePressed(MouseEvent mouseEvent) { - + // do nothing } @Override public void mouseReleased(MouseEvent mouseEvent) { + // do nothing } @Override public void mouseEntered(MouseEvent mouseEvent) { - + // do nothing } @Override public void mouseExited(MouseEvent mouseEvent) { - + // do nothing } @Override public void mouseDragged(MouseEvent mouseEvent) { - + // do nothing } @Override public void mouseMoved(MouseEvent mouseEvent) { - + // do nothing } private static boolean isHexChar(char c) diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java b/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java index 40b30b9930d..650760785b7 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java @@ -18,19 +18,33 @@ import java.awt.Color; import 
java.awt.Component; +import java.awt.Cursor; +import java.awt.Desktop; import java.awt.Font; +import java.awt.GraphicsEnvironment; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; +import java.awt.event.MouseEvent; +import java.awt.event.MouseListener; +import java.awt.event.MouseMotionListener; +import java.awt.geom.AffineTransform; import java.awt.image.BufferedImage; import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; import java.util.concurrent.ExecutionException; import javax.swing.BoxLayout; -import javax.swing.ImageIcon; import javax.swing.JLabel; import javax.swing.JPanel; import javax.swing.SwingWorker; import javax.swing.event.AncestorEvent; import javax.swing.event.AncestorListener; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.debugger.ui.ImageUtil; import org.apache.pdfbox.debugger.ui.RotationMenu; @@ -38,6 +52,25 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.debugger.PDFDebugger; +import org.apache.pdfbox.debugger.ui.ErrorDialog; +import org.apache.pdfbox.debugger.ui.HighResolutionImageIcon; +import org.apache.pdfbox.debugger.ui.ImageTypeMenu; +import org.apache.pdfbox.debugger.ui.RenderDestinationMenu; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.fixup.AcroFormDefaultFixup; +import org.apache.pdfbox.pdmodel.fixup.PDDocumentFixup; +import org.apache.pdfbox.pdmodel.interactive.action.PDAction; +import org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo; +import org.apache.pdfbox.pdmodel.interactive.action.PDActionURI; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import 
org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDDestination; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDNamedDestination; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import org.apache.pdfbox.pdmodel.interactive.form.PDField; /** * Display the page number and a page rendering. @@ -45,21 +78,122 @@ * @author Tilman Hausherr * @author John Hewson */ -public class PagePane implements ActionListener, AncestorListener +public class PagePane implements ActionListener, AncestorListener, MouseMotionListener, MouseListener { + private static final Log LOG = LogFactory.getLog(PagePane.class); private JPanel panel; private int pageIndex = -1; private final PDDocument document; private JLabel label; private ZoomMenu zoomMenu; private RotationMenu rotationMenu; + private ImageTypeMenu imageTypeMenu; + private RenderDestinationMenu renderDestinationMenu; + private final JLabel statuslabel; + private final PDPage page; + private String labelText = ""; + private String currentURI = ""; + private final Map rectMap = new HashMap(); + private final AffineTransform defaultTransform = GraphicsEnvironment.getLocalGraphicsEnvironment(). 
+ getDefaultScreenDevice().getDefaultConfiguration().getDefaultTransform(); - public PagePane(PDDocument document, COSDictionary page) + public PagePane(PDDocument document, COSDictionary pageDict, JLabel statuslabel) { - PDPage pdPage = new PDPage(page); - pageIndex = document.getPages().indexOf(pdPage); + page = new PDPage(pageDict); + pageIndex = document.getPages().indexOf(page); this.document = document; + this.statuslabel = statuslabel; initUI(); + initRectMap(); + } + + private void initRectMap() + { + try + { + collectFieldLocations(); + collectLinkLocations(); + } + catch (IOException ex) + { + LOG.error(ex.getMessage(), ex); + } + } + + private void collectLinkLocations() throws IOException + { + for (PDAnnotation annotation : page.getAnnotations()) + { + if (annotation instanceof PDAnnotationLink) + { + collectLinkLocation((PDAnnotationLink) annotation); + } + } + } + + private void collectLinkLocation(PDAnnotationLink linkAnnotation) throws IOException + { + PDAction action = linkAnnotation.getAction(); + if (action instanceof PDActionURI) + { + PDActionURI uriAction = (PDActionURI) action; + rectMap.put(linkAnnotation.getRectangle(), "URI: " + uriAction.getURI()); + return; + } + PDDestination destination; + if (action instanceof PDActionGoTo) + { + PDActionGoTo goToAction = (PDActionGoTo) action; + destination = goToAction.getDestination(); + } + else + { + destination = linkAnnotation.getDestination(); + } + if (destination instanceof PDNamedDestination) + { + destination = document.getDocumentCatalog(). 
+ findNamedDestinationPage((PDNamedDestination) destination); + } + if (destination instanceof PDPageDestination) + { + PDPageDestination pageDestination = (PDPageDestination) destination; + int pageNum = pageDestination.retrievePageNumber(); + if (pageNum != -1) + { + rectMap.put(linkAnnotation.getRectangle(), "Page destination: " + (pageNum + 1)); + } + } + } + + private void collectFieldLocations() throws IOException + { + // get AcroForm; fixups are applied only when the repair menu item is selected, to ensure that we otherwise get the original content + boolean repairSelected = PDFDebugger.repairAcroFormMenuItem.isSelected(); + PDDocumentFixup fixup = repairSelected ? new AcroFormDefaultFixup(document) : null; + PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(fixup); + + if (acroForm == null) + { + return; + } + Set dictionarySet = new HashSet(); + for (PDAnnotation annotation : page.getAnnotations()) + { + dictionarySet.add(annotation.getCOSObject()); + } + for (PDField field : acroForm.getFieldTree()) + { + for (PDAnnotationWidget widget : field.getWidgets()) + { + // check if the annotation widget is on this page + // (checking widget.getPage() also works, but it is sometimes null) + if (dictionarySet.contains(widget.getCOSObject())) + { + rectMap.put(widget.getRectangle(), "Field name: " + field.getFullyQualifiedName()); + } + } + } } private void initUI() @@ -68,7 +202,14 @@ private void initUI() panel.setLayout(new BoxLayout(panel, BoxLayout.Y_AXIS)); String pageLabelText = pageIndex < 0 ? 
"Page number not found" : "Page " + (pageIndex + 1); - + + // append PDF page label, if available + String lbl = PDFDebugger.getPageLabel(document, pageIndex); + if (lbl != null) + { + pageLabelText += " - " + lbl; + } + JLabel pageLabel = new JLabel(pageLabelText); pageLabel.setAlignmentX(Component.CENTER_ALIGNMENT); pageLabel.setFont(new Font(Font.MONOSPACED, Font.BOLD, 30)); @@ -76,14 +217,16 @@ private void initUI() panel.add(pageLabel); label = new JLabel(); + label.addMouseMotionListener(this); + label.addMouseListener(this); label.setBackground(panel.getBackground()); label.setAlignmentX(Component.CENTER_ALIGNMENT); panel.add(label); panel.addAncestorListener(this); - // render in a background thread: rendering is read-only, so this should be ok, despite - // the fact that PDDocument is not officially thread safe - new RenderWorker(1, 0).execute(); + zoomMenu = ZoomMenu.getInstance(); + zoomMenu.changeZoomSelection(zoomMenu.getPageZoomScale()); + startRendering(); } /** @@ -100,24 +243,55 @@ public Component getPanel() public void actionPerformed(ActionEvent actionEvent) { String actionCommand = actionEvent.getActionCommand(); - if (ZoomMenu.isZoomMenu(actionCommand) || RotationMenu.isRotationMenu(actionCommand)) + if (actionEvent.getSource() == PDFDebugger.repairAcroFormMenuItem) + { + boolean repairSelected = PDFDebugger.repairAcroFormMenuItem.isSelected(); + PDDocumentFixup fixup = repairSelected ? 
new AcroFormDefaultFixup(document) : null; + document.getDocumentCatalog().getAcroForm(fixup); + startRendering(); + } + else if (ZoomMenu.isZoomMenu(actionCommand) || + RotationMenu.isRotationMenu(actionCommand) || + ImageTypeMenu.isImageTypeMenu(actionCommand) || + RenderDestinationMenu.isRenderDestinationMenu(actionCommand) || + actionEvent.getSource() == PDFDebugger.allowSubsampling) { - new RenderWorker(ZoomMenu.getZoomScale(), RotationMenu.getRotationDegrees()).execute(); + startRendering(); + zoomMenu.setPageZoomScale(ZoomMenu.getZoomScale()); } } + private void startRendering() + { + // render in a background thread: rendering is read-only, so this should be ok, despite + // the fact that PDDocument is not officially thread safe + new RenderWorker().execute(); + zoomMenu.setPageZoomScale(ZoomMenu.getZoomScale()); + } + @Override public void ancestorAdded(AncestorEvent ancestorEvent) { - zoomMenu = ZoomMenu.getInstance(); zoomMenu.addMenuListeners(this); - zoomMenu.setZoomSelection(ZoomMenu.ZOOM_100_PERCENT); zoomMenu.setEnableMenu(true); rotationMenu = RotationMenu.getInstance(); rotationMenu.addMenuListeners(this); - rotationMenu.setRotationSelection(RotationMenu.ROTATE_0_DEGREES); rotationMenu.setEnableMenu(true); + + imageTypeMenu = ImageTypeMenu.getInstance(); + imageTypeMenu.addMenuListeners(this); + imageTypeMenu.setEnableMenu(true); + + renderDestinationMenu = RenderDestinationMenu.getInstance(); + renderDestinationMenu.addMenuListeners(this); + renderDestinationMenu.setEnableMenu(true); + + PDFDebugger.allowSubsampling.setEnabled(true); + PDFDebugger.allowSubsampling.addActionListener(this); + + PDFDebugger.repairAcroFormMenuItem.setEnabled(true); + PDFDebugger.repairAcroFormMenuItem.addActionListener(this); } @Override @@ -125,34 +299,157 @@ public void ancestorRemoved(AncestorEvent ancestorEvent) { zoomMenu.setEnableMenu(false); rotationMenu.setEnableMenu(false); + imageTypeMenu.setEnableMenu(false); + renderDestinationMenu.setEnableMenu(false); + 
+ PDFDebugger.allowSubsampling.setEnabled(false); + PDFDebugger.allowSubsampling.removeActionListener(this); } @Override public void ancestorMoved(AncestorEvent ancestorEvent) { + // do nothing + } + + @Override + public void mouseDragged(MouseEvent e) + { + // do nothing } /** - * Note that PDDocument is not officially thread safe, caution advised. + * Catch mouse event to display cursor position in PDF coordinates in the status bar. + * + * @param e mouse event with position */ - private final class RenderWorker extends SwingWorker + @Override + public void mouseMoved(MouseEvent e) { - private final float scale; - private final int rotation; + PDRectangle cropBox = page.getCropBox(); + float height = cropBox.getHeight(); + float width = cropBox.getWidth(); + float offsetX = cropBox.getLowerLeftX(); + float offsetY = cropBox.getLowerLeftY(); + float zoomScale = zoomMenu.getPageZoomScale(); + float x = e.getX() / zoomScale * (float) defaultTransform.getScaleX(); + float y = e.getY() / zoomScale * (float) defaultTransform.getScaleY(); + int x1; + int y1; + switch ((RotationMenu.getRotationDegrees() + page.getRotation()) % 360) + { + case 90: + x1 = (int) (y + offsetX); + y1 = (int) (x + offsetY); + break; + case 180: + x1 = (int) (width - x + offsetX); + y1 = (int) (y - offsetY); + break; + case 270: + x1 = (int) (width - y + offsetX); + y1 = (int) (height - x + offsetY); + break; + case 0: + default: + x1 = (int) (x + offsetX); + y1 = (int) (height - y + offsetY); + break; + } + String text = "x: " + x1 + ", y: " + y1; - private RenderWorker(float scale, int rotation) + // are we in a field widget or a link annotation? 
+ Cursor cursor = Cursor.getDefaultCursor(); + currentURI = ""; + for (Map.Entry entry : rectMap.entrySet()) { - this.scale = scale; - this.rotation = rotation; + if (entry.getKey().contains(x1, y1)) + { + String s = rectMap.get(entry.getKey()); + text += ", " + s; + if (s.startsWith("URI: ")) + { + currentURI = s.substring(5); + cursor = Cursor.getPredefinedCursor(Cursor.HAND_CURSOR); + } + break; + } } + panel.setCursor(cursor); + + statuslabel.setText(text); + } + @Override + public void mouseClicked(MouseEvent e) + { + if (!currentURI.isEmpty() && + Desktop.isDesktopSupported() && Desktop.getDesktop().isSupported(Desktop.Action.BROWSE)) + { + try + { + Desktop.getDesktop().browse(new URI(currentURI)); + } + catch (URISyntaxException ex) + { + new ErrorDialog(ex).setVisible(true); + } + catch (IOException ex) + { + new ErrorDialog(ex).setVisible(true); + } + } + } + + @Override + public void mousePressed(MouseEvent e) + { + // do nothing + } + + @Override + public void mouseReleased(MouseEvent e) + { + // do nothing + } + + @Override + public void mouseEntered(MouseEvent e) + { + // do nothing + } + + @Override + public void mouseExited(MouseEvent e) + { + statuslabel.setText(labelText); + } + + /** + * Note that PDDocument is not officially thread safe, caution advised. 
+ */ + private final class RenderWorker extends SwingWorker + { @Override protected BufferedImage doInBackground() throws IOException { + // rendering can take a long time, so remember all options that are used later + float scale = ZoomMenu.getZoomScale(); + int rotation = RotationMenu.getRotationDegrees(); + label.setIcon(null); - label.setText("Loading..."); + labelText = "Rendering..."; + label.setText(labelText); + PDFRenderer renderer = new PDFRenderer(document); - BufferedImage bim = renderer.renderImage(pageIndex, scale); + renderer.setSubsamplingAllowed(PDFDebugger.allowSubsampling.isSelected()); + + long t0 = System.currentTimeMillis(); + statuslabel.setText(labelText); + BufferedImage bim = renderer.renderImage(pageIndex, scale, ImageTypeMenu.getImageType(), RenderDestinationMenu.getRenderDestination()); + float t = (System.currentTimeMillis() - t0) / 1000f; + labelText = "Rendered in " + t + " second" + (t > 1 ? "s" : ""); + statuslabel.setText(labelText); return ImageUtil.getRotatedImage(bim, rotation); } @@ -161,7 +458,16 @@ protected void done() { try { - label.setIcon(new ImageIcon(get())); + BufferedImage image = get(); + + // We cannot use "label.setIcon(new ImageIcon(get()))" here + // because of blurry upscaling in JDK9. Instead, the label is now created with + // a smaller size than the image to compensate that the + // image is scaled up with some screen configurations (e.g. 125% on windows). + // See PDFBOX-3665 for more sample code and discussion. 
+ label.setSize((int) Math.ceil(image.getWidth() / defaultTransform.getScaleX()), + (int) Math.ceil(image.getHeight() / defaultTransform.getScaleY())); + label.setIcon(new HighResolutionImageIcon(image, label.getWidth(), label.getHeight())); label.setText(null); } catch (InterruptedException e) diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/OperatorMarker.java b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/OperatorMarker.java index 3bbf8991a36..b6546220dd3 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/OperatorMarker.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/OperatorMarker.java @@ -24,19 +24,29 @@ import javax.swing.text.StyleConstants; import javax.swing.text.StyleContext; +import org.apache.pdfbox.contentstream.operator.OperatorName; + /** * @author Khyrul Bashar */ final class OperatorMarker { - public static final String BEGIN_TEXT_OBJECT = "BT"; - public static final String END_TEXT_OBJECT = "ET"; - public static final String SAVE_GRAPHICS_STATE = "q"; - public static final String RESTORE_GRAPHICS_STATE = "Q"; - public static final String CONCAT = "cm"; - public static final String INLINE_IMAGE_BEGIN = "BI"; - public static final String IMAGE_DATA = "ID"; - public static final String INLINE_IMAGE_END = "EI"; + @Deprecated + public static final String BEGIN_TEXT_OBJECT = OperatorName.BEGIN_TEXT; + @Deprecated + public static final String END_TEXT_OBJECT = OperatorName.END_TEXT; + @Deprecated + public static final String SAVE_GRAPHICS_STATE = OperatorName.SAVE; + @Deprecated + public static final String RESTORE_GRAPHICS_STATE = OperatorName.RESTORE; + @Deprecated + public static final String CONCAT = OperatorName.CONCAT; + @Deprecated + public static final String INLINE_IMAGE_BEGIN = OperatorName.BEGIN_INLINE_IMAGE; + @Deprecated + public static final String IMAGE_DATA = OperatorName.BEGIN_INLINE_IMAGE_DATA; + @Deprecated + public static final String 
INLINE_IMAGE_END = OperatorName.END_INLINE_IMAGE; private static final Map operatorStyleMap; @@ -64,14 +74,14 @@ final class OperatorMarker Map styleMap = new HashMap(); - styleMap.put(BEGIN_TEXT_OBJECT, textObjectStyle); - styleMap.put(END_TEXT_OBJECT, textObjectStyle); - styleMap.put(SAVE_GRAPHICS_STATE, graphicsStyle); - styleMap.put(RESTORE_GRAPHICS_STATE, graphicsStyle); - styleMap.put(CONCAT, concatStyle); - styleMap.put(INLINE_IMAGE_BEGIN, inlineImage); - styleMap.put(IMAGE_DATA, imageData); - styleMap.put(INLINE_IMAGE_END, inlineImage); + styleMap.put(OperatorName.BEGIN_TEXT, textObjectStyle); + styleMap.put(OperatorName.END_TEXT, textObjectStyle); + styleMap.put(OperatorName.SAVE, graphicsStyle); + styleMap.put(OperatorName.RESTORE, graphicsStyle); + styleMap.put(OperatorName.CONCAT, concatStyle); + styleMap.put(OperatorName.BEGIN_INLINE_IMAGE, inlineImage); + styleMap.put(OperatorName.BEGIN_INLINE_IMAGE_DATA, imageData); + styleMap.put(OperatorName.END_INLINE_IMAGE, inlineImage); operatorStyleMap = styleMap; } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/Stream.java b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/Stream.java index c6a295a21f7..e234c53cc1f 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/Stream.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/Stream.java @@ -23,6 +23,8 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; @@ -35,16 +37,20 @@ /** * @author Khyrul Bashar * - * A class that provides the COSStream in different version and related informations. + * A class that provides the COSStream in various views and related information. 
*/ public class Stream { + private static final Log LOG = LogFactory.getLog(Stream.class); + public static final String UNFILTERED = "Unfiltered"; public static final String IMAGE = "Image"; - private final COSStream stream; + private final COSStream strm; private final boolean isThumb; private final boolean isImage; + private final boolean isXmlMetadata; + private final Map> filters; /** @@ -55,9 +61,10 @@ public class Stream */ Stream(COSStream cosStream, boolean isThumb) { - this.stream = cosStream; + this.strm = cosStream; this.isThumb = isThumb; this.isImage = isImageStream(cosStream, isThumb); + this.isXmlMetadata = isXmlMetadataStream(cosStream); filters = createFilterList(cosStream); } @@ -71,6 +78,16 @@ public boolean isImage() { return isImage; } + + /** + * Return whether this stream is an XML metadata stream. + * + * @return true if this is a metadata stream and false otherwise. + */ + public boolean isXmlMetadata() + { + return isXmlMetadata; + } /** * Return the available filter list. 
Only "Unfiltered" is returned if there is no filter and in @@ -80,12 +97,7 @@ public boolean isImage() */ public List getFilterList() { - List list = new ArrayList(); - for (Map.Entry> entry : filters.entrySet()) - { - list.add(entry.getKey()); - } - return list; + return new ArrayList(filters.keySet()); } /** @@ -94,7 +106,7 @@ public List getFilterList() private String getFilteredLabel() { StringBuilder sb = new StringBuilder(); - COSBase base = stream.getFilters(); + COSBase base = strm.getFilters(); if (base instanceof COSName) { sb.append(((COSName) base).getName()); @@ -126,20 +138,20 @@ public InputStream getStream(String key) { if (UNFILTERED.equals(key)) { - return stream.createInputStream(); + return strm.createInputStream(); } else if (getFilteredLabel().equals(key)) { - return stream.createRawInputStream(); + return strm.createRawInputStream(); } else { - return new PDStream(stream).createInputStream(filters.get(key)); + return new PDStream(strm).createInputStream(filters.get(key)); } } catch (IOException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } return null; } @@ -157,17 +169,17 @@ public BufferedImage getImage(PDResources resources) PDImageXObject imageXObject; if (isThumb) { - imageXObject = PDImageXObject.createThumbnail(stream); + imageXObject = PDImageXObject.createThumbnail(strm); } else { - imageXObject = new PDImageXObject(new PDStream(stream), resources); + imageXObject = new PDImageXObject(new PDStream(strm), resources); } return imageXObject.getImage(); } catch (IOException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } return null; } @@ -199,12 +211,12 @@ private Map> createFilterList(COSStream stream) private String getPartialStreamCommand(final int indexOfStopFilter) { - List avaiablrFilters = new PDStream(stream).getFilters(); + List availableFilters = new PDStream(strm).getFilters(); StringBuilder nameListBuilder = new StringBuilder(); - for (int i = indexOfStopFilter; i < avaiablrFilters.size(); i++) 
+ for (int i = indexOfStopFilter; i < availableFilters.size(); i++) { - nameListBuilder.append(avaiablrFilters.get(i).getName()).append(" & "); + nameListBuilder.append(availableFilters.get(i).getName()).append(" & "); } nameListBuilder.delete(nameListBuilder.lastIndexOf("&"), nameListBuilder.length()); @@ -213,10 +225,10 @@ private String getPartialStreamCommand(final int indexOfStopFilter) private List getStopFilterList(final int stopFilterIndex) { - List avaiablrFilters = new PDStream(stream).getFilters(); + List availableFilters = new PDStream(strm).getFilters(); final List stopFilters = new ArrayList(1); - stopFilters.add(avaiablrFilters.get(stopFilterIndex).getName()); + stopFilters.add(availableFilters.get(stopFilterIndex).getName()); return stopFilters; } @@ -229,4 +241,9 @@ private boolean isImageStream(COSDictionary dic, boolean isThumb) } return dic.containsKey(COSName.SUBTYPE) && dic.getCOSName(COSName.SUBTYPE).equals(COSName.IMAGE); } + + private boolean isXmlMetadataStream(COSDictionary dic) + { + return dic.containsKey(COSName.SUBTYPE) && dic.getCOSName(COSName.SUBTYPE).equals(COSName.getPDFName("XML")); + } } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/StreamImageView.java b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/StreamImageView.java index 6accf5ffc56..a2d6c423a7c 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/StreamImageView.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/StreamImageView.java @@ -19,13 +19,15 @@ import java.awt.Color; import java.awt.Component; import java.awt.Dimension; +import java.awt.GraphicsEnvironment; import java.awt.Image; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; +import java.awt.geom.AffineTransform; import java.awt.image.BufferedImage; + import javax.swing.Box; import javax.swing.BoxLayout; -import javax.swing.ImageIcon; import javax.swing.JComponent; import javax.swing.JLabel; import 
javax.swing.JPanel; @@ -34,6 +36,7 @@ import javax.swing.event.AncestorEvent; import javax.swing.event.AncestorListener; +import org.apache.pdfbox.debugger.ui.HighResolutionImageIcon; import org.apache.pdfbox.debugger.ui.ImageUtil; import org.apache.pdfbox.debugger.ui.RotationMenu; import org.apache.pdfbox.debugger.ui.ZoomMenu; @@ -66,10 +69,13 @@ private void initUI() JPanel panel = new JPanel(); panel.setLayout(new BoxLayout(panel, BoxLayout.Y_AXIS)); + zoomMenu = ZoomMenu.getInstance(); + zoomMenu.changeZoomSelection(zoomMenu.getImageZoomScale()); + label = new JLabel(); label.setBorder(new LineBorder(Color.BLACK)); label.setAlignmentX(Component.CENTER_ALIGNMENT); - label.setIcon(new ImageIcon(image)); + addImage(zoomImage(image, zoomMenu.getImageZoomScale(), RotationMenu.getRotationDegrees())); panel.add(Box.createVerticalGlue()); panel.add(label); @@ -95,7 +101,7 @@ private Image zoomImage(BufferedImage origin, float scale, int rotation) BufferedImage rotatedImage = ImageUtil.getRotatedImage(origin, rotation); int resizedWidth = (int) (rotatedImage.getWidth() * scale); int resizedHeight = (int) (rotatedImage.getHeight() * scale); - return rotatedImage.getScaledInstance(resizedWidth, resizedHeight, BufferedImage.SCALE_SMOOTH); + return rotatedImage.getScaledInstance(resizedWidth, resizedHeight, Image.SCALE_SMOOTH); } @Override @@ -105,21 +111,25 @@ public void actionPerformed(ActionEvent actionEvent) if (ZoomMenu.isZoomMenu(actionCommand) || RotationMenu.isRotationMenu(actionCommand)) { addImage(zoomImage(image, ZoomMenu.getZoomScale(), RotationMenu.getRotationDegrees())); + zoomMenu.setImageZoomScale(ZoomMenu.getZoomScale()); } } private void addImage(Image img) { - label.setIcon(new ImageIcon(img)); + // for JDK9; see explanation in PagePane + AffineTransform tx = GraphicsEnvironment.getLocalGraphicsEnvironment(). 
+ getDefaultScreenDevice().getDefaultConfiguration().getDefaultTransform(); + label.setSize((int) Math.ceil(img.getWidth(null) / tx.getScaleX()), + (int) Math.ceil(img.getHeight(null) / tx.getScaleY())); + label.setIcon(new HighResolutionImageIcon(img, label.getWidth(), label.getHeight())); label.revalidate(); } @Override public void ancestorAdded(AncestorEvent ancestorEvent) { - zoomMenu = ZoomMenu.getInstance(); zoomMenu.addMenuListeners(this); - zoomMenu.setZoomSelection(ZoomMenu.ZOOM_100_PERCENT); zoomMenu.setEnableMenu(true); rotationMenu = RotationMenu.getInstance(); diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/StreamPane.java b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/StreamPane.java index fa2c0cf1edf..22f0dd4b079 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/StreamPane.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/StreamPane.java @@ -26,11 +26,12 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.util.List; import java.util.Map; +import java.util.Vector; import java.util.concurrent.ExecutionException; -import javax.imageio.ImageIO; import javax.swing.BoxLayout; import javax.swing.JComboBox; import javax.swing.JComponent; @@ -44,13 +45,25 @@ import javax.swing.text.StyleConstants; import javax.swing.text.StyleContext; import javax.swing.text.StyledDocument; +import javax.xml.XMLConstants; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import 
org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSBoolean; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNull; import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.cos.COSString; @@ -60,6 +73,8 @@ import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.util.Charsets; +import org.apache.pdfbox.util.XMLUtil; +import org.w3c.dom.Document; /** * @author Khyrul Bashar @@ -68,16 +83,28 @@ */ public class StreamPane implements ActionListener { - public static final String BEGIN_TEXT_OBJECT = "BT"; - public static final String END_TEXT_OBJECT = "ET"; - public static final String SAVE_GRAPHICS_STATE = "q"; - public static final String RESTORE_GRAPHICS_STATE = "Q"; - public static final String INLINE_IMAGE_BEGIN = "BI"; - public static final String IMAGE_DATA = "ID"; - public static final String INLINE_IMAGE_END = "EI"; - public static final String BEGIN_MARKED_CONTENT1 = "BMC"; - public static final String BEGIN_MARKED_CONTENT2 = "BDC"; - public static final String END_MARKED_CONTENT = "EMC"; + private static final Log LOG = LogFactory.getLog(StreamPane.class); + + @Deprecated + public static final String BEGIN_TEXT_OBJECT = OperatorName.BEGIN_TEXT; + @Deprecated + public static final String END_TEXT_OBJECT = OperatorName.END_TEXT; + @Deprecated + public static final String SAVE_GRAPHICS_STATE = OperatorName.SAVE; + @Deprecated + public static final String RESTORE_GRAPHICS_STATE = OperatorName.RESTORE; + @Deprecated + public static final String INLINE_IMAGE_BEGIN = OperatorName.BEGIN_INLINE_IMAGE; + @Deprecated + public static final String IMAGE_DATA = 
OperatorName.BEGIN_INLINE_IMAGE_DATA; + @Deprecated + public static final String INLINE_IMAGE_END = OperatorName.END_INLINE_IMAGE; + @Deprecated + public static final String BEGIN_MARKED_CONTENT1 = OperatorName.BEGIN_MARKED_CONTENT; + @Deprecated + public static final String BEGIN_MARKED_CONTENT2 = OperatorName.BEGIN_MARKED_CONTENT_SEQ; + @Deprecated + public static final String END_MARKED_CONTENT = OperatorName.END_MARKED_CONTENT; private static final StyleContext CONTEXT = StyleContext.getDefaultStyleContext(); private static final Style OPERATOR_STYLE = CONTEXT.addStyle("operator", null); @@ -100,11 +127,11 @@ public class StreamPane implements ActionListener private final JPanel panel; private final HexView hexView; private final JTabbedPane tabbedPane; - private final StreamPaneView view; + private final StreamPaneView rawView; + private final StreamPaneView niceView; private final Stream stream; private ToolTipController tTController; private PDResources resources; - private final boolean isContentStream; /** * Constructor. 
@@ -118,8 +145,6 @@ public class StreamPane implements ActionListener public StreamPane(COSStream cosStream, boolean isContentStream, boolean isThumb, COSDictionary resourcesDic) throws IOException { - this.isContentStream = isContentStream; - this.stream = new Stream(cosStream, isThumb); if (resourcesDic != null) { @@ -131,8 +156,16 @@ public StreamPane(COSStream cosStream, boolean isContentStream, boolean isThumb, panel.setPreferredSize(new Dimension(300, 500)); panel.setLayout(new BoxLayout(panel, BoxLayout.Y_AXIS)); - view = new StreamPaneView(); + rawView = new StreamPaneView(); hexView = new HexView(); + if (isContentStream || stream.isXmlMetadata()) + { + niceView = new StreamPaneView(); + } + else + { + niceView = null; + } if (stream.isImage()) { @@ -146,8 +179,21 @@ public StreamPane(COSStream cosStream, boolean isContentStream, boolean isThumb, } tabbedPane = new JTabbedPane(); - tabbedPane.add("Text view", view.getStreamPanel()); - tabbedPane.add("Hex view", hexView.getPane()); + if (stream.isImage()) + { + tabbedPane.add("Image view", rawView.getStreamPanel()); + } + else if (niceView != null) + { + tabbedPane.add("Nice view", niceView.getStreamPanel()); + tabbedPane.add("Raw view", rawView.getStreamPanel()); + tabbedPane.add("Hex view", hexView.getPane()); + } + else + { + tabbedPane.add("Text view", rawView.getStreamPanel()); + tabbedPane.add("Hex view", hexView.getPane()); + } panel.add(tabbedPane); } @@ -159,7 +205,7 @@ public JComponent getPanel() private JPanel createHeaderPanel(List availableFilters, String i, ActionListener actionListener) { - JComboBox filters = new JComboBox(availableFilters.toArray()); + JComboBox filters = new JComboBox(new Vector(availableFilters)); filters.setSelectedItem(i); filters.addActionListener(actionListener); @@ -172,7 +218,7 @@ private JPanel createHeaderPanel(List availableFilters, String i, Action @Override public void actionPerformed(ActionEvent actionEvent) { - if 
(actionEvent.getActionCommand().equals("comboBoxChanged")) + if ("comboBoxChanged".equals(actionEvent.getActionCommand())) { JComboBox comboBox = (JComboBox) actionEvent.getSource(); String currentFilter = (String) comboBox.getSelectedItem(); @@ -182,18 +228,32 @@ public void actionPerformed(ActionEvent actionEvent) if (currentFilter.equals(Stream.IMAGE)) { requestImageShowing(); + tabbedPane.removeAll(); + tabbedPane.add("Image view", rawView.getStreamPanel()); return; } + tabbedPane.removeAll(); + if (Stream.UNFILTERED.equals(currentFilter) && niceView != null) + { + tabbedPane.add("Nice view", niceView.getStreamPanel()); + tabbedPane.add("Raw view", rawView.getStreamPanel()); + tabbedPane.add("Hex view", hexView.getPane()); + } + else + { + tabbedPane.add("Text view", rawView.getStreamPanel()); + tabbedPane.add("Hex view", hexView.getPane()); + } requestStreamText(currentFilter); } catch (IOException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } } - private void requestImageShowing() throws IOException + private void requestImageShowing() { if (stream.isImage()) { @@ -207,20 +267,17 @@ private void requestImageShowing() throws IOException JOptionPane.showMessageDialog(panel, "image not available (filter missing?)"); return; } - view.showStreamImage(image); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ImageIO.write(image, "jpg", baos); - baos.flush(); - byte[] bytes = baos.toByteArray(); - baos.close(); - hexView.changeData(bytes); + rawView.showStreamImage(image); } } private void requestStreamText(String command) throws IOException { - new DocumentCreator(command).execute(); + new DocumentCreator(rawView, command, false).execute(); + if (niceView != null) + { + new DocumentCreator(niceView, command, true).execute(); + } synchronized (stream) { InputStream is = stream.getStream(command); @@ -238,31 +295,45 @@ private void requestStreamText(String command) throws IOException */ private final class DocumentCreator extends 
SwingWorker { + private final StreamPaneView targetView; private final String filterKey; + private final boolean nice; private int indent; private boolean needIndent; - private DocumentCreator(String filterKey) + private DocumentCreator(StreamPaneView targetView, String filterKey, boolean nice) { + this.targetView = targetView; this.filterKey = filterKey; + this.nice = nice; } @Override protected StyledDocument doInBackground() { + // default encoding to use when reading text base content + String encoding = "ISO-8859-1"; synchronized (stream) { + if (stream.isXmlMetadata()) + { + encoding = "UTF-8"; + } InputStream inputStream = stream.getStream(filterKey); - if (isContentStream && Stream.UNFILTERED.equals(filterKey)) + if (nice && Stream.UNFILTERED.equals(filterKey)) { + if (stream.isXmlMetadata()) + { + return getXMLDocument(inputStream, encoding); + } StyledDocument document = getContentStreamDocument(inputStream); if (document != null) { return document; } - return getDocument(stream.getStream(filterKey)); + return getDocument(inputStream, encoding); } - return getDocument(inputStream); + return getDocument(inputStream, encoding); } } @@ -271,58 +342,102 @@ protected void done() { try { - view.showStreamText(get(), tTController); + targetView.showStreamText(get(), tTController); } catch (InterruptedException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } catch (ExecutionException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } - private String getStringOfStream(InputStream ioStream) + private String getStringOfStream(InputStream in, String encoding) { - ByteArrayOutputStream byteArray = new ByteArrayOutputStream(); - byte[] buffer = new byte[1024]; - int amountRead; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); try { - while ((amountRead = ioStream.read(buffer, 0, buffer.length)) != -1) - { - byteArray.write(buffer, 0, amountRead); - } + IOUtils.copy(in, baos); + return baos.toString(encoding); } catch (IOException 
e) { - e.printStackTrace(); - } - try - { - return byteArray.toString("ISO-8859-1"); - } - catch (UnsupportedEncodingException e) - { - e.printStackTrace(); + LOG.error(e.getMessage(), e); return null; } } - private StyledDocument getDocument(InputStream inputStream) + private StyledDocument getDocument(InputStream inputStream, String encoding) { StyledDocument docu = new DefaultStyledDocument(); if (inputStream != null) { - String data = getStringOfStream(inputStream); + String data = getStringOfStream(inputStream, encoding); + + // CR is not displayed in the raw view (see file from PDFBOX-4964), + // but LF is displayed, so lets first replace CR LF with LF and then + // replace the remaining CRs with LF + if (data != null) + { + data = data.replace("\r\n", "\n").replace('\r', '\n'); + } + try { docu.insertString(0, data, null); } catch (BadLocationException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); + } + } + return docu; + } + + private StyledDocument getXMLDocument(InputStream inputStream, String encoding) + { + StyledDocument docu = new DefaultStyledDocument(); + if (inputStream != null) + { + try + { + Document doc = XMLUtil.parse(inputStream); + TransformerFactory transformerFactory = TransformerFactory.newInstance(); + transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + // XMLConstants.ACCESS_EXTERNAL_DTD in jdk 1.7 + transformerFactory.setAttribute("http://javax.xml.XMLConstants/property/accessExternalDTD", ""); + // XMLConstants.ACCESS_EXTERNAL_STYLESHEET in jdk 1.7 + transformerFactory.setAttribute("http://javax.xml.XMLConstants/property/accessExternalStylesheet", ""); + Transformer transformer = transformerFactory.newTransformer(); + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "1"); + StringWriter sw = new StringWriter(); + StreamResult result = new StreamResult(sw); + DOMSource source = new DOMSource(doc); + 
transformer.transform(source, result); + docu.insertString(0, sw.toString(), null); + } + catch (UnsupportedEncodingException ex) + { + LOG.error(ex.getMessage(), ex); + } + catch (TransformerConfigurationException ex) + { + LOG.error(ex.getMessage(), ex); + } + catch (TransformerException ex) + { + LOG.error(ex.getMessage(), ex); + } + catch (BadLocationException ex) + { + LOG.error(ex.getMessage(), ex); + } + catch (IOException ex) + { + LOG.error(ex.getMessage(), ex); } } return docu; @@ -365,7 +480,7 @@ private void writeToken(Object obj, StyledDocument docu) } catch (BadLocationException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } @@ -414,7 +529,7 @@ else if (chr == '(' || chr == ')' || chr == '\n' || chr == '\r' || } else { - String str = "" + (char)chr; + String str = Character.toString((char) chr); docu.insertString(docu.getLength(), str, STRING_STYLE); } } @@ -444,6 +559,10 @@ else if (obj instanceof COSDictionary) } docu.insertString(docu.getLength(), ">> ", null); } + else if (obj instanceof COSNull) + { + docu.insertString(docu.getLength(), "null ", null); + } else { String str = obj.toString(); @@ -456,17 +575,17 @@ private void addOperators(Object obj, StyledDocument docu) throws BadLocationExc { Operator op = (Operator) obj; - if (op.getName().equals(END_TEXT_OBJECT) - || op.getName().equals(RESTORE_GRAPHICS_STATE) - || op.getName().equals(END_MARKED_CONTENT)) + if (op.getName().equals(OperatorName.END_TEXT) + || op.getName().equals(OperatorName.RESTORE) + || op.getName().equals(OperatorName.END_MARKED_CONTENT)) { indent--; } writeIndent(docu); - if (op.getName().equals(INLINE_IMAGE_BEGIN)) + if (op.getName().equals(OperatorName.BEGIN_INLINE_IMAGE)) { - docu.insertString(docu.getLength(), INLINE_IMAGE_BEGIN + "\n", OPERATOR_STYLE); + docu.insertString(docu.getLength(), OperatorName.BEGIN_INLINE_IMAGE + "\n", OPERATOR_STYLE); COSDictionary dic = op.getImageParameters(); for (COSName key : dic.keySet()) { @@ -476,10 +595,10 @@ 
private void addOperators(Object obj, StyledDocument docu) throws BadLocationExc docu.insertString(docu.getLength(), "\n", null); } String imageString = new String(op.getImageData(), Charsets.ISO_8859_1); - docu.insertString(docu.getLength(), IMAGE_DATA + "\n", INLINE_IMAGE_STYLE); + docu.insertString(docu.getLength(), OperatorName.BEGIN_INLINE_IMAGE_DATA + "\n", INLINE_IMAGE_STYLE); docu.insertString(docu.getLength(), imageString, null); docu.insertString(docu.getLength(), "\n", null); - docu.insertString(docu.getLength(), INLINE_IMAGE_END + "\n", OPERATOR_STYLE); + docu.insertString(docu.getLength(), OperatorName.END_INLINE_IMAGE + "\n", OPERATOR_STYLE); } else { @@ -487,10 +606,10 @@ private void addOperators(Object obj, StyledDocument docu) throws BadLocationExc docu.insertString(docu.getLength(), operator + "\n", OPERATOR_STYLE); // nested opening operators - if (op.getName().equals(BEGIN_TEXT_OBJECT) || - op.getName().equals(SAVE_GRAPHICS_STATE) || - op.getName().equals(BEGIN_MARKED_CONTENT1) || - op.getName().equals(BEGIN_MARKED_CONTENT2)) + if (op.getName().equals(OperatorName.BEGIN_TEXT) || + op.getName().equals(OperatorName.SAVE) || + op.getName().equals(OperatorName.BEGIN_MARKED_CONTENT) || + op.getName().equals(OperatorName.BEGIN_MARKED_CONTENT_SEQ)) { indent++; } @@ -510,4 +629,4 @@ void writeIndent(StyledDocument docu) throws BadLocationException } } } -} +} \ No newline at end of file diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/StreamTextView.java b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/StreamTextView.java index b0fbda09276..25f35256bc4 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/StreamTextView.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/StreamTextView.java @@ -89,6 +89,7 @@ JComponent getView() @Override public void mouseDragged(MouseEvent mouseEvent) { + // do nothing } @Override @@ -126,5 +127,6 @@ public void 
ancestorRemoved(AncestorEvent ancestorEvent) @Override public void ancestorMoved(AncestorEvent ancestorEvent) { + // do nothing } } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/FontToolTip.java b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/FontToolTip.java index 20c41a0f0ef..30bfd09b0b3 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/FontToolTip.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/FontToolTip.java @@ -18,16 +18,19 @@ package org.apache.pdfbox.debugger.streampane.tooltip; import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.font.PDFont; /** * @author Khyrul Bashar - * A class that provieds tooltip text for font. This shows the name of the font. + * A class that provides tooltip text for font. This shows the name of the font. 
*/ final class FontToolTip implements ToolTip { + private static final Log LOG = LogFactory.getLog(FontToolTip.class); private String markup; /** @@ -54,7 +57,7 @@ private void initUI(String fontReferenceName, PDResources resources) } catch (IOException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/KToolTip.java b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/KToolTip.java index ced245938b8..1ffdab4a886 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/KToolTip.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/KToolTip.java @@ -23,6 +23,8 @@ import java.io.IOException; import java.io.InputStream; import java.net.URL; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceCMYK; /** @@ -30,8 +32,9 @@ * A class that provide tooltip for K and k. */ final class KToolTip extends ColorToolTip - { + private static final Log LOG = LogFactory.getLog(KToolTip.class); + /** * Constructor. * @param rowText String instance. @@ -53,7 +56,7 @@ private void createMarkUp(String rowText) } catch (IOException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } } @@ -77,9 +80,9 @@ ICC_Profile getICCProfile() throws IOException // Instead, the "ISO Coated v2 300% (basICColor)" is used, which // is an open alternative to the "ISO Coated v2 300% (ECI)" profile. 
- String name = "org/apache/pdfbox/resources/icc/ISOcoated_v2_300_bas.icc"; + String name = "/org/apache/pdfbox/resources/icc/ISOcoated_v2_300_bas.icc"; - URL url = PDDeviceCMYK.class.getClassLoader().getResource(name); + URL url = PDDeviceCMYK.class.getResource(name); if (url == null) { throw new IOException("Error loading resource: " + name); diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/SCNToolTip.java b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/SCNToolTip.java index 96566444fc0..c862c319c9c 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/SCNToolTip.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/SCNToolTip.java @@ -19,9 +19,12 @@ import java.awt.Color; import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDPattern; /** * @author Khyrul Bashar @@ -29,6 +32,8 @@ */ final class SCNToolTip extends ColorToolTip { + private static final Log LOG = LogFactory.getLog(SCNToolTip.class); + /** * Constructor. * @param rowText String instance. 
@@ -47,7 +52,12 @@ private void createMarkUp(PDResources resources, String colorSpaceName, String r } catch (IOException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); + } + if (colorSpace instanceof PDPattern) + { + setToolTipText("Pattern"); + return; } if (colorSpace != null) { @@ -62,7 +72,7 @@ private void createMarkUp(PDResources resources, String colorSpaceName, String r } catch (IOException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/ToolTipController.java b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/ToolTipController.java index 8ac91aecf65..18f1b9efb6f 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/ToolTipController.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/streampane/tooltip/ToolTipController.java @@ -22,6 +22,9 @@ import javax.swing.text.BadLocationException; import javax.swing.text.JTextComponent; import javax.swing.text.Utilities; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.pdmodel.PDResources; interface ToolTip @@ -31,21 +34,11 @@ interface ToolTip /** * @author Khyrul Bashar - * A class that provieds the tooltip for an operator. + * A class that provides the tooltip for an operator. 
*/ public class ToolTipController { - private static final String FONT_OPERATOR = "Tf"; - private static final String STROKING_COLOR = "SCN"; - private static final String STROKING_COLOR_SPACE = "CS"; - private static final String NON_STROKING_COLOR_SPACE = "cs"; - private static final String NON_STROKING_COLOR = "scn"; - private static final String RGB_STROKING_COLOR = "RG"; - private static final String RGB_NON_STROKING_COLOR = "rg"; - private static final String CMYK_STROKING_COLOR = "K"; - private static final String CMYK_NON_STROKING_COLOR = "k"; - private static final String GRAY_STROKING_COLOR = "G"; - private static final String GRAY_NON_STROKING_COLOR = "g"; + private static final Log LOG = LogFactory.getLog(ToolTipController.class); private final PDResources resources; private JTextComponent textComponent; @@ -89,40 +82,40 @@ public String getToolTip(int offset, JTextComponent textComponent) if (word != null) { ToolTip toolTip; - if (word.equals(FONT_OPERATOR)) + if (word.equals(OperatorName.SET_FONT_AND_SIZE)) { toolTip = new FontToolTip(resources, rowText); return toolTip.getToolTipText(); } - else if (word.equals(STROKING_COLOR)) + else if (word.equals(OperatorName.STROKING_COLOR_N)) { - String colorSpaceName = findColorSpace(offset, STROKING_COLOR_SPACE); + String colorSpaceName = findColorSpace(offset, OperatorName.STROKING_COLORSPACE); if (colorSpaceName != null) { toolTip = new SCNToolTip(resources, colorSpaceName, rowText); return toolTip.getToolTipText(); } } - else if (word.equals(NON_STROKING_COLOR)) + else if (word.equals(OperatorName.NON_STROKING_COLOR_N)) { - String colorSpaceName = findColorSpace(offset, NON_STROKING_COLOR_SPACE); + String colorSpaceName = findColorSpace(offset, OperatorName.NON_STROKING_COLORSPACE); if (colorSpaceName != null) { toolTip = new SCNToolTip(resources, colorSpaceName, rowText); return toolTip.getToolTipText(); } } - else if (word.equals(RGB_STROKING_COLOR) || word.equals(RGB_NON_STROKING_COLOR)) + else if 
(word.equals(OperatorName.STROKING_COLOR_RGB) || word.equals(OperatorName.NON_STROKING_RGB)) { toolTip = new RGToolTip(rowText); return toolTip.getToolTipText(); } - else if (word.equals(CMYK_STROKING_COLOR) || word.equals(CMYK_NON_STROKING_COLOR)) + else if (word.equals(OperatorName.STROKING_COLOR_CMYK) || word.equals(OperatorName.NON_STROKING_CMYK)) { toolTip = new KToolTip(rowText); return toolTip.getToolTipText(); } - else if (word.equals(GRAY_STROKING_COLOR) || word.equals(GRAY_NON_STROKING_COLOR)) + else if (word.equals(OperatorName.STROKING_COLOR_GRAY) || word.equals(OperatorName.NON_STROKING_GRAY)) { toolTip = new GToolTip(rowText); return toolTip.getToolTipText(); @@ -152,8 +145,7 @@ private String findColorSpace(int offset, String colorSpaceType) } catch (BadLocationException e) { - e.printStackTrace(); - return null; + LOG.error(e, e); } return null; } @@ -174,7 +166,7 @@ private String getWord(int offset) } catch (BadLocationException e) { - e.printStackTrace(); + LOG.error(e, e); } return null; } @@ -189,7 +181,7 @@ private String getRowText(int offset) } catch (BadLocationException e) { - e.printStackTrace(); + LOG.error(e, e); } return null; } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/stringpane/StringPane.java b/debugger/src/main/java/org/apache/pdfbox/debugger/stringpane/StringPane.java index 151e5e591e2..2a7fcd5ded6 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/stringpane/StringPane.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/stringpane/StringPane.java @@ -62,7 +62,7 @@ private String getTextString(COSString cosString) String text = cosString.getString(); for (char c : text.toCharArray()) { - if (Character.isISOControl(c)) + if (Character.isISOControl(c) && c != '\n' && c != '\r' && c != '\t') { text = "<" + cosString.toHexString() + ">"; break; diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/treestatus/TreeStatus.java 
b/debugger/src/main/java/org/apache/pdfbox/debugger/treestatus/TreeStatus.java index 965e196e212..8c5e9d9d62a 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/treestatus/TreeStatus.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/treestatus/TreeStatus.java @@ -173,7 +173,7 @@ private List parsePathString(String path) * An object is searched in the tree structure using the identifiers parsed earlier step. * @param obj * @param searchStr - * @return + * @return the Object found or null */ private Object searchNode(Object obj, String searchStr) { diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/DebugLog.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/DebugLog.java new file mode 100644 index 00000000000..14871e1bb9e --- /dev/null +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/DebugLog.java @@ -0,0 +1,164 @@ +/* + * Copyright 2016 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.debugger.ui; + +import org.apache.commons.logging.Log; + +/** + * Custom Log implementation which forwards to LogDialog. 
+ * + * @author John Hewson + */ +public class DebugLog implements Log +{ + private final String name; + + // hardcoded, but kept to aid with debugging custom builds + private static final boolean INFO = true; + private static final boolean TRACE = false; + private static final boolean DEBUG = false; + + public DebugLog(String name) + { + this.name = name; + } + + @Override + public void debug(Object o) + { + if (DEBUG) + { + LogDialog.instance().log(name, "debug", o, null); + } + } + + @Override + public void debug(Object o, Throwable throwable) + { + if (DEBUG) + { + LogDialog.instance().log(name, "debug", o, throwable); + } + } + + @Override + public void error(Object o) + { + LogDialog.instance().log(name, "error", o, null); + } + + @Override + public void error(Object o, Throwable throwable) + { + LogDialog.instance().log(name, "error", o, throwable); + } + + @Override + public void fatal(Object o) + { + LogDialog.instance().log(name, "fatal", o, null); + } + + @Override + public void fatal(Object o, Throwable throwable) + { + LogDialog.instance().log(name, "fatal", o, throwable); + } + + @Override + public void info(Object o) + { + if (INFO) + { + LogDialog.instance().log(name, "info", o, null); + } + } + + @Override + public void info(Object o, Throwable throwable) + { + if (INFO) + { + LogDialog.instance().log(name, "info", o, throwable); + } + } + + @Override + public boolean isDebugEnabled() + { + return DEBUG; + } + + @Override + public boolean isErrorEnabled() + { + return true; + } + + @Override + public boolean isFatalEnabled() + { + return true; + } + + @Override + public boolean isInfoEnabled() + { + return INFO; + } + + @Override + public boolean isTraceEnabled() + { + return TRACE; + } + + @Override + public boolean isWarnEnabled() + { + return true; + } + + @Override + public void trace(Object o) + { + if (TRACE) + { + LogDialog.instance().log(name, "trace", o, null); + } + } + + @Override + public void trace(Object o, Throwable throwable) + { + 
if (TRACE) + { + LogDialog.instance().log(name, "trace", o, throwable); + } + } + + @Override + public void warn(Object o) + { + LogDialog.instance().log(name, "warn", o, null); + } + + @Override + public void warn(Object o, Throwable throwable) + { + LogDialog.instance().log(name, "warn", o, throwable); + } +} diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/DocumentEntry.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/DocumentEntry.java index 78da1b6efc2..8dd08cc49eb 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/DocumentEntry.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/DocumentEntry.java @@ -19,6 +19,7 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.debugger.PDFDebugger; /** * Represents an abstract view of a document in the tree view. @@ -44,7 +45,8 @@ public int getPageCount() public PageEntry getPage(int index) { PDPage page = doc.getPages().get(index); - return new PageEntry(page.getCOSObject(), index + 1); + String pageLabel = PDFDebugger.getPageLabel(doc, index); + return new PageEntry(page.getCOSObject(), index + 1, pageLabel); } public int indexOf(PageEntry page) diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ErrorDialog.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ErrorDialog.java index 9ba34a31d61..f19b1387328 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ErrorDialog.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ErrorDialog.java @@ -39,6 +39,8 @@ import javax.swing.JScrollPane; import javax.swing.JTextPane; import javax.swing.KeyStroke; +import javax.swing.ScrollPaneConstants; +import javax.swing.WindowConstants; /** * A dialog to display a runtime exception stack trace. @@ -50,7 +52,7 @@ * package. 
* */ -@SuppressWarnings("serial") +@SuppressWarnings({"serial","squid:MaximumInheritanceDepth"}) public class ErrorDialog extends JDialog { private static final List FILTERS = Arrays.asList( @@ -110,7 +112,7 @@ public ErrorDialog(JComponent owner, Icon icon, Throwable t) { setIconImage(((ImageIcon) icon).getImage()); } - setDefaultCloseOperation(JDialog.DISPOSE_ON_CLOSE); + setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE); error = t; message = createErrorMessage(error); main = createContent(); @@ -148,8 +150,6 @@ static void position(Component c, Component parent) /** * Creates the display with the top-level exception message followed by a pane (that toggles) * for detailed stack traces. - * - * @param t a non-null exception */ final JComponent createContent() { @@ -169,7 +169,7 @@ public void actionPerformed(ActionEvent e) { if (details == null) { - details = createDetailedMessage(error); + details = createDetailedMessage(); StringBuilder buffer = new StringBuilder(); stacktrace.setText(generateStackTrace(error, buffer).toString()); stacktrace.setCaretPosition(0); @@ -249,14 +249,13 @@ final JComponent createErrorMessage(Throwable t) /** * Creates a non-editable widget to display the detailed stack trace. 
*/ - JScrollPane createDetailedMessage(Throwable t) + JScrollPane createDetailedMessage() { stacktrace = new JTextPane(); stacktrace.setEditable(false); - JScrollPane pane = new JScrollPane(stacktrace, - JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED, - JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED); - return pane; + return new JScrollPane(stacktrace, + ScrollPaneConstants.VERTICAL_SCROLLBAR_AS_NEEDED, + ScrollPaneConstants.HORIZONTAL_SCROLLBAR_AS_NEEDED); } /** diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/FileOpenSaveDialog.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/FileOpenSaveDialog.java index 5f9a49e5a9c..b5f886bae5a 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/FileOpenSaveDialog.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/FileOpenSaveDialog.java @@ -64,6 +64,7 @@ public void approveSelection() public FileOpenSaveDialog(Component parentUI, FileFilter fileFilter) { mainUI = parentUI; + fileChooser.resetChoosableFileFilters(); fileChooser.setFileFilter(fileFilter); } @@ -74,16 +75,21 @@ public FileOpenSaveDialog(Component parentUI, FileFilter fileFilter) * @return true if the file is saved successfully or false if failed. * @throws IOException if there is an error in creation of the file. */ - public boolean saveFile(byte[] bytes) throws IOException + public boolean saveFile(byte[] bytes, String extension) throws IOException { int result = fileChooser.showSaveDialog(mainUI); if (result == JFileChooser.APPROVE_OPTION) { - File selectedFile = fileChooser.getSelectedFile(); + String filename = fileChooser.getSelectedFile().getAbsolutePath(); + if (extension != null && !filename.endsWith(extension)) + { + filename += "." 
+ extension; + } + FileOutputStream outputStream = null; try { - outputStream = new FileOutputStream(selectedFile); + outputStream = new FileOutputStream(filename); outputStream.write(bytes); } finally diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/HighResolutionImageIcon.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/HighResolutionImageIcon.java new file mode 100644 index 00000000000..3c96d0845ae --- /dev/null +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/HighResolutionImageIcon.java @@ -0,0 +1,53 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.debugger.ui; + +import java.awt.Component; +import java.awt.Graphics; +import java.awt.Image; +import javax.swing.Icon; + +public class HighResolutionImageIcon implements Icon +{ + private final Image image; + private final int baseWidth; + private final int baseHeight; + + public HighResolutionImageIcon(Image image, int baseWidth, int baseHeight) + { + this.image = image; + this.baseWidth = baseWidth; + this.baseHeight = baseHeight; + } + + @Override + public void paintIcon(Component c, Graphics g, int x, int y) + { + g.drawImage(image, x, y, getIconWidth(), getIconHeight(), null); + } + + @Override + public int getIconWidth() + { + return baseWidth; + } + + @Override + public int getIconHeight() + { + return baseHeight; + } +} diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ImageTypeMenu.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ImageTypeMenu.java new file mode 100644 index 00000000000..0451ef1680f --- /dev/null +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ImageTypeMenu.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.debugger.ui; + +import javax.swing.ButtonGroup; +import javax.swing.JMenu; +import javax.swing.JRadioButtonMenuItem; + +import org.apache.pdfbox.rendering.ImageType; + +/** + * @author Tilman Hausherr + * + * A singleton class that provides the imagetype menu for the menubar. To act upon the menu item + * selection, the user of the class must add ActionListener which will check for the action command + * and act accordingly. + */ +public final class ImageTypeMenu extends MenuBase +{ + public static final String IMAGETYPE_RGB = "RGB"; + public static final String IMAGETYPE_ARGB = "ARGB"; + public static final String IMAGETYPE_GRAY = "Gray"; + public static final String IMAGETYPE_BITONAL = "Bitonal"; + + private static ImageTypeMenu instance; + private JRadioButtonMenuItem rgbItem; + private JRadioButtonMenuItem argbItem; + private JRadioButtonMenuItem grayItem; + private JRadioButtonMenuItem bitonalItem; + + /** + * Constructor. + */ + private ImageTypeMenu() + { + setMenu(createMenu()); + } + + /** + * Provides the ImageTypeMenu instance. + * @return ImageTypeMenu instance. + */ + public static ImageTypeMenu getInstance() + { + if (instance == null) + { + instance = new ImageTypeMenu(); + } + return instance; + } + + /** + * Set the image type selection. + * @param selection String instance. 
+ */ + public void setImageTypeSelection(String selection) + { + if (IMAGETYPE_RGB.equals(selection)) + { + rgbItem.setSelected(true); + } + else if (IMAGETYPE_ARGB.equals(selection)) + { + argbItem.setSelected(true); + } + else if (IMAGETYPE_GRAY.equals(selection)) + { + grayItem.setSelected(true); + } + else if (IMAGETYPE_BITONAL.equals(selection)) + { + bitonalItem.setSelected(true); + } + else + { + throw new IllegalArgumentException(); + } + } + + public static boolean isImageTypeMenu(String actionCommand) + { + return IMAGETYPE_RGB.equals(actionCommand) || IMAGETYPE_ARGB.equals(actionCommand) || + IMAGETYPE_GRAY.equals(actionCommand) || IMAGETYPE_BITONAL.equals(actionCommand); + } + + public static ImageType getImageType() + { + if (instance.argbItem.isSelected()) + { + return ImageType.ARGB; + } + if (instance.grayItem.isSelected()) + { + return ImageType.GRAY; + } + if (instance.bitonalItem.isSelected()) + { + return ImageType.BINARY; + } + return ImageType.RGB; + } + + public static ImageType getImageType(String actionCommand) + { + if (IMAGETYPE_RGB.equals(actionCommand)) + { + return ImageType.RGB; + } + else if (IMAGETYPE_ARGB.equals(actionCommand)) + { + return ImageType.ARGB; + } + else if (IMAGETYPE_GRAY.equals(actionCommand)) + { + return ImageType.GRAY; + } + else if (IMAGETYPE_BITONAL.equals(actionCommand)) + { + return ImageType.BINARY; + } + else + { + throw new IllegalArgumentException(); + } + } + + private JMenu createMenu() + { + JMenu menu = new JMenu(); + menu.setText("Image type"); + + rgbItem = new JRadioButtonMenuItem(); + argbItem = new JRadioButtonMenuItem(); + grayItem = new JRadioButtonMenuItem(); + bitonalItem = new JRadioButtonMenuItem(); + rgbItem.setSelected(true); + + ButtonGroup bg = new ButtonGroup(); + bg.add(rgbItem); + bg.add(argbItem); + bg.add(grayItem); + bg.add(bitonalItem); + + rgbItem.setText(IMAGETYPE_RGB); + argbItem.setText(IMAGETYPE_ARGB); + grayItem.setText(IMAGETYPE_GRAY); + 
bitonalItem.setText(IMAGETYPE_BITONAL); + + menu.add(rgbItem); + menu.add(argbItem); + menu.add(grayItem); + menu.add(bitonalItem); + + return menu; + } +} diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ImageUtil.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ImageUtil.java index 1c0312b08b9..51dcf4bf93e 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ImageUtil.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ImageUtil.java @@ -36,30 +36,34 @@ private ImageUtil() * @param image The image to rotate. * @param rotation The rotation in degrees. * @return The rotated image. + * @throws IllegalArgumentException if the angle isn't a multiple of 90°. */ public static BufferedImage getRotatedImage(BufferedImage image, int rotation) { int width = image.getWidth(); int height = image.getHeight(); - double x = 0, y = 0; + int x = 0; + int y = 0; BufferedImage rotatedImage; - switch (rotation % 360) + switch ((rotation + 360) % 360) { + case 0: + return image; case 90: x = height; - rotatedImage = new BufferedImage(height, width, BufferedImage.TYPE_INT_RGB); + rotatedImage = new BufferedImage(height, width, image.getType()); break; case 270: y = width; - rotatedImage = new BufferedImage(height, width, BufferedImage.TYPE_INT_RGB); + rotatedImage = new BufferedImage(height, width, image.getType()); break; case 180: x = width; y = height; - rotatedImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); + rotatedImage = new BufferedImage(width, height, image.getType()); break; default: - return image; + throw new IllegalArgumentException("Only multiple of 90° are supported"); } Graphics2D g = (Graphics2D) rotatedImage.getGraphics(); g.translate(x, y); diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/LogDialog.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/LogDialog.java new file mode 100644 index 00000000000..74f9610b41b --- /dev/null +++ 
b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/LogDialog.java @@ -0,0 +1,223 @@ +/* + * Copyright 2016 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.debugger.ui; + +import java.awt.Color; +import java.awt.Container; +import java.awt.Frame; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.List; +import javax.swing.JDialog; +import javax.swing.JLabel; +import javax.swing.JScrollPane; +import javax.swing.JTextPane; +import javax.swing.text.BadLocationException; +import javax.swing.text.SimpleAttributeSet; +import javax.swing.text.StyleConstants; +import javax.swing.text.StyledDocument; + +/** + * Custom log dialog. 
+ * + * @author John Hewson + */ +@SuppressWarnings({"serial","squid:MaximumInheritanceDepth"}) +public class LogDialog extends JDialog +{ + private static LogDialog instance; + private final JLabel logLabel; + private final JTextPane textPane; + private final JScrollPane scrollPane; + private int fatalCount = 0; + private int errorCount = 0; + private int warnCount = 0; + private int otherCount = 0; + private int exceptionCount = 0; + + private LogDialog(Frame owner, JLabel logLabel) + { + super(owner); + this.logLabel = logLabel; + + textPane = new JTextPane(); + scrollPane = new JScrollPane(textPane); + getContentPane().add(scrollPane); + + this.pack(); + } + + public static void init(Frame owner, JLabel logLabel) + { + instance = new LogDialog(owner, logLabel); + } + + public static LogDialog instance() + { + return instance; + } + + public void log(String name, String level, Object o, Throwable throwable) + { + StyledDocument doc = textPane.getStyledDocument(); + + String levelText; + SimpleAttributeSet levelStyle = new SimpleAttributeSet(); + if ("fatal".equals(level)) + { + levelText = "Fatal"; + StyleConstants.setForeground(levelStyle, Color.WHITE); + StyleConstants.setBackground(levelStyle, Color.BLACK); + fatalCount++; + } + else if ("error".equals(level)) + { + levelText = "Error"; + StyleConstants.setForeground(levelStyle, new Color(0xFF291F)); + StyleConstants.setBackground(levelStyle, new Color(0xFFF0F0)); + errorCount++; + } + else if ("warn".equals(level)) + { + levelText = "Warning"; + StyleConstants.setForeground(levelStyle, new Color(0x614201)); + StyleConstants.setBackground(levelStyle, new Color(0xFFFCE5)); + warnCount++; + } + else if ("info".equals(level)) + { + levelText = "Info"; + StyleConstants.setForeground(levelStyle, new Color(0x203261)); + StyleConstants.setBackground(levelStyle, new Color(0xE2E8FF)); + otherCount++; + } + else if ("debug".equals(level)) + { + levelText = "Debug"; + StyleConstants.setForeground(levelStyle, new 
Color(0x32612E)); + StyleConstants.setBackground(levelStyle, new Color(0xF4FFEC)); + otherCount++; + } + else if ("trace".equals(level)) + { + levelText = "Trace"; + StyleConstants.setForeground(levelStyle, new Color(0x64438D)); + StyleConstants.setBackground(levelStyle, new Color(0xFEF3FF)); + otherCount++; + } + else + { + throw new Error(level); + } + + SimpleAttributeSet nameStyle = new SimpleAttributeSet(); + StyleConstants.setForeground(nameStyle, new Color(0x6A6A6A)); + + String shortName = name.substring(name.lastIndexOf('.') + 1); + String message = o == null ? "(null)" : o.toString(); + + if (throwable != null) + { + StringWriter sw = new StringWriter(); + throwable.printStackTrace(new PrintWriter(sw)); + message += "\n " + sw.toString(); + exceptionCount++; + } + + try + { + doc.insertString(doc.getLength(), " " + levelText + " ", levelStyle); + doc.insertString(doc.getLength(), " [" + shortName + "]", nameStyle); + doc.insertString(doc.getLength(), " " + message + "\n", null); + } + catch (BadLocationException e) + { + throw new Error(e); + } + textPane.setCaretPosition(doc.getLength()); + + // update status bar with new counts + updateStatusBar(); + } + + private void updateStatusBar() + { + List infos = new ArrayList(); + + if (exceptionCount > 0) + { + infos.add(exceptionCount + " exception" + (exceptionCount > 1 ? "s" : "")); + } + + if (fatalCount > 0) + { + infos.add(fatalCount + " fatal error" + (fatalCount > 1 ? "s" : "")); + } + + if (errorCount > 0) + { + infos.add(errorCount + " error" + (errorCount > 1 ? "s" : "")); + } + + if (warnCount > 0) + { + infos.add(warnCount + " warning" + (warnCount > 1 ? "s" : "")); + } + + if (otherCount > 0) + { + infos.add(otherCount + " message" + (otherCount > 1 ?
"s" : "")); + } + + String info = ""; + for (String str : infos) + { + if (info.length() > 0) + { + info += ", "; + } + info += str; + } + + logLabel.setText(info); + } + + public void clear() + { + fatalCount = 0; + errorCount = 0; + warnCount = 0; + otherCount = 0; + exceptionCount = 0; + textPane.setText(""); + logLabel.setText(""); + } + + // these two just to avoid the "overridable method call in constructor" warning + + @Override + public final Container getContentPane() + { + return super.getContentPane(); + } + + @Override + public final void pack() + { + super.pack(); + } +} diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/MenuBase.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/MenuBase.java index fe3fff7de11..7667628b2c0 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/MenuBase.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/MenuBase.java @@ -57,7 +57,7 @@ public void setEnableMenu(boolean isEnable) } /** - * Add the ActionListener for the menuitems. + * Add the ActionListener for the menu items. * * @param listener ActionListener. 
*/ diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/OSXAdapter.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/OSXAdapter.java index 8736fb9f92c..b2e4c3d22a7 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/OSXAdapter.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/OSXAdapter.java @@ -18,7 +18,7 @@ /* * This file includes code under the following terms: - * + * * Version: 2.0 * * Disclaimer: IMPORTANT: This Apple software is supplied to you by @@ -64,10 +64,14 @@ package org.apache.pdfbox.debugger.ui; +import java.awt.Desktop; +import java.io.File; import java.lang.reflect.InvocationHandler; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.lang.reflect.Proxy; +import java.util.StringTokenizer; +import java.util.List; /** * Hooks existing preferences/about/quit functionality from an @@ -87,10 +91,63 @@ public class OSXAdapter implements InvocationHandler protected String proxySignature; static Object macOSXApplication; + + private static boolean isMinJdk9() + { + // strategy from lucene-solr/lucene/core/src/java/org/apache/lucene/util/Constants.java + String version = System.getProperty("java.specification.version"); + final StringTokenizer st = new StringTokenizer(version, "."); + try + { + int major = Integer.parseInt(st.nextToken()); + int minor = 0; + if (st.hasMoreTokens()) + { + minor = Integer.parseInt(st.nextToken()); + } + return major > 1 || (major == 1 && minor >= 9); + } + catch (NumberFormatException nfe) + { + // maybe some new numbering scheme in the 22nd century + return true; + } + } // Pass this method an Object and Method equipped to perform application shutdown logic // The method passed should return a boolean stating whether or not the quit should occur - public static void setQuitHandler(Object target, Method quitHandler) { + public static void setQuitHandler(final Object target, final Method quitHandler) + { + if (isMinJdk9()) + { + 
try + { + Desktop desktopObject = Desktop.getDesktop(); + Class filesHandlerClass = Class.forName("java.awt.desktop.QuitHandler"); + final Method setQuitHandlerMethod = desktopObject.getClass().getMethod("setQuitHandler", filesHandlerClass); + Object osxAdapterProxy = Proxy.newProxyInstance(OSXAdapter.class.getClassLoader(), + new Class[] { filesHandlerClass }, new InvocationHandler() + { + @Override + public Object invoke(Object proxy, Method method, Object[] args) + throws Throwable + { + if ("handleQuitRequestWith".equals(method.getName())) + { + // We just call our own quit handler + quitHandler.invoke(target); + } + return null; + } + }); + setQuitHandlerMethod.invoke(desktopObject, osxAdapterProxy); + } + catch (Exception e) + { + e.printStackTrace(); + } + return; + } setHandler(new OSXAdapter("handleQuit", target, quitHandler)); } @@ -104,8 +161,8 @@ public static void setAboutHandler(Object target, Method aboutHandler) { // If we're setting a handler, enable the About menu item by calling // com.apple.eawt.Application reflectively try { - Method enableAboutMethod = macOSXApplication.getClass().getDeclaredMethod("setEnabledAboutMenu", new Class[] { boolean.class }); - enableAboutMethod.invoke(macOSXApplication, new Object[] { Boolean.valueOf(enableAboutMenu) }); + Method enableAboutMethod = macOSXApplication.getClass().getDeclaredMethod("setEnabledAboutMenu", boolean.class); + enableAboutMethod.invoke(macOSXApplication, Boolean.valueOf(enableAboutMenu)); } catch (Exception ex) { System.err.println("OSXAdapter could not access the About Menu"); throw new RuntimeException(ex); @@ -122,8 +179,8 @@ public static void setPreferencesHandler(Object target, Method prefsHandler) { // If we're setting a handler, enable the Preferences menu item by calling // com.apple.eawt.Application reflectively try { - Method enablePrefsMethod = macOSXApplication.getClass().getDeclaredMethod("setEnabledPreferencesMenu", new Class[] { boolean.class }); - 
enablePrefsMethod.invoke(macOSXApplication, new Object[] { Boolean.valueOf(enablePrefsMenu) }); + Method enablePrefsMethod = macOSXApplication.getClass().getDeclaredMethod("setEnabledPreferencesMenu", boolean.class); + enablePrefsMethod.invoke(macOSXApplication, enablePrefsMenu); } catch (Exception ex) { System.err.println("OSXAdapter could not access the About Menu"); throw new RuntimeException(ex); @@ -133,16 +190,65 @@ public static void setPreferencesHandler(Object target, Method prefsHandler) { // Pass this method an Object and a Method equipped to handle document events from the Finder // Documents are registered with the Finder via the CFBundleDocumentTypes dictionary in the // application bundle's Info.plist - public static void setFileHandler(Object target, Method fileHandler) { - setHandler(new OSXAdapter("handleOpenFile", target, fileHandler) { + public static void setFileHandler(Object target, Method fileHandler) + { + if (isMinJdk9()) + { + try + { + Desktop desktopObject = Desktop.getDesktop(); + Class filesHandlerClass = Class.forName("java.awt.desktop.OpenFilesHandler"); + Method setOpenFileHandlerMethod = desktopObject.getClass().getMethod("setOpenFileHandler", filesHandlerClass); + Object osxAdapterProxy = Proxy.newProxyInstance(OSXAdapter.class.getClassLoader(), + new Class[] + { + filesHandlerClass + }, new OSXAdapter("openFiles", target, fileHandler) + { + // Override OSXAdapter.callTarget to send information on the + // file to be opened + @Override + public boolean callTarget(Object openFilesEvent) + { + if (openFilesEvent != null) + { + try + { + Method getFilesMethod = openFilesEvent.getClass().getDeclaredMethod("getFiles", + (Class[]) null); + @SuppressWarnings("unchecked") + List files = (List) getFilesMethod.invoke(openFilesEvent, + (Object[]) null); + this.targetMethod.invoke(this.targetObject, files.get(0).getAbsolutePath()); + } + catch (Exception ex) + { + throw new RuntimeException(ex); + } + } + return true; + } + }); + 
setOpenFileHandlerMethod.invoke(desktopObject, osxAdapterProxy); + } + catch (Exception e) + { + e.printStackTrace(); + } + return; + } + /* JDK <= 1.8, using Apple classes */ + setHandler(new OSXAdapter("handleOpenFile", target, fileHandler) + { // Override OSXAdapter.callTarget to send information on the // file to be opened + @Override public boolean callTarget(Object appleEvent) { if (appleEvent != null) { try { Method getFilenameMethod = appleEvent.getClass().getDeclaredMethod("getFilename", (Class[])null); String filename = (String) getFilenameMethod.invoke(appleEvent, (Object[])null); - this.targetMethod.invoke(this.targetObject, new Object[] { filename }); + this.targetMethod.invoke(this.targetObject, filename); } catch (Exception ex) { throw new RuntimeException(ex); } @@ -157,13 +263,13 @@ public static void setHandler(OSXAdapter adapter) { try { Class applicationClass = Class.forName("com.apple.eawt.Application"); if (macOSXApplication == null) { - macOSXApplication = applicationClass.getConstructor((Class[])null).newInstance((Object[])null); + macOSXApplication = applicationClass.getDeclaredConstructor((Class[])null).newInstance((Object[])null); } Class applicationListenerClass = Class.forName("com.apple.eawt.ApplicationListener"); - Method addListenerMethod = applicationClass.getDeclaredMethod("addApplicationListener", new Class[] { applicationListenerClass }); + Method addListenerMethod = applicationClass.getDeclaredMethod("addApplicationListener", applicationListenerClass); // Create a proxy object around this handler that can be reflectively added as an Apple ApplicationListener Object osxAdapterProxy = Proxy.newProxyInstance(OSXAdapter.class.getClassLoader(), new Class[]{applicationListenerClass}, adapter); - addListenerMethod.invoke(macOSXApplication, new Object[] { osxAdapterProxy }); + addListenerMethod.invoke(macOSXApplication, osxAdapterProxy); } catch (ClassNotFoundException cnfe) { System.err.println("This version of Mac OS X does not 
support the Apple EAWT. ApplicationEvent handling has been disabled (" + cnfe + ")"); } catch (Exception ex) { // Likely a NoSuchMethodException or an IllegalAccessException loading/invoking eawt.Application methods @@ -188,7 +294,7 @@ public boolean callTarget(Object appleEvent) throws InvocationTargetException, I if (result == null) { return true; } - return Boolean.valueOf(result.toString()).booleanValue(); + return Boolean.valueOf(result.toString()); } // InvocationHandler implementation @@ -213,9 +319,9 @@ protected boolean isCorrectMethod(Method method, Object[] args) { protected void setApplicationEventHandled(Object event, boolean handled) { if (event != null) { try { - Method setHandledMethod = event.getClass().getDeclaredMethod("setHandled", new Class[] { boolean.class }); + Method setHandledMethod = event.getClass().getDeclaredMethod("setHandled", boolean.class); // If the target method returns a boolean, use that as a hint - setHandledMethod.invoke(event, new Object[] { Boolean.valueOf(handled) }); + setHandledMethod.invoke(event, Boolean.valueOf(handled)); } catch (Exception ex) { System.err.println("OSXAdapter was unable to handle an ApplicationEvent: " + event); throw new RuntimeException(ex); diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/PDFTreeCellRenderer.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/PDFTreeCellRenderer.java index 3dd0ff29312..6ecad49196c 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/PDFTreeCellRenderer.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/PDFTreeCellRenderer.java @@ -102,7 +102,7 @@ private Object toTreeObject(Object nodeValue) else { ArrayEntry entry = (ArrayEntry) nodeValue; - key = "" + entry.getIndex(); + key = Integer.toString(entry.getIndex()); object = toTreeObject(entry.getValue()); value = entry.getValue(); item = entry.getItem(); @@ -125,15 +125,15 @@ private Object toTreeObject(Object nodeValue) } else if (nodeValue instanceof COSBoolean) 
{ - result = "" + ((COSBoolean) nodeValue).getValue(); + result = Boolean.toString(((COSBoolean) nodeValue).getValue()); } else if (nodeValue instanceof COSFloat) { - result = "" + ((COSFloat) nodeValue).floatValue(); + result = Float.toString(((COSFloat) nodeValue).floatValue()); } else if (nodeValue instanceof COSInteger) { - result = "" + ((COSInteger) nodeValue).intValue(); + result = Integer.toString(((COSInteger) nodeValue).intValue()); } else if (nodeValue instanceof COSString) { @@ -188,16 +188,36 @@ private String toTreePostfix(Object nodeValue) StringBuilder sb = new StringBuilder(); COSDictionary dict = (COSDictionary)nodeValue; + + if (COSName.ANNOT.equals(dict.getCOSName(COSName.TYPE)) + && COSName.WIDGET.equals(dict.getCOSName(COSName.SUBTYPE)) || + dict.containsKey(COSName.T) && dict.containsKey(COSName.KIDS)) + { + String name = dict.getString(COSName.T); + if (name != null) + { + sb.append(" Name: "); + sb.append(name); + sb.append(' '); + } + } + if (dict.containsKey(COSName.TYPE)) { COSName type = dict.getCOSName(COSName.TYPE); - sb.append(" /T:").append(type.getName()); + if (type != null) + { + sb.append(" /T:").append(type.getName()); + } } - + if (dict.containsKey(COSName.SUBTYPE)) { COSName subtype = dict.getCOSName(COSName.SUBTYPE); - sb.append(" /S:").append(subtype.getName()); + if (subtype != null) + { + sb.append(" /S:").append(subtype.getName()); + } } return sb.toString(); } @@ -315,7 +335,7 @@ else if (nodeValue instanceof PageEntry) /** * An ImageIcon which allows other ImageIcon overlays. 
*/ - private class OverlayIcon extends ImageIcon + private static class OverlayIcon extends ImageIcon { private final ImageIcon base; private final List overlays; diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/PDFTreeModel.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/PDFTreeModel.java index 7a577373ae8..2230ed0b393 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/PDFTreeModel.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/PDFTreeModel.java @@ -92,7 +92,7 @@ public void addTreeModelListener(TreeModelListener l) * Returns the child of parent at index index in the parent's child * array. parent must be a node previously obtained from this data source. This * should not return null if index is a valid index for - * parent (that is index >= 0 && + * parent (that is index >= 0 && * index < getChildCount(parent)). * * @param parent a node in the tree, obtained from this data source @@ -306,7 +306,7 @@ public Object getRoot() @Override public boolean isLeaf(Object node) { - boolean isLeaf = !(node instanceof COSDictionary || + return !(node instanceof COSDictionary || node instanceof COSArray || node instanceof COSDocument || node instanceof DocumentEntry || @@ -314,7 +314,6 @@ public boolean isLeaf(Object node) node instanceof COSObject || (node instanceof MapEntry && !isLeaf(((MapEntry)node).getValue()) ) || (node instanceof ArrayEntry && !isLeaf(((ArrayEntry)node).getValue()) )); - return isLeaf; } /** Removes a listener previously added with diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/PageEntry.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/PageEntry.java index 8c7ef56c27c..2011be41b3e 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/PageEntry.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/PageEntry.java @@ -18,6 +18,7 @@ package org.apache.pdfbox.debugger.ui; import org.apache.pdfbox.cos.COSArray; +import 
org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; @@ -30,11 +31,13 @@ public class PageEntry { private final COSDictionary dict; private final int pageNum; - - public PageEntry(COSDictionary page, int pageNum) + private final String pageLabel; + + public PageEntry(COSDictionary page, int pageNum, String pageLabel) { dict = page; this.pageNum = pageNum; + this.pageLabel = pageLabel; } public COSDictionary getDict() @@ -50,7 +53,7 @@ public int getPageNum() @Override public String toString() { - return "Page: " + pageNum; + return "Page: " + pageNum + (pageLabel == null ? "" : " - " + pageLabel); } public String getPath() @@ -61,8 +64,18 @@ public String getPath() COSDictionary node = dict; while (node.containsKey(COSName.PARENT)) { - COSDictionary parent = (COSDictionary)node.getDictionaryObject(COSName.PARENT); - COSArray kids = (COSArray)parent.getDictionaryObject(COSName.KIDS); + COSBase base = node.getDictionaryObject(COSName.PARENT); + if (!(base instanceof COSDictionary)) + { + return ""; + } + COSDictionary parent = (COSDictionary) base; + base = parent.getDictionaryObject(COSName.KIDS); + if (!(base instanceof COSArray)) + { + return ""; + } + COSArray kids = (COSArray) base; int idx = kids.indexOfObject(node); sb.append("/Kids/[").append(idx).append("]"); node = parent; diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ReaderBottomPanel.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ReaderBottomPanel.java index d5f6f774d67..80f13ac6b46 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ReaderBottomPanel.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ReaderBottomPanel.java @@ -16,12 +16,16 @@ */ package org.apache.pdfbox.debugger.ui; +import java.awt.BorderLayout; +import java.awt.Cursor; import java.awt.Dimension; - +import java.awt.Window; +import java.awt.event.MouseAdapter; +import java.awt.event.MouseEvent; +import 
javax.swing.JLabel; import javax.swing.JPanel; +import javax.swing.border.EmptyBorder; -import javax.swing.JLabel; -import java.awt.FlowLayout; /** * A panel to display at the bottom of the window for status and other stuff. * @@ -29,31 +33,47 @@ */ public class ReaderBottomPanel extends JPanel { - private JLabel statusLabel = null; - - /** - * This is the default constructor. - */ + private JLabel logLabel = null; + public ReaderBottomPanel() { - FlowLayout flowLayout = new FlowLayout(); - this.setLayout(flowLayout); - this.setComponentOrientation(java.awt.ComponentOrientation.LEFT_TO_RIGHT); - this.setPreferredSize(new Dimension(1000, 20)); - flowLayout.setAlignment(FlowLayout.LEFT); + BorderLayout layout = new BorderLayout(); + this.setLayout(layout); + statusLabel = new JLabel(); statusLabel.setText("Ready"); - this.add(statusLabel, null); - } + this.add(statusLabel, BorderLayout.WEST); - /** - * Return the status label. - * - * @return JLabel The status label. - */ + logLabel = new JLabel(); + logLabel.setCursor(new Cursor(Cursor.HAND_CURSOR)); + logLabel.addMouseListener(new MouseAdapter() + { + @Override + public void mouseClicked(MouseEvent e) + { + Window viewer = LogDialog.instance().getOwner(); + + // show the log window + LogDialog.instance().setSize(800, 400); + LogDialog.instance().setVisible(true); + LogDialog.instance().setLocation(viewer.getLocationOnScreen().x + viewer.getWidth() / 2, + viewer.getLocationOnScreen().y + viewer.getHeight() / 2); + } + }); + this.add(logLabel, BorderLayout.EAST); + + this.setBorder(new EmptyBorder(0, 5, 0, 5)); + this.setPreferredSize(new Dimension(1000, 24)); + } + public JLabel getStatusLabel() { return statusLabel; } + + public JLabel getLogLabel() + { + return logLabel; + } } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/RecentFiles.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/RecentFiles.java index 563f7bb431c..69822afa895 100644 --- 
a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/RecentFiles.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/RecentFiles.java @@ -160,7 +160,7 @@ private String[] breakString(String fullPath) private void writeHistoryToPref(Queue filePaths) { - if (filePaths.size() == 0) + if (filePaths.isEmpty()) { return; } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/RenderDestinationMenu.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/RenderDestinationMenu.java new file mode 100644 index 00000000000..48e0c585e7e --- /dev/null +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/RenderDestinationMenu.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.debugger.ui; + +import javax.swing.ButtonGroup; +import javax.swing.JMenu; +import javax.swing.JRadioButtonMenuItem; + +import org.apache.pdfbox.rendering.RenderDestination; + +/** + * @author Tilman Hausherr + * + * A singleton class that provides the RenderDestination menu for the menubar. To act upon the menu + * item selection, the user of the class must add ActionListener which will check for the action + * command and act accordingly. 
+ */ +public final class RenderDestinationMenu extends MenuBase +{ + public static final String RENDER_DESTINATION_EXPORT = "Export"; + public static final String RENDER_DESTINATION_PRINT = "Print"; + public static final String RENDER_DESTINATION_VIEW = "View"; + + private static RenderDestinationMenu instance; + private JRadioButtonMenuItem exportItem; + private JRadioButtonMenuItem printItem; + private JRadioButtonMenuItem viewItem; + + /** + * Constructor. + */ + private RenderDestinationMenu() + { + setMenu(createMenu()); + } + + /** + * Provides the RenderDestinationMenu instance. + * @return RenderDestinationMenu instance. + */ + public static RenderDestinationMenu getInstance() + { + if (instance == null) + { + instance = new RenderDestinationMenu(); + } + return instance; + } + + /** + * Set the render destination selection. + * @param selection one of the RENDER_DESTINATION_* menu strings; any other value throws IllegalArgumentException. + */ + public void setRenderDestinationSelection(String selection) + { + if (RENDER_DESTINATION_EXPORT.equals(selection)) + { + exportItem.setSelected(true); + } + else if (RENDER_DESTINATION_PRINT.equals(selection)) + { + printItem.setSelected(true); + } + else if (RENDER_DESTINATION_VIEW.equals(selection)) + { + viewItem.setSelected(true); + } + else + { + throw new IllegalArgumentException(); + } + } + + public static boolean isRenderDestinationMenu(String actionCommand) + { + return RENDER_DESTINATION_EXPORT.equals(actionCommand) || RENDER_DESTINATION_PRINT.equals(actionCommand) || + RENDER_DESTINATION_VIEW.equals(actionCommand); + } + + public static RenderDestination getRenderDestination() + { + if (instance.printItem.isSelected()) + { + return RenderDestination.PRINT; + } + if (instance.viewItem.isSelected()) + { + return RenderDestination.VIEW; + } + return RenderDestination.EXPORT; + } + + public static RenderDestination getRenderDestination(String actionCommand) + { + if (RENDER_DESTINATION_EXPORT.equals(actionCommand)) + { + return RenderDestination.EXPORT; + } + else if
(RENDER_DESTINATION_PRINT.equals(actionCommand)) + { + return RenderDestination.PRINT; + } + else if (RENDER_DESTINATION_VIEW.equals(actionCommand)) + { + return RenderDestination.VIEW; + } + else + { + throw new IllegalArgumentException(); + } + } + + private JMenu createMenu() + { + JMenu menu = new JMenu(); + menu.setText("Render destination"); + + exportItem = new JRadioButtonMenuItem(); + printItem = new JRadioButtonMenuItem(); + viewItem = new JRadioButtonMenuItem(); + exportItem.setSelected(true); + + ButtonGroup bg = new ButtonGroup(); + bg.add(exportItem); + bg.add(printItem); + bg.add(viewItem); + + exportItem.setText(RENDER_DESTINATION_EXPORT); + printItem.setText(RENDER_DESTINATION_PRINT); + viewItem.setText(RENDER_DESTINATION_VIEW); + + menu.add(exportItem); + menu.add(printItem); + menu.add(viewItem); + + return menu; + } +} diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/Tree.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/Tree.java index 14982202df4..d0bf4e56941 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/Tree.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/Tree.java @@ -18,6 +18,7 @@ package org.apache.pdfbox.debugger.ui; import java.awt.Component; +import java.awt.Desktop; import java.awt.Point; import java.awt.Toolkit; import java.awt.datatransfer.Clipboard; @@ -25,6 +26,8 @@ import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.awt.event.MouseEvent; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -32,6 +35,8 @@ import javax.swing.JMenuItem; import javax.swing.JPopupMenu; import javax.swing.JTree; +import javax.swing.filechooser.FileFilter; +import javax.swing.filechooser.FileNameExtensionFilter; import javax.swing.tree.TreePath; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; @@ -46,10 +51,10 @@ * * A customized tree for 
PDFDebugger. */ +@SuppressWarnings({"serial","squid:S1948"}) public class Tree extends JTree { private final JPopupMenu treePopupMenu; - private final Component parent; private final Object rootNode; /** @@ -60,8 +65,7 @@ public Tree(Component parentComponent) { treePopupMenu = new JPopupMenu(); setComponentPopupMenu(treePopupMenu); - parent = parentComponent; - rootNode = this.getModel().getRoot(); + rootNode = getModel().getRoot(); } @Override @@ -70,12 +74,13 @@ public Point getPopupLocation(MouseEvent event) if (event != null) { TreePath path = getClosestPathForLocation(event.getX(), event.getY()); - setSelectionPath(path); - treePopupMenu.removeAll(); - for (JMenuItem menuItem : getPopupMenuItems(path)) + if (path == null) { - treePopupMenu.add(menuItem); + return null; } + setSelectionPath(path); + treePopupMenu.removeAll(); + addPopupMenuItems(path); return event.getPoint(); } return null; @@ -86,12 +91,11 @@ public Point getPopupLocation(MouseEvent event) * @param nodePath is instance of TreePath of the specified Node. 
* @return the JMenuItem list for the node */ - private List getPopupMenuItems(TreePath nodePath) + private void addPopupMenuItems(TreePath nodePath) { Object obj = nodePath.getLastPathComponent(); - List menuItems = new ArrayList(); - menuItems.add(getTreePathMenuItem(nodePath)); + treePopupMenu.add(getTreePathMenuItem(nodePath)); if (obj instanceof MapEntry) { @@ -102,24 +106,34 @@ else if (obj instanceof ArrayEntry) obj = ((ArrayEntry) obj).getValue(); } - if (obj instanceof COSStream) + if (!(obj instanceof COSStream)) { - COSStream stream = (COSStream) obj; - menuItems.add(getUnFilteredStreamSaveMenu(stream)); - if (stream.getFilters() != null) + return; + } + + treePopupMenu.addSeparator(); + + COSStream stream = (COSStream) obj; + treePopupMenu.add(getStreamSaveMenu(stream, nodePath)); + + if (stream.getFilters() != null) + { + if (stream.getFilters() instanceof COSArray && ((COSArray) stream.getFilters()).size() >= 2) { - if (stream.getFilters() instanceof COSArray && ((COSArray) stream.getFilters()).size() >= 2) + for (JMenuItem menuItem : getPartiallyDecodedStreamSaveMenu(stream)) { - for (JMenuItem menuItem : getPartiallyFilteredStreamSaveMenu(stream)) - { - menuItems.add(menuItem); - } + treePopupMenu.add(menuItem); } - menuItems.add(getFilteredStreamSaveMenu(stream)); } + treePopupMenu.add(getRawStreamSaveMenu(stream)); + } + + JMenuItem open = getFileOpenMenu(stream, nodePath); + if (open != null) + { + treePopupMenu.addSeparator(); + treePopupMenu.add(open); } - - return menuItems; } /** @@ -143,13 +157,13 @@ public void actionPerformed(ActionEvent actionEvent) } /** - * Produce JMenuItem that saves filtered stream + * Produce JMenuItem that saves the raw stream * @param cosStream stream to save - * @return JMenuItem for saving filtered stream + * @return JMenuItem for saving the raw stream */ - private JMenuItem getFilteredStreamSaveMenu(final COSStream cosStream) + private JMenuItem getRawStreamSaveMenu(final COSStream cosStream) { - JMenuItem 
saveMenuItem = new JMenuItem("Save Filtered Stream (" + getFilters(cosStream) + ")..."); + JMenuItem saveMenuItem = new JMenuItem("Save Raw Stream (" + getFilters(cosStream) + ") As..."); saveMenuItem.addActionListener(new ActionListener() { @Override @@ -158,7 +172,7 @@ public void actionPerformed(ActionEvent actionEvent) try { byte[] bytes = IOUtils.toByteArray(cosStream.createRawInputStream()); - saveStream(bytes); + saveStream(bytes, null, null); } catch (IOException e) { @@ -176,36 +190,75 @@ private String getFilters(COSStream cosStream) { StringBuilder sb = new StringBuilder(); COSBase filters = cosStream.getFilters(); - if (filters != null) + if (filters instanceof COSName) { - if (filters instanceof COSName) - { - sb.append(((COSName) filters).getName()); - } - else if (filters instanceof COSArray) + sb.append(((COSName) filters).getName()); + } + else if (filters instanceof COSArray) + { + COSArray filterArray = (COSArray) filters; + for (int i = 0; i < filterArray.size(); i++) { - COSArray filterArray = (COSArray) filters; - for (int i = 0; i < filterArray.size(); i++) + if (i > 0) { - if (i > 0) - { - sb.append(", "); - } - sb.append(((COSName) filterArray.get(i)).getName()); + sb.append(", "); } + sb.append(((COSName) filterArray.get(i)).getName()); } } return sb.toString(); } /** - * Produce JMenuItem that saves unfiltered stream + * Produce JMenuItem that saves the stream * @param cosStream stream to save - * @return JMenuItem for saving unfiltered stream + * @return JMenuItem for saving stream */ - private JMenuItem getUnFilteredStreamSaveMenu(final COSStream cosStream) + private JMenuItem getStreamSaveMenu(final COSStream cosStream, final TreePath nodePath) { - JMenuItem saveMenuItem = new JMenuItem("Save Unfiltered Stream..."); + // set file extension based on stream type + final String extension = getFileExtensionForStream(cosStream, nodePath); + final FileFilter fileFilter; + + if (extension != null) + { + if (extension.equals("pfb")) + { +
fileFilter = new FileNameExtensionFilter("Type 1 Font (*.pfb)", "pfb"); + } + else if (extension.equals("ttf")) + { + fileFilter = new FileNameExtensionFilter("TrueType Font (*.ttf)", "ttf"); + } + else if (extension.equals("cff")) + { + fileFilter = new FileNameExtensionFilter("Compact Font Format (*.cff)", "cff"); + } + else if (extension.equals("otf")) + { + fileFilter = new FileNameExtensionFilter("OpenType Font (*.otf)", "otf"); + } + else + { + fileFilter = null; + } + } + else + { + fileFilter = null; + } + + String format; + if (extension != null) + { + format = " " + extension.toUpperCase(); + } + else + { + format = ""; + } + + JMenuItem saveMenuItem = new JMenuItem("Save Stream As" + format + "..."); saveMenuItem.addActionListener(new ActionListener() { @Override @@ -214,7 +267,7 @@ public void actionPerformed(ActionEvent actionEvent) try { byte[] bytes = IOUtils.toByteArray(cosStream.createInputStream()); - saveStream(bytes); + saveStream(bytes, fileFilter, extension); } catch (IOException e) { @@ -226,11 +279,88 @@ public void actionPerformed(ActionEvent actionEvent) } /** - * produce possible partially filtered stream saving menu items + * Returns the recommended file extension for the given cos stream. + */ + private String getFileExtensionForStream(final COSStream cosStream, final TreePath nodePath) + { + String name = nodePath.getLastPathComponent().toString(); + if (name.equals("FontFile")) + { + return "pfb"; + } + else if (name.equals("FontFile2")) + { + return "ttf"; + } + else if (name.equals("FontFile3")) + { + if (cosStream.getCOSName(COSName.SUBTYPE) == COSName.OPEN_TYPE) + { + return "otf"; + } + else + { + return "cff"; + } + } + return null; + } + + /** + * Produce JMenuItem that opens the stream with the system's default app. 
+ */ + private JMenuItem getFileOpenMenu(final COSStream cosStream, final TreePath nodePath) + { + // if we know the file type, create a system open menu + final String extension = getFileExtensionForStream(cosStream, nodePath); + if (extension == null) + { + return null; + } + + JMenuItem openMenuItem = new JMenuItem("Open with Default Application"); + openMenuItem.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent actionEvent) + { + try + { + byte[] bytes = IOUtils.toByteArray(cosStream.createInputStream()); + File temp = File.createTempFile("pdfbox", "." + extension); + temp.deleteOnExit(); + + FileOutputStream outputStream = null; + try + { + outputStream = new FileOutputStream(temp); + outputStream.write(bytes); + + Desktop.getDesktop().open(temp); + } + finally + { + if (outputStream != null) + { + outputStream.close(); + } + } + } + catch (IOException e) + { + e.printStackTrace(); + } + } + }); + return openMenuItem; + } + + /** + * produce possible partially decoded stream saving menu items * @param cosStream stream to save - * @return JMenuItems for saving partially filtered streams + * @return JMenuItems for saving partially decoded streams */ - private List getPartiallyFilteredStreamSaveMenu(final COSStream cosStream) + private List getPartiallyDecodedStreamSaveMenu(final COSStream cosStream) { List menuItems = new ArrayList(); PDStream stream = new PDStream(cosStream); @@ -267,7 +397,7 @@ public void actionPerformed(ActionEvent actionEvent) try { InputStream data = stream.createInputStream(stopFilters); - saveStream(IOUtils.toByteArray(data)); + saveStream(IOUtils.toByteArray(data), null, null); } catch (IOException e) { @@ -281,11 +411,12 @@ public void actionPerformed(ActionEvent actionEvent) /** * Save the stream. * @param bytes byte array of the stream. + * @param filter an optional FileFilter * @throws IOException if there is an error in creation of the file. 
*/ - private void saveStream(byte[] bytes) throws IOException + private void saveStream(byte[] bytes, FileFilter filter, String extension) throws IOException { - FileOpenSaveDialog saveDialog = new FileOpenSaveDialog(parent, null); - saveDialog.saveFile(bytes); + FileOpenSaveDialog saveDialog = new FileOpenSaveDialog(getParent(), filter); + saveDialog.saveFile(bytes, extension); } } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/WindowPrefs.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/WindowPrefs.java new file mode 100644 index 00000000000..42dac27915f --- /dev/null +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/WindowPrefs.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.debugger.ui; + +import java.awt.Dimension; +import java.awt.Frame; +import java.awt.Rectangle; +import java.awt.Toolkit; +import java.util.prefs.Preferences; + +/** + * A class to save a window's position and size using the Java Preferences API.
+ * + * @author Tilman Hausherr + */ +public class WindowPrefs +{ + private static final String KEY = "window_prefs_"; + private final Preferences pref; + + public WindowPrefs(Class className) + { + this.pref = Preferences.userNodeForPackage(className); + } + + public void setBounds(Rectangle rect) + { + Preferences node = pref.node(KEY); + node.putInt("X", rect.x); + node.putInt("Y", rect.y); + node.putInt("W", rect.width); + node.putInt("H", rect.height); + } + + public Rectangle getBounds() + { + Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize(); + Preferences node = pref.node(KEY); + int x = node.getInt("X", screenSize.width / 4); + int y = node.getInt("Y", screenSize.height / 4); + int w = node.getInt("W", screenSize.width / 2); + int h = node.getInt("H", screenSize.height / 2); + return new Rectangle(x, y, w, h); + } + + public void setDividerLocation(int divider) + { + Preferences node = pref.node(KEY); + node.putInt("DIV", divider); + } + + public int getDividerLocation() + { + Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize(); + Preferences node = pref.node(KEY); + return node.getInt("DIV", screenSize.width / 8); + } + + public void setExtendedState(int extendedState) + { + Preferences node = pref.node(KEY); + node.putInt("EXTSTATE", extendedState); + } + + public int getExtendedState() + { + Preferences node = pref.node(KEY); + return node.getInt("EXTSTATE", Frame.NORMAL); + } +} diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ZoomMenu.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ZoomMenu.java index f46de343c3e..a13679320c1 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ZoomMenu.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ZoomMenu.java @@ -31,6 +31,7 @@ */ public final class ZoomMenu extends MenuBase { + @SuppressWarnings("squid:MaximumInheritanceDepth") private static class ZoomMenuItem extends JRadioButtonMenuItem { private final int zoom; @@ -42,8 
+43,9 @@ private static class ZoomMenuItem extends JRadioButtonMenuItem } } - public static final String ZOOM_100_PERCENT = "100%"; - private static final int[] ZOOMS = new int[] { 25, 50, 100, 200, 400 }; + private float pageZoomScale = 1; + private float imageZoomScale = 1; + private static final int[] ZOOMS = new int[] { 25, 50, 100, 150, 200, 400, 1000, 2000 }; private static ZoomMenu instance; private final JMenu menu; @@ -81,21 +83,22 @@ public static ZoomMenu getInstance() /** * Set the zoom selection. * - * @param selection zoom menu string, e.g. "100%". + * @param zoomValue e.g. 1, 0.25, 4. * @throws IllegalArgumentException if the parameter doesn't belong to a zoom menu item. */ - public void setZoomSelection(String selection) + public void changeZoomSelection(float zoomValue) { + int selection = (int) (zoomValue * 100); for (Component comp : menu.getMenuComponents()) { - JRadioButtonMenuItem menuItem = (JRadioButtonMenuItem) comp; - if (menuItem.getText().equals(selection)) + ZoomMenuItem menuItem = (ZoomMenuItem) comp; + if (menuItem.zoom == selection) { menuItem.setSelected(true); return; } } - throw new IllegalArgumentException("no zoom menu item found for: " + selection); + throw new IllegalArgumentException("no zoom menu item found for: " + selection + "%"); } /** @@ -132,4 +135,35 @@ public static float getZoomScale() } throw new IllegalStateException("no zoom menu item is selected"); } + + public float getPageZoomScale() + { + return pageZoomScale; + } + + public void setPageZoomScale(float pageZoomValue) + { + pageZoomScale = pageZoomValue; + } + + public float getImageZoomScale() + { + return imageZoomScale; + } + + public void setImageZoomScale(float imageZoomValue) + { + imageZoomScale = imageZoomValue; + } + + /** + * When a new file is loaded zoom values should be reset. 
+ * + */ + public void resetZoom() + { + setPageZoomScale(1); + setImageZoomScale(1); + changeZoomSelection(1); + } } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/textsearcher/SearchEngine.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/textsearcher/SearchEngine.java index 93d44ab36aa..5b038ab4f63 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/textsearcher/SearchEngine.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/textsearcher/SearchEngine.java @@ -23,6 +23,8 @@ import javax.swing.text.Document; import javax.swing.text.Highlighter; import javax.swing.text.JTextComponent; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; /** * @author Khyrul Bashar @@ -30,6 +32,8 @@ */ class SearchEngine { + private static final Log LOG = LogFactory.getLog(SearchEngine.class); + private final Document document; private final Highlighter highlighter; private final Highlighter.HighlightPainter painter; @@ -73,7 +77,7 @@ public List search(String searchKey, boolean isCaseSensit } catch (BadLocationException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); return highlights; } if (!isCaseSensitive) @@ -97,7 +101,7 @@ public List search(String searchKey, boolean isCaseSensit } catch (BadLocationException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } } diff --git a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/textsearcher/Searcher.java b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/textsearcher/Searcher.java index 5fe983b7c0c..718deea879c 100644 --- a/debugger/src/main/java/org/apache/pdfbox/debugger/ui/textsearcher/Searcher.java +++ b/debugger/src/main/java/org/apache/pdfbox/debugger/ui/textsearcher/Searcher.java @@ -34,6 +34,8 @@ import javax.swing.text.DefaultHighlighter; import javax.swing.text.Highlighter; import javax.swing.text.JTextComponent; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; 
import org.apache.pdfbox.debugger.PDFDebugger; @@ -42,6 +44,8 @@ */ public class Searcher implements DocumentListener, ChangeListener, ComponentListener { + private static final Log LOG = LogFactory.getLog(Searcher.class); + private static final Highlighter.HighlightPainter PAINTER = new DefaultHighlighter.DefaultHighlightPainter(Color.yellow); private static final Highlighter.HighlightPainter SELECTION_PAINTER = @@ -139,7 +143,7 @@ private void search(DocumentEvent documentEvent) } catch (BadLocationException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } @@ -192,7 +196,7 @@ private void scrollToWord(int offset) } catch (BadLocationException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } @@ -218,7 +222,7 @@ private void changeHighlighter(int index, Highlighter.HighlightPainter newPainte } catch (BadLocationException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } @@ -234,13 +238,13 @@ public void stateChanged(ChangeEvent changeEvent) @Override public void componentResized(ComponentEvent componentEvent) { - + // do nothing } @Override public void componentMoved(ComponentEvent componentEvent) { - + // do nothing } @Override diff --git a/examples/pom.xml b/examples/pom.xml index 189b33e754d..02878e6fa48 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -23,7 +23,7 @@ org.apache.pdfbox pdfbox-parent - 2.0.0-SNAPSHOT + 2.0.25-SNAPSHOT ../parent/pom.xml @@ -42,6 +42,26 @@ + + + + [11,) + + + + javax.xml.bind + jaxb-api + provided + + + javax.activation + activation + provided + + + + + org.bouncycastle @@ -77,7 +97,8 @@ org.apache.ant ant - 1.9.6 + + 1.9.15 junit @@ -85,9 +106,15 @@ test - org.apache.wink - wink-component-test-support - 1.4 + javax.servlet + javax.servlet-api + 4.0.1 + test + + + org.apache.geronimo.specs + geronimo-jaxrs_1.1_spec + 1.0 test @@ -106,9 +133,49 @@ src/test/resources/org/apache/pdfbox/examples/signature/* + src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.* + + 
com.googlecode.maven-download-plugin + download-maven-plugin + + + testAddValidationInformation + generate-test-resources + + wget + + + https://issues.apache.org/jira/secure/attachment/13014110/notCertified_368835_Sig_en_201026090509.pdf + ${project.build.directory}/pdfs + notCertified_368835_Sig_en_201026090509.pdf + eec730efc741d52ec2de5a26bb74e34fda3d01f8f547a92c4730478d4e35cd383c9c9f3c397e388ee0882166a18c1396d9576c6f9ab0a70c9551fb49c0ee3e6f + + + + testDoubleVisibleSignatureOnEncryptedFile + generate-test-resources + + wget + + + https://issues.apache.org/jira/secure/attachment/12682897/FormI-9-English.pdf + ${project.build.directory}/pdfs + PDFBOX-2469-1-AcroForm-AES128.pdf + a5067d67da88dcb3f2b6e63c6387d2fc7170db104d67e81de59d12e9e6b1ad473c0325411fc1cc235e12fbc56a37a67181f85b4e49cb208fbea0c0a01ebe6dd2 + + + + + + + maven-surefire-plugin + + ${addmod} + + diff --git a/examples/src/main/appended-resources/META-INF/LICENSE b/examples/src/main/appended-resources/META-INF/LICENSE index 839853550ce..77e285f61f5 100644 --- a/examples/src/main/appended-resources/META-INF/LICENSE +++ b/examples/src/main/appended-resources/META-INF/LICENSE @@ -169,14 +169,111 @@ The International Components for Unicode library (http://site.icu-project.org/) copyright holder. The file "sRGB Color Space Profile.icm" is: -Copyright (c) 1998 Hewlett-Packard Company - -To anyone who acknowledges that the file "sRGB Color Space Profile.icm" -is provided "AS IS" WITH NO EXPRESS OR IMPLIED WARRANTY: -permission to use, copy and distribute this file for any purpose is hereby -granted without fee, provided that the file is not changed including the HP -copyright notice tag, and that the name of Hewlett-Packard Company not be -used in advertising or publicity pertaining to distribution of the software -without specific, written prior permission. Hewlett-Packard Company makes -no representations about the suitability of this software for any purpose. 
+ Copyright (c) 1998 Hewlett-Packard Company + + To anyone who acknowledges that the file "sRGB Color Space Profile.icm" + is provided "AS IS" WITH NO EXPRESS OR IMPLIED WARRANTY: + permission to use, copy and distribute this file for any purpose is hereby + granted without fee, provided that the file is not changed including the HP + copyright notice tag, and that the name of Hewlett-Packard Company not be + used in advertising or publicity pertaining to distribution of the software + without specific, written prior permission. Hewlett-Packard Company makes + no representations about the suitability of this software for any purpose. + +Lohit-Bengali font (https://pagure.io/lohit): + + Copyright 2011-13 Lohit Fonts Project contributors + + + This Font Software is licensed under the SIL Open Font License, Version 1.1. + This license is copied below, and is also available with a FAQ at: + http://scripts.sil.org/OFL + + + ----------------------------------------------------------- + SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 + ----------------------------------------------------------- + + PREAMBLE + The goals of the Open Font License (OFL) are to stimulate worldwide + development of collaborative font projects, to support the font creation + efforts of academic and linguistic communities, and to provide a free and + open framework in which fonts may be shared and improved in partnership + with others. + + The OFL allows the licensed fonts to be used, studied, modified and + redistributed freely as long as they are not sold by themselves. The + fonts, including any derivative works, can be bundled, embedded, + redistributed and/or sold with any software provided that any reserved + names are not used by derivative works. The fonts and derivatives, + however, cannot be released under any other type of license. The + requirement for fonts to remain under this license does not apply + to any document created using the fonts or their derivatives. 
+ + DEFINITIONS + "Font Software" refers to the set of files released by the Copyright + Holder(s) under this license and clearly marked as such. This may + include source files, build scripts and documentation. + + "Reserved Font Name" refers to any names specified as such after the + copyright statement(s). + + "Original Version" refers to the collection of Font Software components as + distributed by the Copyright Holder(s). + + "Modified Version" refers to any derivative made by adding to, deleting, + or substituting -- in part or in whole -- any of the components of the + Original Version, by changing formats or by porting the Font Software to a + new environment. + + "Author" refers to any designer, engineer, programmer, technical + writer or other person who contributed to the Font Software. + + PERMISSION & CONDITIONS + Permission is hereby granted, free of charge, to any person obtaining + a copy of the Font Software, to use, study, copy, merge, embed, modify, + redistribute, and sell modified and unmodified copies of the Font + Software, subject to the following conditions: + + 1) Neither the Font Software nor any of its individual components, + in Original or Modified Versions, may be sold by itself. + + 2) Original or Modified Versions of the Font Software may be bundled, + redistributed and/or sold with any software, provided that each copy + contains the above copyright notice and this license. These can be + included either as stand-alone text files, human-readable headers or + in the appropriate machine-readable metadata fields within text or + binary files as long as those fields can be easily viewed by the user. + + 3) No Modified Version of the Font Software may use the Reserved Font + Name(s) unless explicit written permission is granted by the corresponding + Copyright Holder. This restriction only applies to the primary font name as + presented to the users. 
+ + 4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font + Software shall not be used to promote, endorse or advertise any + Modified Version, except to acknowledge the contribution(s) of the + Copyright Holder(s) and the Author(s) or with their explicit written + permission. + + 5) The Font Software, modified or unmodified, in part or in whole, + must be distributed entirely under this license, and must not be + distributed under any other license. The requirement for fonts to + remain under this license does not apply to any document created + using the Font Software. + + TERMINATION + This license becomes null and void if any of the above conditions are + not met. + + DISCLAIMER + THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE + COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL + DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM + OTHER DEALINGS IN THE FONT SOFTWARE. 
diff --git a/examples/src/main/java/org/apache/pdfbox/examples/ant/PDFToTextTask.java b/examples/src/main/java/org/apache/pdfbox/examples/ant/PDFToTextTask.java index cbcd875a892..5d508af544a 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/ant/PDFToTextTask.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/ant/PDFToTextTask.java @@ -19,7 +19,6 @@ import java.io.File; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import org.apache.pdfbox.tools.ExtractText; @@ -36,7 +35,7 @@ */ public class PDFToTextTask extends Task { - private final List fileSets = new ArrayList(); + private final List fileSets = new ArrayList(); /** * Adds a set of files (nested fileset attribute). @@ -55,11 +54,10 @@ public void addFileset( FileSet set ) public void execute() { log( "PDFToTextTask executing" ); - Iterator fileSetIter = fileSets.iterator(); - while( fileSetIter.hasNext() ) + + for (FileSet fileSet : fileSets) { - FileSet next = (FileSet)fileSetIter.next(); - DirectoryScanner dirScanner = next.getDirectoryScanner( getProject() ); + DirectoryScanner dirScanner = fileSet.getDirectoryScanner(getProject()); dirScanner.scan(); String[] files = dirScanner.getIncludedFiles(); for (String file : files) diff --git a/examples/src/main/java/org/apache/pdfbox/examples/ant/package.html b/examples/src/main/java/org/apache/pdfbox/examples/ant/package.html index 1d70ed480a7..6153e670443 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/ant/package.html +++ b/examples/src/main/java/org/apache/pdfbox/examples/ant/package.html @@ -15,20 +15,20 @@ ! limitations under the License. !--> - - + + ANT tasks that utilize PDFBox features can be found in this package. -This is an example of using the PDF2Text task:

+This is an example of using the PDF2Text task:

-<taskdef name="pdf2text" classname="org.apache.pdfbox.ant.PDFToTextTask" classpathref="build.classpath" />
+<taskdef name="pdf2text" classname="org.apache.pdfbox.examples.ant.PDFToTextTask" classpathref="build.classpath" />
-<pdf2text>
-   <fileset dir="test">
-     <include name="**/*.pdf" />
-   </fileset>
-</pdf2text>
+<pdf2text>
+   <fileset dir="test">
+     <include name="**/*.pdf" />
+   </fileset>
+</pdf2text>
diff --git a/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/AddBorderToField.java b/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/AddBorderToField.java index 79acaf22272..fd6d8d11231 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/AddBorderToField.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/AddBorderToField.java @@ -54,12 +54,12 @@ public static void main(String[] args) throws IOException PDField field = acroForm.getField("SampleField"); PDAnnotationWidget widget = field.getWidgets().get(0); - // Create the definition for a green border - PDAppearanceCharacteristicsDictionary fieldAppearance = + // Create the definition for a red border + PDAppearanceCharacteristicsDictionary fieldAppearance = new PDAppearanceCharacteristicsDictionary(new COSDictionary()); - PDColor green = new PDColor(new float[] { 0, 1, 0 }, PDDeviceRGB.INSTANCE); - fieldAppearance.setBorderColour(green); - + PDColor red = new PDColor(new float[] { 1, 0, 0 }, PDDeviceRGB.INSTANCE); + fieldAppearance.setBorderColour(red); + // Set the information to be used by the widget which is responsible // for the visual style of the form field. widget.setAppearanceCharacteristics(fieldAppearance); diff --git a/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateCheckBox.java b/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateCheckBox.java new file mode 100644 index 00000000000..3cee71aba68 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateCheckBox.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.interactive.form; + +import java.awt.geom.Rectangle2D; +import java.io.IOException; +import org.apache.fontbox.afm.AFMParser; +import org.apache.fontbox.afm.CharMetric; +import org.apache.fontbox.afm.FontMetrics; +import org.apache.fontbox.util.BoundingBox; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceCharacteristicsDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceEntry; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import 
org.apache.pdfbox.pdmodel.interactive.form.PDCheckBox; + +/** + * Example to create a checkbox. + * + * @author Tilman Hausherr + */ +public class CreateCheckBox +{ + private CreateCheckBox() + { + } + + public static void main(String[] args) throws IOException + { + PDDocument document = new PDDocument(); + PDPage page = new PDPage(); + document.addPage(page); + + PDAcroForm acroForm = new PDAcroForm(document); + document.getDocumentCatalog().setAcroForm(acroForm); + + // if you want to see what Adobe does, activate this, open with Adobe + // save the file, and then open it with PDFDebugger + //acroForm.setNeedAppearances(true) + + float x = 50; + float y = page.getMediaBox().getHeight() - 50; + + PDRectangle rect = new PDRectangle(x, y, 20, 20); + + PDCheckBox checkbox = new PDCheckBox(acroForm); + checkbox.setPartialName("MyCheckBox"); + PDAnnotationWidget widget = checkbox.getWidgets().get(0); + widget.setPage(page); + widget.setRectangle(rect); + widget.setPrinted(true); + + PDAppearanceCharacteristicsDictionary appearanceCharacteristics = new PDAppearanceCharacteristicsDictionary(new COSDictionary()); + appearanceCharacteristics.setBorderColour(new PDColor(new float[]{1, 0, 0}, PDDeviceRGB.INSTANCE)); + appearanceCharacteristics.setBackground(new PDColor(new float[]{1, 1, 0}, PDDeviceRGB.INSTANCE)); + // 8 = cross; 4 = checkmark; H = star; u = diamond; n = square, l = dot + appearanceCharacteristics.setNormalCaption("4"); + widget.setAppearanceCharacteristics(appearanceCharacteristics); + + PDBorderStyleDictionary borderStyleDictionary = new PDBorderStyleDictionary(); + borderStyleDictionary.setWidth(1); + borderStyleDictionary.setStyle(PDBorderStyleDictionary.STYLE_SOLID); + widget.setBorderStyle(borderStyleDictionary); + + PDAppearanceDictionary ap = new PDAppearanceDictionary(); + widget.setAppearance(ap); + PDAppearanceEntry normalAppearance = ap.getNormalAppearance(); + + COSDictionary normalAppearanceDict = (COSDictionary) 
normalAppearance.getCOSObject(); + normalAppearanceDict.setItem(COSName.Off, createAppearanceStream(document, widget, false)); + normalAppearanceDict.setItem(COSName.YES, createAppearanceStream(document, widget, true)); + + // If we ever decide to implement a /D (down) appearance, just + // replace the background colors c with c * 0.75 + page.getAnnotations().add(checkbox.getWidgets().get(0)); + acroForm.getFields().add(checkbox); + + // always call check() or unCheck(), or the box will remain invisible. + checkbox.unCheck(); + + document.save("target/CheckBoxSample.pdf"); + document.close(); + } + + private static PDAppearanceStream createAppearanceStream( + final PDDocument document, PDAnnotationWidget widget, boolean on) throws IOException + { + PDRectangle rect = widget.getRectangle(); + PDAppearanceCharacteristicsDictionary appearanceCharacteristics; + PDAppearanceStream yesAP = new PDAppearanceStream(document); + yesAP.setBBox(new PDRectangle(rect.getWidth(), rect.getHeight())); + yesAP.setResources(new PDResources()); + PDPageContentStream yesAPCS = new PDPageContentStream(document, yesAP); + appearanceCharacteristics = widget.getAppearanceCharacteristics(); + PDColor backgroundColor = appearanceCharacteristics.getBackground(); + PDColor borderColor = appearanceCharacteristics.getBorderColour(); + float lineWidth = getLineWidth(widget); + yesAPCS.setLineWidth(lineWidth); // border style (dash) ignored + yesAPCS.setNonStrokingColor(backgroundColor); + yesAPCS.addRect(0, 0, rect.getWidth(), rect.getHeight()); + yesAPCS.fill(); + yesAPCS.setStrokingColor(borderColor); + yesAPCS.addRect(lineWidth / 2, lineWidth / 2, rect.getWidth() - lineWidth, rect.getHeight() - lineWidth); + yesAPCS.stroke(); + if (!on) + { + yesAPCS.close(); + return yesAP; + } + + yesAPCS.addRect(lineWidth, lineWidth, rect.getWidth() - lineWidth * 2, rect.getHeight() - lineWidth * 2); + yesAPCS.clip(); + + String normalCaption = appearanceCharacteristics.getNormalCaption(); + if 
(normalCaption == null) + { + normalCaption = "4"; // Adobe behaviour + } + if ("8".equals(normalCaption)) + { + // Adobe paints a cross instead of using the Zapf Dingbats cross symbol + yesAPCS.setStrokingColor(0f); + yesAPCS.moveTo(lineWidth * 2, rect.getHeight() - lineWidth * 2); + yesAPCS.lineTo(rect.getWidth() - lineWidth * 2, lineWidth * 2); + yesAPCS.moveTo(rect.getWidth() - lineWidth * 2, rect.getHeight() - lineWidth * 2); + yesAPCS.lineTo(lineWidth * 2, lineWidth * 2); + yesAPCS.stroke(); + } + else + { + Rectangle2D bounds = new Rectangle2D.Float(); + String unicode = null; + + // ZapfDingbats font may be missing or substituted, let's use AFM resources instead. + AFMParser parser = new AFMParser(PDType1Font.class.getResourceAsStream( + "/org/apache/pdfbox/resources/afm/ZapfDingbats.afm")); + FontMetrics metric = parser.parse(); + for (CharMetric cm : metric.getCharMetrics()) + { + // The caption is not unicode, but the Zapf Dingbats code in the PDF. + // Assume that only the first character is used. + if (normalCaption.codePointAt(0) == cm.getCharacterCode()) + { + BoundingBox bb = cm.getBoundingBox(); + bounds = new Rectangle2D.Float(bb.getLowerLeftX(), bb.getLowerLeftY(), + bb.getWidth(), bb.getHeight()); + unicode = PDType1Font.ZAPF_DINGBATS.getGlyphList().toUnicode(cm.getName()); + break; + } + } + if (bounds.isEmpty()) + { + throw new IOException("Bounds rectangle for chosen glyph is empty"); + } + float size = (float) Math.min(bounds.getWidth(), bounds.getHeight()) / 1000; + // assume that checkmark has square size + // the calculations approximate what Adobe is doing, i.e. 
put the glyph in the middle + float fontSize = (rect.getWidth() - lineWidth * 2) / size * 0.6666f; + float xOffset = (float) (rect.getWidth() - (bounds.getWidth()) / 1000 * fontSize) / 2; + xOffset -= bounds.getX() / 1000 * fontSize; + float yOffset = (float) (rect.getHeight() - (bounds.getHeight()) / 1000 * fontSize) / 2; + yOffset -= bounds.getY() / 1000 * fontSize; + yesAPCS.setNonStrokingColor(0f); + yesAPCS.beginText(); + yesAPCS.setFont(PDType1Font.ZAPF_DINGBATS, fontSize); + yesAPCS.newLineAtOffset(xOffset, yOffset); + yesAPCS.showText(unicode); + yesAPCS.endText(); + } + yesAPCS.close(); + return yesAP; + } + + static float getLineWidth(PDAnnotationWidget widget) + { + PDBorderStyleDictionary bs = widget.getBorderStyle(); + if (bs != null) + { + return bs.getWidth(); + } + return 1; + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateMultiWidgetsForm.java b/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateMultiWidgetsForm.java new file mode 100644 index 00000000000..250b6c75437 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateMultiWidgetsForm.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.examples.interactive.form; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceCharacteristicsDictionary; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import org.apache.pdfbox.pdmodel.interactive.form.PDTextField; + +/** + * An example of creating an AcroForm and a form field from scratch, with two widgets for one field: + * This means that the same field is visible on two separate pages, but can be on different + * positions and different size and colors. Changing the value on one page will also change it on + * the other page. + * + * The form field is created with properties similar to creating a form with default settings in + * Adobe Acrobat. + * + */ +public final class CreateMultiWidgetsForm +{ + private CreateMultiWidgetsForm() + { + } + + public static void main(String[] args) throws IOException + { + // Create a new document with 2 empty pages. 
+ PDDocument document = new PDDocument(); + PDPage page1 = new PDPage(PDRectangle.A4); + document.addPage(page1); + PDPage page2 = new PDPage(PDRectangle.A4); + document.addPage(page2); + + // Adobe Acrobat uses Helvetica as a default font and + // stores that under the name '/Helv' in the resources dictionary + PDFont font = PDType1Font.HELVETICA; + PDResources resources = new PDResources(); + resources.put(COSName.getPDFName("Helv"), font); + + // Add a new AcroForm and add that to the document + PDAcroForm acroForm = new PDAcroForm(document); + document.getDocumentCatalog().setAcroForm(acroForm); + + // Add and set the resources and default appearance at the form level + acroForm.setDefaultResources(resources); + + // Acrobat sets the font size on the form level to be + // auto sized as default. This is done by setting the font size to '0' + String defaultAppearanceString = "/Helv 0 Tf 0 g"; + acroForm.setDefaultAppearance(defaultAppearanceString); + + // Add a form field to the form. + PDTextField textBox = new PDTextField(acroForm); + textBox.setPartialName("SampleField"); + // Acrobat sets the font size to 12 as default + // This is done by setting the font size to '12' on the + // field level. + // The text color is set to blue in this example. + // To use black, replace "0 0 1 rg" with "0 0 0 rg" or "0 g". 
+ defaultAppearanceString = "/Helv 12 Tf 0 0 1 rg"; + textBox.setDefaultAppearance(defaultAppearanceString); + + // add the field to the AcroForm + acroForm.getFields().add(textBox); + + // Specify 1st annotation associated with the field + PDAnnotationWidget widget1 = new PDAnnotationWidget(); + PDRectangle rect = new PDRectangle(50, 750, 250, 50); + widget1.setRectangle(rect); + widget1.setPage(page1); + widget1.setParent(textBox); + + // Specify 2nd annotation associated with the field + PDAnnotationWidget widget2 = new PDAnnotationWidget(); + PDRectangle rect2 = new PDRectangle(200, 650, 100, 50); + widget2.setRectangle(rect2); + widget2.setPage(page2); + widget2.setParent(textBox); + + // set green border and yellow background for 1st widget + // if you prefer defaults, delete this code block + PDAppearanceCharacteristicsDictionary fieldAppearance1 + = new PDAppearanceCharacteristicsDictionary(new COSDictionary()); + fieldAppearance1.setBorderColour(new PDColor(new float[]{0,1,0}, PDDeviceRGB.INSTANCE)); + fieldAppearance1.setBackground(new PDColor(new float[]{1,1,0}, PDDeviceRGB.INSTANCE)); + widget1.setAppearanceCharacteristics(fieldAppearance1); + + // set red border and green background for 2nd widget + // if you prefer defaults, delete this code block + PDAppearanceCharacteristicsDictionary fieldAppearance2 + = new PDAppearanceCharacteristicsDictionary(new COSDictionary()); + fieldAppearance2.setBorderColour(new PDColor(new float[]{1,0,0}, PDDeviceRGB.INSTANCE)); + fieldAppearance2.setBackground(new PDColor(new float[]{0,1,0}, PDDeviceRGB.INSTANCE)); + widget2.setAppearanceCharacteristics(fieldAppearance2); + + List widgets = new ArrayList(); + widgets.add(widget1); + widgets.add(widget2); + textBox.setWidgets(widgets); + + // make sure the annotations are visible on screen and paper + widget1.setPrinted(true); + widget2.setPrinted(true); + + // Add the annotations to the pages + page1.getAnnotations().add(widget1); + page2.getAnnotations().add(widget2); 
+ + // set the field value + textBox.setValue("Sample field"); + + document.save("target/MultiWidgetsForm.pdf"); + document.close(); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateRadioButtons.java b/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateRadioButtons.java new file mode 100644 index 00000000000..cdf18ab7fd0 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateRadioButtons.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.examples.interactive.form; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceCharacteristicsDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceEntry; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import org.apache.pdfbox.pdmodel.interactive.form.PDRadioButton; + +/** + * Example to create radio buttons. 
+ * + * @author Tilman Hausherr + */ +public class CreateRadioButtons +{ + private CreateRadioButtons() + { + } + + public static void main(String[] args) throws IOException + { + PDDocument document = new PDDocument(); + PDPage page = new PDPage(PDRectangle.A4); + document.addPage(page); + PDAcroForm acroForm = new PDAcroForm(document); + + // if you want to see what Adobe does, activate this, open with Adobe + // save the file, and then open it with PDFDebugger + + //acroForm.setNeedAppearances(true) + + document.getDocumentCatalog().setAcroForm(acroForm); + List options = Arrays.asList("a", "b", "c"); + PDRadioButton radioButton = new PDRadioButton(acroForm); + radioButton.setPartialName("MyRadioButton"); + radioButton.setExportValues(options); + + PDAppearanceCharacteristicsDictionary appearanceCharacteristics = new PDAppearanceCharacteristicsDictionary(new COSDictionary()); + appearanceCharacteristics.setBorderColour(new PDColor(new float[] { 1, 0, 0 }, PDDeviceRGB.INSTANCE)); + appearanceCharacteristics.setBackground(new PDColor(new float[]{0, 1, 0.3f}, PDDeviceRGB.INSTANCE)); + // no caption => round + // with caption => see checkbox example + + List widgets = new ArrayList(); + for (int i = 0; i < options.size(); i++) + { + PDAnnotationWidget widget = new PDAnnotationWidget(); + widget.setRectangle(new PDRectangle(30, PDRectangle.A4.getHeight() - 40 - i * 35, 30, 30)); + widget.setAppearanceCharacteristics(appearanceCharacteristics); + PDBorderStyleDictionary borderStyleDictionary = new PDBorderStyleDictionary(); + borderStyleDictionary.setWidth(2); + borderStyleDictionary.setStyle(PDBorderStyleDictionary.STYLE_SOLID); + widget.setBorderStyle(borderStyleDictionary); + widget.setPage(page); + + COSDictionary apNDict = new COSDictionary(); + apNDict.setItem(COSName.Off, createAppearanceStream(document, widget, false)); + apNDict.setItem(options.get(i), createAppearanceStream(document, widget, true)); + + PDAppearanceDictionary appearance = new 
PDAppearanceDictionary(); + PDAppearanceEntry appearanceNEntry = new PDAppearanceEntry(apNDict); + appearance.setNormalAppearance(appearanceNEntry); + widget.setAppearance(appearance); + widget.setAppearanceState("Off"); // don't forget this, or button will be invisible + widgets.add(widget); + page.getAnnotations().add(widget); + } + radioButton.setWidgets(widgets); + + acroForm.getFields().add(radioButton); + + // Set the texts + PDPageContentStream contents = new PDPageContentStream(document, page); + for (int i = 0; i < options.size(); i++) + { + contents.beginText(); + contents.setFont(PDType1Font.HELVETICA, 15); + contents.newLineAtOffset(70, PDRectangle.A4.getHeight() - 30 - i * 35); + contents.showText(options.get(i)); + contents.endText(); + } + contents.close(); + + radioButton.setValue("c"); + + document.save("target/RadioButtonsSample.pdf"); + document.close(); + } + + private static PDAppearanceStream createAppearanceStream( + final PDDocument document, PDAnnotationWidget widget, boolean on) throws IOException + { + PDRectangle rect = widget.getRectangle(); + PDAppearanceStream onAP = new PDAppearanceStream(document); + onAP.setBBox(new PDRectangle(rect.getWidth(), rect.getHeight())); + PDPageContentStream onAPCS = new PDPageContentStream(document, onAP); + + PDAppearanceCharacteristicsDictionary appearanceCharacteristics = widget.getAppearanceCharacteristics(); + PDColor backgroundColor = appearanceCharacteristics.getBackground(); + PDColor borderColor = appearanceCharacteristics.getBorderColour(); + float lineWidth = getLineWidth(widget); + onAPCS.setLineWidth(lineWidth); // border style (dash) ignored + onAPCS.setNonStrokingColor(backgroundColor); + float radius = Math.min(rect.getWidth() / 2, rect.getHeight() / 2); + drawCircle(onAPCS, rect.getWidth() / 2, rect.getHeight() / 2, radius); + onAPCS.fill(); + onAPCS.setStrokingColor(borderColor); + drawCircle(onAPCS, rect.getWidth() / 2, rect.getHeight() / 2, radius - lineWidth / 2); + onAPCS.stroke(); 
+ if (on) + { + onAPCS.setNonStrokingColor(0f); + drawCircle(onAPCS, rect.getWidth() / 2, rect.getHeight() / 2, (radius - lineWidth) / 2); + onAPCS.fill(); + } + + onAPCS.close(); + return onAP; + } + + static float getLineWidth(PDAnnotationWidget widget) + { + PDBorderStyleDictionary bs = widget.getBorderStyle(); + if (bs != null) + { + return bs.getWidth(); + } + return 1; + } + + static void drawCircle(PDPageContentStream cs, float x, float y, float r) throws IOException + { + // http://stackoverflow.com/a/2007782/535646 + float magic = r * 0.551784f; + cs.moveTo(x, y + r); + cs.curveTo(x + magic, y + r, x + r, y + magic, x + r, y); + cs.curveTo(x + r, y - magic, x + magic, y - r, x, y - r); + cs.curveTo(x - magic, y - r, x - r, y - magic, x - r, y); + cs.curveTo(x - r, y + magic, x - magic, y + r, x, y + r); + cs.closePath(); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateSimpleForm.java b/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateSimpleForm.java index 0cca6556c59..5e60f66cac6 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateSimpleForm.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/CreateSimpleForm.java @@ -18,16 +18,22 @@ package org.apache.pdfbox.examples.interactive.form; import java.io.IOException; +import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import 
org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceCharacteristicsDictionary; import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; import org.apache.pdfbox.pdmodel.interactive.form.PDTextField; +import org.apache.pdfbox.pdmodel.interactive.form.PDVariableText; /** * An example of creating an AcroForm and a form field from scratch. @@ -72,24 +78,49 @@ public static void main(String[] args) throws IOException textBox.setPartialName("SampleField"); // Acrobat sets the font size to 12 as default // This is done by setting the font size to '12' on the - // field level. - defaultAppearanceString = "/Helv 12 Tf 0 g"; + // field level. + // The text color is set to blue in this example. + // To use black, replace "0 0 1 rg" with "0 0 0 rg" or "0 g". + defaultAppearanceString = "/Helv 12 Tf 0 0 1 rg"; textBox.setDefaultAppearance(defaultAppearanceString); - + // add the field to the acroform acroForm.getFields().add(textBox); - - // Specify the annotation associated with the field + + // Specify the widget annotation associated with the field PDAnnotationWidget widget = textBox.getWidgets().get(0); PDRectangle rect = new PDRectangle(50, 750, 200, 50); widget.setRectangle(rect); widget.setPage(page); + + // set green border and yellow background + // if you prefer defaults, delete this code block + PDAppearanceCharacteristicsDictionary fieldAppearance + = new PDAppearanceCharacteristicsDictionary(new COSDictionary()); + fieldAppearance.setBorderColour(new PDColor(new float[]{0,1,0}, PDDeviceRGB.INSTANCE)); + fieldAppearance.setBackground(new PDColor(new float[]{1,1,0}, PDDeviceRGB.INSTANCE)); + widget.setAppearanceCharacteristics(fieldAppearance); + + // make sure the widget annotation is visible on screen and paper + widget.setPrinted(true); - // Add the annotation to the page + // Add the widget annotation to the page page.getAnnotations().add(widget); - + + // set the alignment ("quadding") + textBox.setQ(PDVariableText.QUADDING_CENTERED); + // set the 
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.pdfbox.examples.interactive.form;

import java.io.IOException;
import org.apache.pdfbox.cos.COSDictionary;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceCharacteristicsDictionary;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDTextField;

/**
 * An example of creating an AcroForm and a form field from scratch with a font fully embedded to
 * allow non-WinAnsiEncoding input.
 *
 * The form field is created with properties similar to creating a form with default settings in
 * Adobe Acrobat.
 *
 */
public class CreateSimpleFormWithEmbeddedFont
{
    private CreateSimpleFormWithEmbeddedFont()
    {
    }

    /**
     * Builds the sample form and writes it to {@code target/SimpleFormWithEmbeddedFont.pdf}.
     *
     * @param args command line arguments, not used.
     * @throws IOException if the font cannot be loaded or the document cannot be written.
     */
    public static void main(String[] args) throws IOException
    {
        // Create a new document with an empty page.
        PDDocument doc = new PDDocument();
        try
        {
            PDPage page = new PDPage(PDRectangle.A4);
            doc.addPage(page);
            PDAcroForm acroForm = new PDAcroForm(doc);
            doc.getDocumentCatalog().setAcroForm(acroForm);

            // Note that the font is fully embedded. If you use a different font, make sure that
            // its license allows full embedding.
            PDFont formFont = PDType0Font.load(doc,
                    CreateSimpleFormWithEmbeddedFont.class.getResourceAsStream(
                            "/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"),
                    false);

            // Set the default resources at the form level and register the font in them;
            // the generated resource name is needed for the default appearance string
            final PDResources resources = new PDResources();
            acroForm.setDefaultResources(resources);
            final String fontName = resources.add(formFont).getName();

            // Acrobat sets the font size to be auto sized as default.
            // This is done by setting the font size to '0'
            String defaultAppearanceString = "/" + fontName + " 0 Tf 0 g";

            PDTextField textBox = new PDTextField(acroForm);
            textBox.setPartialName("SampleField");
            textBox.setDefaultAppearance(defaultAppearanceString);
            acroForm.getFields().add(textBox);

            // Specify the widget annotation associated with the field
            PDAnnotationWidget widget = textBox.getWidgets().get(0);
            PDRectangle rect = new PDRectangle(50, 700, 200, 50);
            widget.setRectangle(rect);
            widget.setPage(page);
            page.getAnnotations().add(widget);

            // set green border and yellow background
            // if you prefer defaults, delete this code block
            PDAppearanceCharacteristicsDictionary fieldAppearance
                    = new PDAppearanceCharacteristicsDictionary(new COSDictionary());
            fieldAppearance.setBorderColour(new PDColor(new float[]{0,1,0}, PDDeviceRGB.INSTANCE));
            fieldAppearance.setBackground(new PDColor(new float[]{1,1,0}, PDDeviceRGB.INSTANCE));
            widget.setAppearanceCharacteristics(fieldAppearance);

            // set the field value. Note that the last character is a Turkish capital I with a dot,
            // which is not part of WinAnsiEncoding
            textBox.setValue("Sample field İ");

            // put some text near the field
            PDPageContentStream cs = new PDPageContentStream(doc, page);
            try
            {
                cs.beginText();
                cs.setFont(PDType1Font.HELVETICA, 15);
                cs.newLineAtOffset(50, 760);
                cs.showText("Field:");
                cs.endText();
            }
            finally
            {
                // close the stream even if drawing failed
                cs.close();
            }

            doc.save("target/SimpleFormWithEmbeddedFont.pdf");
        }
        finally
        {
            // always release the document, also when building or saving it failed
            doc.close();
        }
    }
}
+ partialName; - } + sParent = sParent + "." + partialName; } System.out.println(sLevel + sParent); @@ -91,7 +88,7 @@ private void processField(PDField field, String sLevel, String sParent) throws I } /** - * This will read a PDF file and print out the form elements.
+ * This will read a PDF file and print out the form elements.
* see usage() for commandline * * @param args command line arguments diff --git a/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/SetField.java b/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/SetField.java index 08a2e809b5b..7cc1928f897 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/SetField.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/interactive/form/SetField.java @@ -54,7 +54,15 @@ public void setField(PDDocument pdfDocument, String name, String value) throws I { if (field instanceof PDCheckBox) { - field.setValue("Yes"); + PDCheckBox checkbox = (PDCheckBox) field; + if (value.isEmpty()) + { + checkbox.unCheck(); + } + else + { + checkbox.check(); + } } else if (field instanceof PDComboBox) { @@ -81,7 +89,7 @@ else if (field instanceof PDTextField) /** * This will read a PDF file and set a field and then write it the pdf out - * again.
+ * again.
* see usage() for commandline * * @param args command line arguments @@ -108,7 +116,7 @@ private void setField(String[] args) throws IOException SetField example = new SetField(); pdf = PDDocument.load(new File(args[0])); example.setField(pdf, args[1], args[2]); - pdf.save(args[0]); + pdf.save(calculateOutputFilename(args[0])); } } finally @@ -120,6 +128,21 @@ private void setField(String[] args) throws IOException } } + private static String calculateOutputFilename(String filename) + { + String outputFilename; + if (filename.toLowerCase().endsWith(".pdf")) + { + outputFilename = filename.substring(0, filename.length() - 4); + } + else + { + outputFilename = filename; + } + outputFilename += "_filled.pdf"; + return outputFilename; + } + /** * This will print out a message telling how to use this example. */ diff --git a/examples/src/main/java/org/apache/pdfbox/examples/lucene/LucenePDFDocument.java b/examples/src/main/java/org/apache/pdfbox/examples/lucene/LucenePDFDocument.java index 55b4db254ba..058a0d64fcb 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/lucene/LucenePDFDocument.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/lucene/LucenePDFDocument.java @@ -43,6 +43,7 @@ * This class is used to create a document for the lucene search engine. This should easily plug into the IndexPDFFiles * that comes with the lucene project. This class will populate the following fields. * + * * * * @@ -414,7 +415,7 @@ private void addContent(Document document, InputStream is, String documentLocati } catch (InvalidPasswordException e) { - // they didn't suppply a password and the default of "" was wrong. + // they didn't supply a password and the default of "" was wrong. 
throw new IOException("Error: The document(" + documentLocation + ") is encrypted and will not be indexed.", e); } finally diff --git a/examples/src/main/java/org/apache/pdfbox/examples/lucene/package.html b/examples/src/main/java/org/apache/pdfbox/examples/lucene/package.html index 8607cfa536d..fd30787858c 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/lucene/package.html +++ b/examples/src/main/java/org/apache/pdfbox/examples/lucene/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/AddAnnotations.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/AddAnnotations.java index e7360fc5737..bb2de06155a 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/AddAnnotations.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/AddAnnotations.java @@ -18,6 +18,9 @@ import java.io.IOException; import java.util.List; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSFloat; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; @@ -26,13 +29,17 @@ import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo; import org.apache.pdfbox.pdmodel.interactive.action.PDActionURI; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLine; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationSquareCircle; import 
org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageFitWidthDestination; /** * Add annotations to pages of a PDF document. @@ -56,13 +63,18 @@ public static void main(String[] args) throws IOException PDDocument document = new PDDocument(); try { - PDPage page = new PDPage(); - document.addPage(page); - List annotations = page.getAnnotations(); + PDPage page1 = new PDPage(); + PDPage page2 = new PDPage(); + PDPage page3 = new PDPage(); + document.addPage(page1); + document.addPage(page2); + document.addPage(page3); + List annotations = page1.getAnnotations(); // Some basic reusable objects/constants // Annotations themselves can only be used once! PDColor red = new PDColor(new float[] { 1, 0, 0 }, PDDeviceRGB.INSTANCE); + PDColor green = new PDColor(new float[] { 0, 1, 0 }, PDDeviceRGB.INSTANCE); PDColor blue = new PDColor(new float[] { 0, 0, 1 }, PDDeviceRGB.INSTANCE); PDColor black = new PDColor(new float[] { 0, 0, 0 }, PDDeviceRGB.INSTANCE); @@ -76,18 +88,20 @@ public static void main(String[] args) throws IOException borderULine.setStyle(PDBorderStyleDictionary.STYLE_UNDERLINE); borderULine.setWidth(INCH / 72); // 1 point - float pw = page.getMediaBox().getUpperRightX(); - float ph = page.getMediaBox().getUpperRightY(); + float pw = page1.getMediaBox().getUpperRightX(); + float ph = page1.getMediaBox().getUpperRightY(); // First add some text, two lines we'll add some annotations to this later PDFont font = PDType1Font.HELVETICA_BOLD; - PDPageContentStream contents = new PDPageContentStream(document, page); + PDPageContentStream contents = new PDPageContentStream(document, page1); contents.beginText(); contents.setFont(font, 18); contents.newLineAtOffset(INCH, ph - INCH - 18); 
contents.showText("PDFBox"); contents.newLineAtOffset(0, -(INCH / 2)); - contents.showText("Click Here"); + contents.showText("External URL"); + contents.newLineAtOffset(0, -(INCH / 2)); + contents.showText("Jump to page three"); contents.endText(); contents.close(); @@ -95,14 +109,17 @@ public static void main(String[] args) throws IOException PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup( PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT); txtMark.setColor(blue); - txtMark.setConstantOpacity((float)0.2); // 20% transparent + + // remove line below if PDF/A-2b (and possibly other PDF-A flavours) + // also add txtMark.setPrinted(true) + txtMark.setConstantOpacity((float) 0.2); // 20% transparent // Set the rectangle containing the markup float textWidth = font.getStringWidth("PDFBox") / 1000 * 18; PDRectangle position = new PDRectangle(); position.setLowerLeftX(INCH); position.setLowerLeftY(ph - INCH - 18); - position.setUpperRightX(72 + textWidth); + position.setUpperRightX(INCH + textWidth); position.setUpperRightY(ph - INCH); txtMark.setRectangle(position); @@ -124,16 +141,16 @@ public static void main(String[] args) throws IOException txtMark.setContents("Highlighted since it's important"); annotations.add(txtMark); - // Now add the link annotation, so the clickme works + // Now add the link annotation, so the click on "External URL" works PDAnnotationLink txtLink = new PDAnnotationLink(); txtLink.setBorderStyle(borderULine); // Set the rectangle containing the link - textWidth = font.getStringWidth("Click Here") / 1000 * 18; + textWidth = font.getStringWidth("External URL") / 1000 * 18; position = new PDRectangle(); position.setLowerLeftX(INCH); position.setLowerLeftY(ph - 1.5f * INCH -20); // down a couple of points - position.setUpperRightX(72 + textWidth); + position.setUpperRightX(INCH + textWidth); position.setUpperRightY(ph - 1.5f * INCH); txtLink.setRectangle(position); @@ -168,7 +185,7 @@ public static void main(String[] args) throws IOException 
aSquare.setColor(red); // Outline in red, not setting a fill aSquare.setBorderStyle(borderThick); - // Place the annotation on the page, we'll make this 1" (72points) square + // Place the annotation on the page, we'll make this 1" (72 points) square // 3.5" down, 1" in from the right on the page position = new PDRectangle(); // Reuse the variable, but note it's a new object! position.setLowerLeftX(pw - 2 * INCH); // 1" in from right, 1" wide @@ -203,7 +220,103 @@ public static void main(String[] args) throws IOException aLine.setBorderStyle(borderThick); aLine.setColor(black); annotations.add(aLine); + + + // Now add the link annotation, so the click on "Jump to page three" works + PDAnnotationLink pageLink = new PDAnnotationLink(); + pageLink.setBorderStyle(borderULine); + + // Set the rectangle containing the link + textWidth = font.getStringWidth("Jump to page three") / 1000 * 18; + position = new PDRectangle(); + position.setLowerLeftX(INCH); + position.setLowerLeftY(ph - 2 * INCH - 20); // down a couple of points + position.setUpperRightX(INCH + textWidth); + position.setUpperRightY(ph - 2 * INCH); + pageLink.setRectangle(position); + // add the GoTo action + PDActionGoTo actionGoto = new PDActionGoTo(); + // see javadoc for other types of PDPageDestination + PDPageDestination dest = new PDPageFitWidthDestination(); + // do not use setPageNumber(), this is for external destinations only + dest.setPage(page3); + actionGoto.setDestination(dest); + pageLink.setAction(actionGoto); + annotations.add(pageLink); + + PDAnnotationMarkup freeTextAnnotation = new PDAnnotationMarkup(); + freeTextAnnotation.getCOSObject().setName(COSName.SUBTYPE, PDAnnotationMarkup.SUB_TYPE_FREETEXT); + PDColor yellow = new PDColor(new float[] { 1, 1, 0 }, PDDeviceRGB.INSTANCE); + // this sets background only (contradicts PDF specification) + freeTextAnnotation.setColor(yellow); + position = new PDRectangle(); + position.setLowerLeftX(1 * INCH); + position.setLowerLeftY(ph - 5f * INCH - 3 
* INCH); + position.setUpperRightX(pw - INCH); + position.setUpperRightY(ph - 5f * INCH); + freeTextAnnotation.setRectangle(position); + freeTextAnnotation.setTitlePopup("Sophia Lorem"); + freeTextAnnotation.setSubject("Lorem ipsum"); + freeTextAnnotation.setContents("Lorem ipsum dolor sit amet, consetetur sadipscing elitr," + + " sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam " + + "erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea " + + "rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum " + + "dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, " + + "sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam " + + "erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea " + + "rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum " + + "dolor sit amet."); + // Text and border in blue RGB color, "Helv" font, 20 point + // setDefaultAppearance() missing in 2.0 + freeTextAnnotation.getCOSObject().setString(COSName.DA, "0 0 1 rg /Helv 20 Tf"); + freeTextAnnotation.setIntent("FreeTextCallout"); + COSArray newCallout = new COSArray(); + newCallout.setFloatArray(new float[]{0, ph - 9 * INCH, 3 * INCH, ph - 9 * INCH, 4 * INCH, ph - 8 * INCH}); + // setCallout() missing in 2.0 + freeTextAnnotation.getCOSObject().setItem(COSName.CL, newCallout); + freeTextAnnotation.getCOSObject(); + // setLineEndingStyle() missing in 2.0 + freeTextAnnotation.getCOSObject().setName(COSName.LE, PDAnnotationLine.LE_OPEN_ARROW); + annotations.add(freeTextAnnotation); + + // create a polygon annotation. 
Yes this is clunky, it will be easier in 3.0 + PDAnnotationMarkup polygon = new PDAnnotationMarkup(); + polygon.getCOSObject().setName(COSName.SUBTYPE, PDAnnotationMarkup.SUB_TYPE_POLYGON); + position = new PDRectangle(); + position.setLowerLeftX(pw - INCH); + position.setLowerLeftY(ph - INCH); + position.setUpperRightX(pw - 2 * INCH); + position.setUpperRightY(ph - 2 * INCH); + polygon.setRectangle(position); + polygon.setColor(blue); // border color + polygon.getCOSObject().setItem(COSName.IC, green.toCOSArray()); // interior color + COSArray verticesArray = new COSArray(); + verticesArray.add(new COSFloat(pw - INCH)); + verticesArray.add(new COSFloat(ph - 2 * INCH)); + verticesArray.add(new COSFloat(pw - INCH * 1.5f)); + verticesArray.add(new COSFloat(ph - INCH)); + verticesArray.add(new COSFloat(pw - 2 * INCH)); + verticesArray.add(new COSFloat(ph - 2 * INCH)); + polygon.getCOSObject().setItem(COSName.VERTICES, verticesArray); + polygon.setBorderStyle(borderThick); + polygon.setContents("Polygon annotation"); + annotations.add(polygon); + + // Create the appearance streams. + // Adobe Reader will always display annotations without appearance streams nicely, + // but other applications may not. + // Pass the PDDocument so that the appearance handler can look into the default resources + // for non-standard fonts. 
+ for (PDAnnotation ann : annotations) + { + ann.constructAppearances(document); + } + + showPageNo(document, page1, "Page 1"); + showPageNo(document, page2, "Page 2"); + showPageNo(document, page3, "Page 3"); + // save the PDF document.save(args[0]); } @@ -212,4 +325,23 @@ public static void main(String[] args) throws IOException document.close(); } } + + private static void showPageNo(PDDocument document, PDPage page, String pageText) + throws IOException + { + int fontSize = 10; + + PDPageContentStream contents = + new PDPageContentStream(document, page, PDPageContentStream.AppendMode.PREPEND, true); + float pageWidth = page.getMediaBox().getWidth(); + float pageHeight = page.getMediaBox().getHeight(); + PDFont font = PDType1Font.HELVETICA; + contents.setFont(font, fontSize); + float textWidth = font.getStringWidth(pageText) / 1000 * fontSize; + contents.beginText(); + contents.newLineAtOffset(pageWidth / 2 - textWidth / 2, pageHeight - INCH / 2); + contents.showText(pageText); + contents.endText(); + contents.close(); + } } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/AddImageToPDF.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/AddImageToPDF.java index e6ca5b1f637..9b4446af26e 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/AddImageToPDF.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/AddImageToPDF.java @@ -58,7 +58,7 @@ public void createPDFFromImage( String inputFile, String imagePath, String outpu // if you already have the image in a BufferedImage, // call LosslessFactory.createFromImage() instead PDImageXObject pdImage = PDImageXObject.createFromFile(imagePath, doc); - PDPageContentStream contentStream = new PDPageContentStream(doc, page, AppendMode.APPEND, true); + PDPageContentStream contentStream = new PDPageContentStream(doc, page, AppendMode.APPEND, true, true); // contentStream.drawImage(ximage, 20, 20 ); // better method inspired by 
http://stackoverflow.com/a/22318681/535646 @@ -80,7 +80,7 @@ public void createPDFFromImage( String inputFile, String imagePath, String outpu /** * This will load a PDF document and add a single image on it. - *
+ *
* see usage() for commandline * * @param args Command line arguments. diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/AddMessageToEachPage.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/AddMessageToEachPage.java index 56d3fc79583..434d70be8ee 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/AddMessageToEachPage.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/AddMessageToEachPage.java @@ -16,6 +16,7 @@ */ package org.apache.pdfbox.examples.pdmodel; +import java.awt.Color; import java.io.File; import java.io.IOException; @@ -81,7 +82,7 @@ public void doIt( String file, String message, String outfile ) throws IOExcept // set font and font size contentStream.setFont( font, fontSize ); // set text color to red - contentStream.setNonStrokingColor(255, 0, 0); + contentStream.setNonStrokingColor(Color.red); if (rotate) { // rotate the text according to the page rotation @@ -109,7 +110,7 @@ public void doIt( String file, String message, String outfile ) throws IOExcept /** * This will create a hello world PDF document. - *
+ *
* see usage() for commandline * * @param args Command line arguments. diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateBookmarks.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateBookmarks.java index b214f28eb9d..74610501e99 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateBookmarks.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateBookmarks.java @@ -21,6 +21,8 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PageMode; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageFitWidthDestination; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem; @@ -71,7 +73,10 @@ public static void main( String[] args ) throws IOException for( PDPage page : document.getPages() ) { pageNum++; - PDPageFitWidthDestination dest = new PDPageFitWidthDestination(); + PDPageDestination dest = new PDPageFitWidthDestination(); + // If you want to have several bookmarks pointing to different areas + // on the same page, have a look at the other classes derived from PDPageDestination. 
+ dest.setPage( page ); PDOutlineItem bookmark = new PDOutlineItem(); bookmark.setDestination( dest ); @@ -80,6 +85,9 @@ public static void main( String[] args ) throws IOException } pagesOutline.openNode(); outline.openNode(); + + // optional: show the outlines when opening the file + document.getDocumentCatalog().setPageMode(PageMode.USE_OUTLINES); document.save( args[1] ); } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateGradientShadingPDF.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateGradientShadingPDF.java index 0bef63de143..6d3e0d9332b 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateGradientShadingPDF.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateGradientShadingPDF.java @@ -15,15 +15,17 @@ */ package org.apache.pdfbox.examples.pdmodel; -import java.awt.image.BufferedImage; -import java.io.File; import java.io.IOException; -import javax.imageio.ImageIO; +import java.io.OutputStream; + +import javax.imageio.stream.MemoryCacheImageOutputStream; + import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.function.PDFunctionType2; @@ -33,11 +35,11 @@ import org.apache.pdfbox.pdmodel.graphics.shading.PDShading; import org.apache.pdfbox.pdmodel.graphics.shading.PDShadingType2; import org.apache.pdfbox.pdmodel.graphics.shading.PDShadingType3; -import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.pdmodel.graphics.shading.PDShadingType4; /** - * This example creates a PDF with type 2 (axial) and 3 (radial) shadings with a - * type 2 (exponential) function. 
+ * This example creates a PDF with type 2 (axial) and type 3 (radial) shadings with a type 2 + * (exponential) function, and a type 4 (gouraud triangle shading) without function. * * @author Tilman Hausherr */ @@ -65,16 +67,16 @@ public void create(String file) throws IOException COSDictionary fdict = new COSDictionary(); fdict.setInt(COSName.FUNCTION_TYPE, 2); COSArray domain = new COSArray(); - domain.add(COSInteger.get(0)); - domain.add(COSInteger.get(1)); + domain.add(COSInteger.ZERO); + domain.add(COSInteger.ONE); COSArray c0 = new COSArray(); - c0.add(COSFloat.get("1")); - c0.add(COSFloat.get("0")); - c0.add(COSFloat.get("0")); + c0.add(COSInteger.ONE); + c0.add(COSInteger.ZERO); + c0.add(COSInteger.ZERO); COSArray c1 = new COSArray(); - c1.add(COSFloat.get("0.5")); - c1.add(COSFloat.get("1")); - c1.add(COSFloat.get("0.5")); + c1.add(COSNumber.get("0.5")); + c1.add(COSInteger.ONE); + c1.add(COSNumber.get("0.5")); fdict.setItem(COSName.DOMAIN, domain); fdict.setItem(COSName.C0, c0); fdict.setItem(COSName.C1, c1); @@ -107,21 +109,89 @@ public void create(String file) throws IOException radialShading.setCoords(coords2); radialShading.setFunction(func); + // Gouraud shading + // See PDF 32000 specification, + // 8.7.4.5.5 Type 4 Shadings (Free-Form Gouraud-Shaded Triangle Meshes) + PDShadingType4 gouraudShading = new PDShadingType4(document.getDocument().createCOSStream()); + gouraudShading.setShadingType(PDShading.SHADING_TYPE4); + // we use multiple of 8, so that no padding is needed + gouraudShading.setBitsPerFlag(8); + gouraudShading.setBitsPerCoordinate(16); + gouraudShading.setBitsPerComponent(8); + COSArray decodeArray = new COSArray(); + // coordinates x y map 16 bits 0..FFFF to 0..FFFF to make your life easy + // so no calculation is needed, but you can only use integer coordinates + // for real numbers, you'll need smaller bounds, e.g. 0xFFFF / 0xA = 0x1999 + // would allow 1 point decimal result coordinate. 
+ // See in PDF specification: 8.9.5.2 Decode Arrays + decodeArray.add(COSInteger.ZERO); + decodeArray.add(COSInteger.get(0xFFFF)); + decodeArray.add(COSInteger.ZERO); + decodeArray.add(COSInteger.get(0xFFFF)); + // colors r g b map 8 bits from 0..FF to 0..1 + decodeArray.add(COSInteger.ZERO); + decodeArray.add(COSInteger.ONE); + decodeArray.add(COSInteger.ZERO); + decodeArray.add(COSInteger.ONE); + decodeArray.add(COSInteger.ZERO); + decodeArray.add(COSInteger.ONE); + gouraudShading.setDecodeValues(decodeArray); + gouraudShading.setColorSpace(PDDeviceRGB.INSTANCE); + + // Function is not required for type 4 shadings and not really useful, + // because if a function would be used, each corner "color" of a triangle would be one value, + // which would then transformed into n color components by the function so it is + // difficult to get 3 "extremes". + + // fill the vertex stream + OutputStream os = ((COSStream) gouraudShading.getCOSObject()).createOutputStream(); + MemoryCacheImageOutputStream mcos = new MemoryCacheImageOutputStream(os); + + // Vertex 1, starts with flag1 + // (flags always 0 for vertices of start triangle) + mcos.writeByte(0); + // x1 y1 (left corner) + mcos.writeShort(0); + mcos.writeShort(0); + // r1 g1 b1 (red) + mcos.writeByte(0xFF); + mcos.writeByte(0); + mcos.writeByte(0); + + // Vertex 2, starts with flag2 + mcos.writeByte(0); + // x2 y2 (top corner) + mcos.writeShort(100); + mcos.writeShort(100); + // r2 g2 b2 (green) + mcos.writeByte(0); + mcos.writeByte(0xFF); + mcos.writeByte(0); + + // Vertex 3, starts with flag3 + mcos.writeByte(0); + // x3 y3 (right corner) + mcos.writeShort(200); + mcos.writeShort(0); + // r3 g3 b3 (blue) + mcos.writeByte(0); + mcos.writeByte(0); + mcos.writeByte(0xFF); + + mcos.close(); + // outside stream MUST be closed as well, see javadoc of MemoryCacheImageOutputStream + os.close(); + // invoke shading from content stream // compress parameter is set to false so that you can see the stream in a text editor 
PDPageContentStream contentStream = new PDPageContentStream(document, page, AppendMode.APPEND, false); contentStream.shadingFill(axialShading); contentStream.shadingFill(radialShading); + contentStream.shadingFill(gouraudShading); contentStream.close(); document.save(file); document.close(); - - // render the PDF and save it into a PNG file - document = PDDocument.load(new File(file)); - BufferedImage bim = new PDFRenderer(document).renderImageWithDPI(0, 300); - ImageIO.write(bim, "png", new File(file + ".png")); - document.close(); } finally { @@ -157,6 +227,6 @@ public static void main(String[] args) throws IOException */ private static void usage() { - System.err.println("usage: java o" + CreateGradientShadingPDF.class.getName() + " "); + System.err.println("usage: java " + CreateGradientShadingPDF.class.getName() + " "); } } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePDFA.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePDFA.java index 9a7e3db1e47..ee85bc7d6f1 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePDFA.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePDFA.java @@ -64,6 +64,21 @@ public static void main(String[] args) throws IOException, TransformerException // load the font as this needs to be embedded PDFont font = PDType0Font.load(doc, new File(fontfile)); + + // A PDF/A file needs to have the font embedded if the font is used for text rendering + // in rendering modes other than text rendering mode 3. + // + // This requirement includes the PDF standard fonts, so don't use their static PDFType1Font classes such as + // PDFType1Font.HELVETICA. + // + // As there are many different font licenses it is up to the developer to check if the license terms for the + // font loaded allows embedding in the PDF. 
+ // + if (!font.isEmbedded()) + { + throw new IllegalStateException("PDF/A compliance requires that all fonts used for" + + " text rendering in rendering modes other than rendering mode 3 are embedded."); + } // create a page with the message PDPageContentStream contents = new PDPageContentStream(doc, page); @@ -72,7 +87,6 @@ public static void main(String[] args) throws IOException, TransformerException contents.newLineAtOffset(100, 700); contents.showText(message); contents.endText(); - contents.saveGraphicsState(); contents.close(); // add XMP metadata @@ -103,7 +117,7 @@ public static void main(String[] args) throws IOException, TransformerException // sRGB output intent InputStream colorProfile = CreatePDFA.class.getResourceAsStream( - "/org/apache/pdfbox/resources/pdfa/sRGB Color Space Profile.icm"); + "/org/apache/pdfbox/resources/pdfa/sRGB.icc"); PDOutputIntent intent = new PDOutputIntent(doc, colorProfile); intent.setInfo("sRGB IEC61966-2.1"); intent.setOutputCondition("sRGB IEC61966-2.1"); diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePageLabels.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePageLabels.java new file mode 100644 index 00000000000..d1b3c456724 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePageLabels.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.pdmodel; + +import java.io.IOException; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.common.PDPageLabelRange; +import org.apache.pdfbox.pdmodel.common.PDPageLabels; + +/** + * Create a 3-page PDF with the page labels "RO III", "RO IV", "1". + * + * @author Tilman Hausherr + */ +public class CreatePageLabels +{ + /** + * Constructor. + */ + private CreatePageLabels() + { + } + + public static void main(String[] args) throws IOException + { + PDDocument doc = new PDDocument(); + doc.addPage(new PDPage()); + doc.addPage(new PDPage()); + doc.addPage(new PDPage()); + PDPageLabels pageLabels = new PDPageLabels(doc); + PDPageLabelRange pageLabelRange1 = new PDPageLabelRange(); + pageLabelRange1.setPrefix("RO "); + pageLabelRange1.setStart(3); + pageLabelRange1.setStyle(PDPageLabelRange.STYLE_ROMAN_UPPER); + pageLabels.setLabelItem(0, pageLabelRange1); + PDPageLabelRange pageLabelRange2 = new PDPageLabelRange(); + pageLabelRange2.setStart(1); + pageLabelRange2.setStyle(PDPageLabelRange.STYLE_DECIMAL); + pageLabels.setLabelItem(2, pageLabelRange2); + doc.getDocumentCatalog().setPageLabels(pageLabels); + doc.save("labels.pdf"); + doc.close(); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePatternsPDF.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePatternsPDF.java new file mode 100644 index 00000000000..88f7beee176 --- /dev/null +++ 
b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePatternsPDF.java @@ -0,0 +1,132 @@ +/* + * Copyright 2016 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.examples.pdmodel; + +import java.awt.Color; +import java.io.IOException; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.graphics.color.PDPattern; +import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern; + +/** + * This is an example of how to create a page that uses patterns to paint areas. + * + * @author Tilman Hausherr + */ +public final class CreatePatternsPDF +{ + private CreatePatternsPDF() + { + } + + public static void main(String[] args) throws IOException + { + PDDocument doc = new PDDocument(); + PDPage page = new PDPage(); + doc.addPage(page); + page.setResources(new PDResources()); + + PDPageContentStream pcs = new PDPageContentStream(doc, page); + + // Colored pattern, i.e. 
the pattern content stream will set its own color(s) + PDColorSpace patternCS1 = new PDPattern(null, PDDeviceRGB.INSTANCE); + + // Table 75 spec + PDTilingPattern tilingPattern1 = new PDTilingPattern(); + tilingPattern1.setBBox(new PDRectangle(0, 0, 10, 10)); + tilingPattern1.setPaintType(PDTilingPattern.PAINT_COLORED); + tilingPattern1.setTilingType(PDTilingPattern.TILING_CONSTANT_SPACING); + tilingPattern1.setXStep(10); + tilingPattern1.setYStep(10); + + COSName patternName1 = page.getResources().add(tilingPattern1); + + PDPageContentStream cs1 = new PDPageContentStream(doc, + tilingPattern1, + tilingPattern1.getContentStream().createOutputStream()); + // Set color, draw diagonal line + 2 more diagonals so that corners look good + cs1.setStrokingColor(Color.red); + cs1.moveTo(0, 0); + cs1.lineTo(10, 10); + cs1.moveTo(-1, 9); + cs1.lineTo(1, 11); + cs1.moveTo(9, -1); + cs1.lineTo(11, 1); + cs1.stroke(); + cs1.close(); + + PDColor patternColor1 = new PDColor(patternName1, patternCS1); + + pcs.addRect(50, 500, 200, 200); + pcs.setNonStrokingColor(patternColor1); + pcs.fill(); + + // Uncolored pattern - the color is passed later + PDTilingPattern tilingPattern2 = new PDTilingPattern(); + tilingPattern2.setBBox(new PDRectangle(0, 0, 10, 10)); + tilingPattern2.setPaintType(PDTilingPattern.PAINT_UNCOLORED); + tilingPattern2.setTilingType(PDTilingPattern.TILING_NO_DISTORTION); + tilingPattern2.setXStep(10); + tilingPattern2.setYStep(10); + + COSName patternName2 = page.getResources().add(tilingPattern2); + + PDPageContentStream cs2 = new PDPageContentStream(doc, + tilingPattern2, + tilingPattern2.getContentStream().createOutputStream()); + // draw a cross + cs2.moveTo(0, 5); + cs2.lineTo(10, 5); + cs2.moveTo(5, 0); + cs2.lineTo(5, 10); + cs2.stroke(); + cs2.close(); + + // Uncolored pattern colorspace needs to know the colorspace + // for the color values that will be passed when painting the fill + PDColorSpace patternCS2 = new PDPattern(null, PDDeviceRGB.INSTANCE); + 
PDColor patternColor2green = new PDColor( + new float[]{0,1,0}, + patternName2, + patternCS2); + + pcs.addRect(300, 500, 100, 100); + pcs.setNonStrokingColor(patternColor2green); + pcs.fill(); + + // same pattern again but with different color + different pattern start position + PDColor patternColor2blue = new PDColor( + new float[]{0,0,1}, + patternName2, + patternCS2); + pcs.addRect(455, 505, 100, 100); + pcs.setNonStrokingColor(patternColor2blue); + pcs.fill(); + + pcs.close(); + doc.save("patterns.pdf"); + doc.close(); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePortableCollection.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePortableCollection.java new file mode 100644 index 00000000000..d25dbb422be --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreatePortableCollection.java @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.examples.pdmodel; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; +import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.PageMode; +import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification; +import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.util.Charsets; + +/** + * This is an example on how to create a portable collection PDF, as described in the PDF 1.7 + * specification in chapter 12.3.5. It uses the COS methods because there are not any PD classes + * yet. If you want to help, we'd need PDCollection, PDCollectionField, PDCollectionSort and + * PDCollectionItem. + * + * @author Tilman Hausherr + */ +public class CreatePortableCollection +{ + + /** + * Constructor. + */ + private CreatePortableCollection() + { + } + + /** + * Create a portable collection PDF with two files. + * + * @param file The file to write the PDF to. + * + * @throws IOException If there is an error writing the data. 
+ */ + public void doIt(String file) throws IOException + { + PDDocument doc = new PDDocument(); + PDPage page = new PDPage(); + doc.addPage(page); + + PDPageContentStream contentStream = new PDPageContentStream(doc, page); + contentStream.beginText(); + contentStream.setFont(PDType1Font.HELVETICA, 12); + contentStream.newLineAtOffset(100, 700); + contentStream.showText("Example of a portable collection"); + contentStream.endText(); + contentStream.close(); + + //embedded files are stored in a named tree + PDEmbeddedFilesNameTreeNode efTree = new PDEmbeddedFilesNameTreeNode(); + + //first create the file specification, which holds the embedded file + PDComplexFileSpecification fs1 = new PDComplexFileSpecification(); + + // use both methods for backwards, cross-platform and cross-language compatibility. + fs1.setFile("Test1.txt"); + fs1.setFileUnicode("Test1.txt"); + + //create a dummy file stream, this would probably normally be a FileInputStream + byte[] data1 = "This is the contents of the first embedded file".getBytes(Charsets.ISO_8859_1); + PDEmbeddedFile ef1 = new PDEmbeddedFile(doc, new ByteArrayInputStream(data1), COSName.FLATE_DECODE); + //now lets some of the optional parameters + ef1.setSubtype("text/plain"); + ef1.setSize(data1.length); + ef1.setCreationDate(new GregorianCalendar()); + + // use both methods for backwards, cross-platform and cross-language compatibility. + fs1.setEmbeddedFile(ef1); + fs1.setEmbeddedFileUnicode(ef1); + fs1.setFileDescription("The first file"); + + //first create the file specification, which holds the embedded file + PDComplexFileSpecification fs2 = new PDComplexFileSpecification(); + + // use both methods for backwards, cross-platform and cross-language compatibility. 
+ fs2.setFile("Test2.txt"); + fs2.setFileUnicode("Test2.txt"); + + //create a dummy file stream, this would probably normally be a FileInputStream + byte[] data2 = "This is the contents of the second embedded file".getBytes(Charsets.ISO_8859_1); + PDEmbeddedFile ef2 = new PDEmbeddedFile(doc, new ByteArrayInputStream(data2), COSName.FLATE_DECODE); + //now lets some of the optional parameters + ef2.setSubtype("text/plain"); + ef2.setSize(data2.length); + ef2.setCreationDate(new GregorianCalendar()); + + // use both methods for backwards, cross-platform and cross-language compatibility. + fs2.setEmbeddedFile(ef2); + fs2.setEmbeddedFileUnicode(ef2); + fs2.setFileDescription("The second file"); + + Map map = new HashMap(); + map.put("Attachment 1", fs1); + map.put("Attachment 2", fs2); + + // create a new tree node and add the embedded file + PDEmbeddedFilesNameTreeNode treeNode = new PDEmbeddedFilesNameTreeNode(); + treeNode.setNames(map); + // add the new node as kid to the root node + List kids = new ArrayList(); + kids.add(treeNode); + efTree.setKids(kids); + + // add the tree to the document catalog + PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog()); + names.setEmbeddedFiles(efTree); + doc.getDocumentCatalog().setNames(names); + + // show attachments panel in some viewers + doc.getDocumentCatalog().setPageMode(PageMode.USE_ATTACHMENTS); + + // create collection directory + COSDictionary collectionDic = new COSDictionary(); + COSDictionary schemaDict = new COSDictionary(); + schemaDict.setItem(COSName.TYPE, COSName.COLLECTION_SCHEMA); + COSDictionary sortDic = new COSDictionary(); + sortDic.setItem(COSName.TYPE, COSName.COLLECTION_SORT); + sortDic.setString(COSName.A, "true"); // sort ascending + // "it identifies a field described in the parent collection dictionary" + // sort by field 2 + sortDic.setItem(COSName.S, COSName.getPDFName("fieldtwo")); + collectionDic.setItem(COSName.TYPE, COSName.COLLECTION); + 
collectionDic.setItem(COSName.SCHEMA, schemaDict); + collectionDic.setItem(COSName.SORT, sortDic); + collectionDic.setItem(COSName.VIEW, COSName.D); // Details mode + COSDictionary fieldDict1 = new COSDictionary(); + fieldDict1.setItem(COSName.TYPE, COSName.COLLECTION_FIELD); + fieldDict1.setItem(COSName.SUBTYPE, COSName.S); // type: text field + fieldDict1.setString(COSName.N, "field header one (description)"); // header text + fieldDict1.setInt(COSName.O, 1); // order on the screen + COSDictionary fieldDict2 = new COSDictionary(); + fieldDict2.setItem(COSName.TYPE, COSName.COLLECTION_FIELD); + fieldDict2.setItem(COSName.SUBTYPE, COSName.S); // type: text field + fieldDict2.setString(COSName.N, "field header two (name)"); + fieldDict2.setInt(COSName.O, 2); + COSDictionary fieldDict3 = new COSDictionary(); + fieldDict3.setItem(COSName.TYPE, COSName.COLLECTION_FIELD); + fieldDict3.setItem(COSName.SUBTYPE, COSName.N); // type: number field + fieldDict3.setString(COSName.N, "field header three (size)"); + fieldDict3.setInt(COSName.O, 3); + schemaDict.setItem("fieldone", fieldDict1); // field name (this is a key) + schemaDict.setItem("fieldtwo", fieldDict2); + schemaDict.setItem("fieldthree", fieldDict3); + doc.getDocumentCatalog().getCOSObject().setItem(COSName.COLLECTION, collectionDic); + doc.getDocumentCatalog().setVersion("1.7"); + + // collection item dictionary with fields for 1st file + COSDictionary ciDict1 = new COSDictionary(); + ciDict1.setItem(COSName.TYPE, COSName.COLLECTION_ITEM); + // use the field names from earlier + ciDict1.setString("fieldone", fs1.getFileDescription()); + ciDict1.setString("fieldtwo", fs1.getFile()); + ciDict1.setInt("fieldthree", fs1.getEmbeddedFile().getSize()); + fs1.getCOSObject().setItem(COSName.CI, ciDict1); + + // collection item dictionary with fields for 2nd file + COSDictionary ciDict2 = new COSDictionary(); + ciDict2.setItem(COSName.TYPE, COSName.COLLECTION_ITEM); + // use the field names from earlier + 
ciDict2.setString("fieldone", fs2.getFileDescription()); + ciDict2.setString("fieldtwo", fs2.getFile()); + ciDict2.setInt("fieldthree", fs2.getEmbeddedFile().getSize()); + fs2.getCOSObject().setItem(COSName.CI, ciDict2); + + doc.save(file); + doc.close(); + } + + /** + * This will create a portable collection PDF. + *
+ * see usage() for commandline + * + * @param args Command line arguments. + */ + public static void main(String[] args) throws IOException + { + CreatePortableCollection app = new CreatePortableCollection(); + if (args.length != 1) + { + app.usage(); + } + else + { + app.doIt(args[0]); + } + } + + /** + * This will print out a message telling how to use this example. + */ + private void usage() + { + System.err.println("usage: " + this.getClass().getName() + " "); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateSeparationColorBox.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateSeparationColorBox.java new file mode 100644 index 00000000000..9bcca404dec --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateSeparationColorBox.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.examples.pdmodel; + +import java.io.IOException; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSInteger; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.common.function.PDFunctionType2; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDSeparation; + +/** + * This example shows how to use a separation color / spot color. Here it is a placeholder for gold, + * and it is displayed as yellow. You can see the colorspace in PDFDebugger by going to + * "Root/Pages/Kids/[0]/Resources/ColorSpace/cs1". + * + * @author Tilman Hausherr + */ +public class CreateSeparationColorBox +{ + private CreateSeparationColorBox() + { + } + + public static void main(String[] args) throws IOException + { + PDDocument doc = new PDDocument(); + PDPage page = new PDPage(); + doc.addPage(page); + + COSArray separationArray = new COSArray(); + separationArray.add(COSName.SEPARATION); // type + separationArray.add(COSName.getPDFName("Gold")); // the name, e.g. 
metallic, fluorescent, glitter + separationArray.add(COSName.DEVICERGB); // alternate colorspace + + // tint transform function, results between C0=white (1 1 1) and C1=yellow (1 1 0) + COSDictionary fdict = new COSDictionary(); + fdict.setInt(COSName.FUNCTION_TYPE, 2); + COSArray range = new COSArray(); + range.add(COSInteger.ZERO); + range.add(COSInteger.ONE); + range.add(COSInteger.ZERO); + range.add(COSInteger.ONE); + range.add(COSInteger.ZERO); + range.add(COSInteger.ONE); + fdict.setItem(COSName.RANGE, range); + COSArray domain = new COSArray(); + domain.add(COSInteger.ZERO); + domain.add(COSInteger.ONE); + fdict.setItem(COSName.DOMAIN, domain); + COSArray c0 = new COSArray(); + c0.add(COSInteger.ONE); + c0.add(COSInteger.ONE); + c0.add(COSInteger.ONE); + fdict.setItem(COSName.C0, c0); + COSArray c1 = new COSArray(); + c1.add(COSInteger.ONE); + c1.add(COSInteger.ONE); + c1.add(COSInteger.ZERO); + fdict.setItem(COSName.C1, c1); + fdict.setInt(COSName.N, 1); + PDFunctionType2 func = new PDFunctionType2(fdict); + separationArray.add(func); + + PDColorSpace spotColorSpace = new PDSeparation(separationArray); + + PDPageContentStream cs = new PDPageContentStream(doc, page); + PDColor color = new PDColor(new float[]{0.5f}, spotColorSpace); + cs.setStrokingColor(color); + cs.setLineWidth(10); + cs.addRect(50, 50, 500, 700); + cs.stroke(); + cs.close(); + doc.save("gold.pdf"); + doc.close(); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedFiles.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedFiles.java index b8d5162857c..a5bb418add3 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedFiles.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedFiles.java @@ -27,11 +27,10 @@ import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; import org.apache.pdfbox.pdmodel.PDPage; - import 
org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification; import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile; import org.apache.pdfbox.pdmodel.PDPageContentStream; - +import org.apache.pdfbox.pdmodel.PageMode; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType1Font; @@ -45,9 +44,8 @@ public class EmbeddedFiles /** * Constructor. */ - public EmbeddedFiles() + private EmbeddedFiles() { - super(); } /** @@ -59,7 +57,6 @@ public EmbeddedFiles() */ public void doIt( String file) throws IOException { - // the document PDDocument doc = null; try { @@ -82,19 +79,26 @@ public void doIt( String file) throws IOException //first create the file specification, which holds the embedded file PDComplexFileSpecification fs = new PDComplexFileSpecification(); + + // use both methods for backwards, cross-platform and cross-language compatibility. fs.setFile( "Test.txt" ); + fs.setFileUnicode("Test.txt"); + //create a dummy file stream, this would probably normally be a FileInputStream byte[] data = "This is the contents of the embedded file".getBytes("ISO-8859-1"); - ByteArrayInputStream fakeFile = - new ByteArrayInputStream( data ); + ByteArrayInputStream fakeFile = new ByteArrayInputStream(data); PDEmbeddedFile ef = new PDEmbeddedFile(doc, fakeFile ); //now lets some of the optional parameters - ef.setSubtype( "test/plain" ); + ef.setSubtype( "text/plain" ); ef.setSize( data.length ); ef.setCreationDate( new GregorianCalendar() ); + + // use both methods for backwards, cross-platform and cross-language compatibility. 
fs.setEmbeddedFile( ef ); + fs.setEmbeddedFileUnicode(ef); + fs.setFileDescription("Very interesting file"); - // create a new tree node and add the embedded file + // create a new tree node and add the embedded file PDEmbeddedFilesNameTreeNode treeNode = new PDEmbeddedFilesNameTreeNode(); treeNode.setNames( Collections.singletonMap( "My first attachment", fs ) ); // add the new node as kid to the root node @@ -106,7 +110,8 @@ public void doIt( String file) throws IOException names.setEmbeddedFiles( efTree ); doc.getDocumentCatalog().setNames( names ); - + // show attachments panel in some viewers + doc.getDocumentCatalog().setPageMode(PageMode.USE_ATTACHMENTS); doc.save( file ); } finally @@ -120,7 +125,7 @@ public void doIt( String file) throws IOException /** * This will create a hello world PDF document with an embedded file. - *
+ *
* see usage() for commandline * * @param args Command line arguments. diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedFonts.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedFonts.java index b1e9c62ccbd..aa6220bf084 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedFonts.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedFonts.java @@ -51,7 +51,7 @@ public static void main(String[] args) throws IOException stream.beginText(); stream.setFont(font, 12); - stream.setLeading(12 * 1.2); + stream.setLeading(12 * 1.2f); stream.newLineAtOffset(50, 600); stream.showText("PDFBox's Unicode with Embedded TrueType Font"); diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedMultipleFonts.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedMultipleFonts.java new file mode 100644 index 00000000000..6ba7c08209b --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedMultipleFonts.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.examples.pdmodel; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.fontbox.ttf.TrueTypeCollection; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType0Font; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.encoding.GlyphList; +import org.apache.pdfbox.pdmodel.font.encoding.WinAnsiEncoding; + +/** + * Output a text without knowing which font is the right one. One use case is a worldwide address + * list. Only LTR languages are supported, RTL (e.g. Hebrew, Arabic) are not supported so they would + * appear in the wrong direction. Complex scripts (Thai, Arabic, some Indian languages) are also not + * supported, any output will look weird. 
There is an (unfinished) effort here: + * https://issues.apache.org/jira/browse/PDFBOX-4189 + * + * @author Tilman Hausherr + */ +public class EmbeddedMultipleFonts +{ + private EmbeddedMultipleFonts() + { + } + + public static void main(String[] args) throws IOException + { + PDDocument document = new PDDocument(); + + PDPage page = new PDPage(PDRectangle.A4); + document.addPage(page); + + PDFont font1 = PDType1Font.HELVETICA; // always have a simple font as first one + TrueTypeCollection ttc2 = new TrueTypeCollection(new File("c:/windows/fonts/batang.ttc")); + PDType0Font font2 = PDType0Font.load(document, ttc2.getFontByName("Batang"), true); // Korean + TrueTypeCollection ttc3 = new TrueTypeCollection(new File("c:/windows/fonts/mingliu.ttc")); + PDType0Font font3 = PDType0Font.load(document, ttc3.getFontByName("MingLiU"), true); // Chinese + PDType0Font font4 = PDType0Font.load(document, new File("c:/windows/fonts/mangal.ttf")); // Indian + PDType0Font font5 = PDType0Font.load(document, new File("c:/windows/fonts/ArialUni.ttf")); // Fallback + + PDPageContentStream cs = new PDPageContentStream(document, page); + + cs.beginText(); + List fonts = new ArrayList(); + fonts.add(font1); + fonts.add(font2); + fonts.add(font3); + fonts.add(font4); + fonts.add(font5); + cs.newLineAtOffset(20, 700); + showTextMultiple(cs, "abc 한국 中国 भारत 日本 abc", fonts, 20); + cs.endText(); + cs.close(); + + document.save("example.pdf"); + document.close(); + + ttc2.close(); + ttc3.close(); + } + + static void showTextMultiple(PDPageContentStream cs, String text, List fonts, float size) + throws IOException + { + try + { + // first try all at once + fonts.get(0).encode(text); + cs.setFont(fonts.get(0), size); + cs.showText(text); + return; + } + catch (IllegalArgumentException ex) + { + // do nothing + } + // now try separately + int i = 0; + while (i < text.length()) + { + boolean found = false; + for (PDFont font : fonts) + { + try + { + String s = text.substring(i, i + 1); + 
font.encode(s); + // it works! Try more with this font + int j = i + 1; + for (; j < text.length(); ++j) + { + String s2 = text.substring(j, j + 1); + + if (isWinAnsiEncoding(s2.codePointAt(0)) && font != fonts.get(0)) + { + // Without this segment, the example would have a flaw: + // This code tries to keep the current font, so + // the second "abc" would appear in a different font + // than the first one, which would be weird. + // This segment assumes that the first font has WinAnsiEncoding. + // (all static PDType1Font Times / Helvetica / Courier fonts) + break; + } + try + { + font.encode(s2); + } + catch (IllegalArgumentException ex) + { + // it's over + break; + } + } + s = text.substring(i, j); + cs.setFont(font, size); + cs.showText(s); + i = j; + found = true; + break; + } + catch (IllegalArgumentException ex) + { + // didn't work, will try next font + } + } + if (!found) + { + throw new IllegalArgumentException("Could not show '" + text.substring(i, i + 1) + + "' with the fonts provided"); + } + } + } + + static boolean isWinAnsiEncoding(int unicode) + { + String name = GlyphList.getAdobeGlyphList().codePointToName(unicode); + if (".notdef".equals(name)) + { + return false; + } + return WinAnsiEncoding.INSTANCE.contains(name); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedVerticalFonts.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedVerticalFonts.java new file mode 100644 index 00000000000..a0b35a68c0f --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/EmbeddedVerticalFonts.java @@ -0,0 +1,100 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.pdmodel; + +import java.io.File; +import java.io.IOException; +import org.apache.fontbox.ttf.TTFParser; +import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.font.PDType0Font; + +/** + * + * @author Aaron Madlon-Kay + */ +public class EmbeddedVerticalFonts +{ + private EmbeddedVerticalFonts() + { + } + + public static void main(String[] args) throws IOException + { + PDDocument document = new PDDocument(); + PDPage page = new PDPage(); + document.addPage(page); + + // The actual font file + // Download: https://ipafont.ipa.go.jp/ipafont/ipag00303.zip + // (free license: https://www.gnu.org/licenses/license-list.html#IPAFONT) + File ipafont = new File("ipag.ttf"); + + // You can also use a Windows 7 TrueType font collection, e.g. MingLiU: + // TrueTypeFont ttf = new TrueTypeCollection(new File("C:/windows/fonts/mingliu.ttc")).getFontByName("MingLiU") + // PDType0Font.loadVertical(document, ttf, true) + + // Load as horizontal + PDType0Font hfont = PDType0Font.load(document, ipafont); + + // Load as vertical + PDType0Font vfont = PDType0Font.loadVertical(document, ipafont); + + // Load as vertical, but disable vertical glyph substitution + // (You will usually not want this because it doesn't look good!) 
+ TrueTypeFont ttf = new TTFParser().parse(ipafont); + PDType0Font vfont2 = PDType0Font.loadVertical(document, ttf, true); + ttf.disableGsubFeature("vrt2"); + ttf.disableGsubFeature("vert"); + + PDPageContentStream contentStream = new PDPageContentStream(document, page); + contentStream.beginText(); + contentStream.setFont(hfont, 20); + contentStream.setLeading(25); + contentStream.newLineAtOffset(20, 300); + contentStream.showText("Key:"); + contentStream.newLine(); + contentStream.showText("① Horizontal"); + contentStream.newLine(); + contentStream.showText("② Vertical with substitution"); + contentStream.newLine(); + contentStream.showText("③ Vertical without substitution"); + contentStream.endText(); + + contentStream.beginText(); + contentStream.setFont(hfont, 20); + contentStream.newLineAtOffset(20, 650); + contentStream.showText("①「あーだこーだ」"); + contentStream.endText(); + + contentStream.beginText(); + contentStream.setFont(vfont, 20); + contentStream.newLineAtOffset(50, 600); + contentStream.showText("②「あーだこーだ」"); + contentStream.endText(); + + contentStream.beginText(); + contentStream.setFont(vfont2, 20); + contentStream.newLineAtOffset(100, 600); + contentStream.showText("③「あーだこーだ」"); + contentStream.endText(); + contentStream.close(); + // result file should look like the one attached to JIRA issue PDFBOX-4106 + document.save("vertical.pdf"); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java index bf24ee508ed..e3db313f8a5 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java @@ -57,70 +57,86 @@ public static void main( String[] args ) throws IOException usage(); System.exit(1); } - else + + PDDocument document = null; + try { - PDDocument document = null; - try + File pdfFile = new 
File(args[0]); + String filePath = pdfFile.getParent() + System.getProperty("file.separator"); + document = PDDocument.load(pdfFile ); + PDDocumentNameDictionary namesDictionary = + new PDDocumentNameDictionary( document.getDocumentCatalog() ); + PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles(); + if (efTree != null) { - File pdfFile = new File(args[0]); - String filePath = pdfFile.getParent() + System.getProperty("file.separator"); - document = PDDocument.load(pdfFile ); - PDDocumentNameDictionary namesDictionary = - new PDDocumentNameDictionary( document.getDocumentCatalog() ); - PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles(); - if (efTree != null) - { - Map names = efTree.getNames(); - if (names != null) - { - extractFiles(names, filePath); - } - else - { - List> kids = efTree.getKids(); - for (PDNameTreeNode node : kids) - { - names = node.getNames(); - extractFiles(names, filePath); - } - } - } - - // extract files from annotations - for (PDPage page : document.getPages()) - { - for (PDAnnotation annotation : page.getAnnotations()) - { - if (annotation instanceof PDAnnotationFileAttachment) - { - PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation; - PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment.getFile(); - PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec); - extractFile(filePath, fileSpec.getFilename(), embeddedFile); - } - } - } - + extractFilesFromEFTree(efTree, filePath); } - finally + + // extract files from page annotations + for (PDPage page : document.getPages()) { - if( document != null ) + extractFilesFromPage(page, filePath); + } + } + finally + { + if( document != null ) + { + document.close(); + } + } + + } + + private static void extractFilesFromPage(PDPage page, String filePath) throws IOException + { + for (PDAnnotation annotation : page.getAnnotations()) + { + if (annotation instanceof 
PDAnnotationFileAttachment) + { + PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation; + PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment.getFile(); + PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec); + if (embeddedFile != null) { - document.close(); + extractFile(filePath, fileSpec.getFilename(), embeddedFile); } } } } + private static void extractFilesFromEFTree(PDNameTreeNode efTree, String filePath) throws IOException + { + Map names = efTree.getNames(); + if (names != null) + { + extractFiles(names, filePath); + } + else + { + List> kids = efTree.getKids(); + if (kids == null) + { + return; + } + for (PDNameTreeNode node : kids) + { + extractFilesFromEFTree(node, filePath); + } + } + } + private static void extractFiles(Map names, String filePath) throws IOException { for (Entry entry : names.entrySet()) { - String filename = entry.getKey(); PDComplexFileSpecification fileSpec = entry.getValue(); PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec); - extractFile(filePath, filename, embeddedFile); + if (embeddedFile != null) + { + extractFile(filePath, fileSpec.getFilename(), embeddedFile); + } } } @@ -128,7 +144,14 @@ private static void extractFile(String filePath, String filename, PDEmbeddedFile throws IOException { String embeddedFilename = filePath + filename; - File file = new File(filePath + filename); + File file = new File(embeddedFilename); + File parentDir = file.getParentFile(); + if (!parentDir.exists()) + { + // sometimes paths contain a directory + System.out.println("Creating " + parentDir); + parentDir.mkdirs(); + } System.out.println("Writing " + embeddedFilename); FileOutputStream fos = null; try diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java index 4fb104cb0a4..20cf8fadbe5 100644 --- 
a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.text.DateFormat; import java.util.Calendar; -import java.util.Iterator; import java.util.List; import org.apache.xmpbox.XMPMetadata; @@ -74,37 +73,15 @@ public static void main(String[] args) throws IOException, XmpParsingException DomXmpParser xmpParser = new DomXmpParser(); try { - XMPMetadata metadata = xmpParser.parse(meta.createInputStream()); - - DublinCoreSchema dc = metadata.getDublinCoreSchema(); - if (dc != null) - { - display("Title:", dc.getTitle()); - display("Description:", dc.getDescription()); - listString("Creators: ", dc.getCreators()); - listCalendar("Dates:", dc.getDates()); - listString("Subjects:", dc.getSubjects()); - } - - AdobePDFSchema pdf = metadata.getAdobePDFSchema(); - if (pdf != null) - { - display("Keywords:", pdf.getKeywords()); - display("PDF Version:", pdf.getPDFVersion()); - display("PDF Producer:", pdf.getProducer()); - } - - XMPBasicSchema basic = metadata.getXMPBasicSchema(); - if (basic != null) - { - display("Create Date:", basic.getCreateDate()); - display("Modify Date:", basic.getModifyDate()); - display("Creator Tool:", basic.getCreatorTool()); - } + XMPMetadata metadata = xmpParser.parse(meta.toByteArray()); + + showDublinCoreSchema(metadata); + showAdobePDFSchema(metadata); + showXMPBasicSchema(metadata); } catch (XmpParsingException e) { - System.err.println("An error ouccred when parsing the meta data: " + System.err.println("An error occurred when parsing the metadata: " + e.getMessage()); } } @@ -130,6 +107,41 @@ public static void main(String[] args) throws IOException, XmpParsingException } } + private static void showXMPBasicSchema(XMPMetadata metadata) + { + XMPBasicSchema basic = metadata.getXMPBasicSchema(); + if (basic != null) + { + display("Create Date:", basic.getCreateDate()); + 
display("Modify Date:", basic.getModifyDate()); + display("Creator Tool:", basic.getCreatorTool()); + } + } + + private static void showAdobePDFSchema(XMPMetadata metadata) + { + AdobePDFSchema pdf = metadata.getAdobePDFSchema(); + if (pdf != null) + { + display("Keywords:", pdf.getKeywords()); + display("PDF Version:", pdf.getPDFVersion()); + display("PDF Producer:", pdf.getProducer()); + } + } + + private static void showDublinCoreSchema(XMPMetadata metadata) + { + DublinCoreSchema dc = metadata.getDublinCoreSchema(); + if (dc != null) + { + display("Title:", dc.getTitle()); + display("Description:", dc.getDescription()); + listString("Creators: ", dc.getCreators()); + listCalendar("Dates:", dc.getDates()); + listString("Subjects:", dc.getSubjects()); + } + } + private static void showDocumentInformation(PDDocumentInformation information) { display("Title:", information.getTitle()); @@ -146,10 +158,8 @@ private static void listString(String title, List list) return; } System.out.println(title); - Iterator iter = list.iterator(); - while (iter.hasNext()) + for (String string : list) { - String string = iter.next(); System.out.println(" " + string); } } @@ -161,10 +171,8 @@ private static void listCalendar(String title, List list) return; } System.out.println(title); - Iterator iter = list.iterator(); - while (iter.hasNext()) + for (Calendar calendar : list) { - Calendar calendar = iter.next(); System.out.println(" " + format(calendar)); } } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java index 27ba9bbfeb4..2c92cd940ff 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java @@ -43,6 +43,7 @@ public final class ExtractTTFFonts { private int fontCounter = 1; + @SuppressWarnings({"squid:S2068"}) private static final String 
PASSWORD = "-password"; private static final String PREFIX = "-prefix"; private static final String ADDKEY = "-addkey"; @@ -73,6 +74,7 @@ private void extractFonts(String[] args) throws IOException else { String pdfFile = null; + @SuppressWarnings({"squid:S2068"}) String password = ""; String prefix = null; boolean addKey = false; diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/HelloWorldType1.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/HelloWorldType1.java index c2ba427ba73..bf9837968b8 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/HelloWorldType1.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/HelloWorldType1.java @@ -19,6 +19,7 @@ import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; @@ -54,7 +55,9 @@ public static void main(String[] args) throws IOException PDPage page = new PDPage(); doc.addPage(page); - PDFont font = new PDType1Font(doc, new FileInputStream(pfbPath)); + InputStream is = new FileInputStream(pfbPath); + PDFont font = new PDType1Font(doc, is); + is.close(); PDPageContentStream contents = new PDPageContentStream(doc, page); contents.beginText(); diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/LightlyPdfToImage.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/LightlyPdfToImage.java new file mode 100644 index 00000000000..45e198c9fe8 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/LightlyPdfToImage.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.pdmodel; + +import org.apache.pdfbox.io.MemoryUsageSetting; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.tools.imageio.ImageIOUtil; + +import java.awt.image.BufferedImage; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; + +/** + * Convert PDF documents to images. + * + *

+ * This example shows two settings that are very important to prevent OOM (out of memory) errors when rendering complex PDF files: + *

+ *     PDDocument.load(in, MemoryUsageSetting.setupTempFileOnly())
+ *     renderer.setSubsamplingAllowed(true)
+ * 
+ * + * @author lanshiqin + */ +public final class LightlyPdfToImage { + + private static final int DPI = 100; + private static final String FILE_SUFFIX = ".pdf"; + private static final int ARGS_LENGTH = 2; + + private LightlyPdfToImage() { + } + + public static void main(String[] args) throws IOException { + if (args.length != ARGS_LENGTH) { + System.err.println("usage: " + LightlyPdfToImage.class.getName() + " "); + System.exit(1); + } + + String pdfPath = args[0]; + String outputPath = args[1]; + + if (!pdfPath.endsWith(FILE_SUFFIX)) { + System.err.println("Last argument must be the destination .pdf file"); + System.exit(1); + } + + InputStream in = new URL("file:///" + pdfPath).openStream(); + // Load document with temp file only + // This is very important to prevent OOM when parsing complex PDF files + PDDocument document = PDDocument.load(in, MemoryUsageSetting.setupTempFileOnly()); + try { + // no use resource cache, Preventing large objects + document.setResourceCache(null); + PDFRenderer renderer = new PDFRenderer(document); + // Indicates that the renderer is allowed to sub sample the image before drawing. 
+ // This is very important to prevent OOM when parsing complex PDF files + renderer.setSubsamplingAllowed(true); + for (int i = 0; i < document.getNumberOfPages(); i++) { + BufferedImage bufferedImage = renderer.renderImageWithDPI(i, DPI); + try { + ImageIOUtil.writeImage(bufferedImage, outputPath + i + ".png", DPI, -1); + } finally { + bufferedImage.getGraphics().dispose(); + } + } + } finally { + document.close(); + in.close(); + } + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/PrintBookmarks.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/PrintBookmarks.java index e7c6509449b..e57ff14fd06 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/PrintBookmarks.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/PrintBookmarks.java @@ -16,14 +16,17 @@ */ package org.apache.pdfbox.examples.pdmodel; +import java.io.File; +import java.io.IOException; + import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDNamedDestination; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineNode; -import java.io.File; -import java.io.IOException; - /** * This is an example on how to access the bookmarks that are part of a pdf document. 
* @@ -55,7 +58,7 @@ public static void main( String[] args ) throws IOException PDDocumentOutline outline = document.getDocumentCatalog().getDocumentOutline(); if( outline != null ) { - meta.printBookmark( outline, "" ); + meta.printBookmark(document, outline, ""); } else { @@ -83,20 +86,68 @@ private static void usage() /** * This will print the documents bookmarks to System.out. * + * @param document The document. * @param bookmark The bookmark to print out. * @param indentation A pretty printing parameter * * @throws IOException If there is an error getting the page count. */ - public void printBookmark( PDOutlineNode bookmark, String indentation ) throws IOException + public void printBookmark(PDDocument document, PDOutlineNode bookmark, String indentation) throws IOException { PDOutlineItem current = bookmark.getFirstChild(); while( current != null ) { + // one could also use current.findDestinationPage(document) to get the page number, + // but this example does it the hard way to explain the different types + // Note that bookmarks can also do completely different things, e.g. link to a website, + // or to an external file. This example focuses on internal pages. 
+ + if (current.getDestination() instanceof PDPageDestination) + { + PDPageDestination pd = (PDPageDestination) current.getDestination(); + System.out.println(indentation + "Destination page: " + (pd.retrievePageNumber() + 1)); + } + else if (current.getDestination() instanceof PDNamedDestination) + { + PDPageDestination pd = document.getDocumentCatalog().findNamedDestinationPage((PDNamedDestination) current.getDestination()); + if (pd != null) + { + System.out.println(indentation + "Destination page: " + (pd.retrievePageNumber() + 1)); + } + } + else if (current.getDestination() != null) + { + System.out.println(indentation + "Destination class: " + current.getDestination().getClass().getSimpleName()); + } + + if (current.getAction() instanceof PDActionGoTo) + { + PDActionGoTo gta = (PDActionGoTo) current.getAction(); + if (gta.getDestination() instanceof PDPageDestination) + { + PDPageDestination pd = (PDPageDestination) gta.getDestination(); + System.out.println(indentation + "Destination page: " + (pd.retrievePageNumber() + 1)); + } + else if (gta.getDestination() instanceof PDNamedDestination) + { + PDPageDestination pd = document.getDocumentCatalog().findNamedDestinationPage((PDNamedDestination) gta.getDestination()); + if (pd != null) + { + System.out.println(indentation + "Destination page: " + (pd.retrievePageNumber() + 1)); + } + } + else + { + System.out.println(indentation + "Destination class: " + gta.getDestination().getClass().getSimpleName()); + } + } + else if (current.getAction() != null) + { + System.out.println(indentation + "Action class: " + current.getAction().getClass().getSimpleName()); + } System.out.println( indentation + current.getTitle() ); - printBookmark( current, indentation + " " ); + printBookmark( document, current, indentation + " " ); current = current.getNextSibling(); } - } } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/PrintURLs.java 
b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/PrintURLs.java index a245cc8fc40..73aa6222122 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/PrintURLs.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/PrintURLs.java @@ -19,6 +19,8 @@ import java.awt.geom.Rectangle2D; import java.io.File; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.List; import org.apache.pdfbox.pdmodel.PDDocument; @@ -27,7 +29,6 @@ import org.apache.pdfbox.pdmodel.interactive.action.PDAction; import org.apache.pdfbox.pdmodel.interactive.action.PDActionURI; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; import org.apache.pdfbox.text.PDFTextStripperByArea; @@ -47,8 +48,8 @@ private PrintURLs() } /** - * This will create a hello world PDF document. - *
+ * This will output all URLs and the texts in the annotation rectangle of a document. + *
* see usage() for commandline * * @param args Command line arguments. @@ -77,10 +78,10 @@ public static void main(String[] args) throws IOException for( int j=0; j + *
* see usage() for commandline * * @param args Command line arguments. diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/RubberStampWithImage.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/RubberStampWithImage.java index 75a6159ca21..dc26c7c91f4 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/RubberStampWithImage.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/RubberStampWithImage.java @@ -37,7 +37,8 @@ /** - * This is an example on how to add a rubber stamp with an image to pages of a PDF document. + * This is an example on how to add a rubber stamp annotation with an image to pages of a PDF + * document. To add a normal image, use the AddImageToPDF.java example. */ public class RubberStampWithImage { @@ -47,7 +48,7 @@ public class RubberStampWithImage private static final String XOBJECT_DO = "Do\n"; private static final String SPACE = " "; - private static final NumberFormat formatDecimal = NumberFormat.getNumberInstance( Locale.US ); + private static final NumberFormat FORMATDECIMAL = NumberFormat.getNumberInstance( Locale.US ); /** * Add a rubber stamp with an jpg image to every page of the given document. 
@@ -86,12 +87,12 @@ public void doIt( String[] args ) throws IOException PDImageXObject ximage = PDImageXObject.createFromFile(args[2], document); // define and set the target rectangle - int lowerLeftX = 250; - int lowerLeftY = 550; - int formWidth = 150; - int formHeight = 25; - int imgWidth = 50; - int imgHeight = 25; + float lowerLeftX = 250; + float lowerLeftY = 550; + float formWidth = 150; + float formHeight = 25; + float imgWidth = 50; + float imgHeight = 25; PDRectangle rect = new PDRectangle(); rect.setLowerLeftX(lowerLeftX); @@ -139,17 +140,17 @@ private void drawXObject( PDImageXObject xobject, PDResources resources, OutputS COSName xObjectId = resources.add(xobject); appendRawCommands( os, SAVE_GRAPHICS_STATE ); - appendRawCommands( os, formatDecimal.format( width ) ); + appendRawCommands( os, FORMATDECIMAL.format( width ) ); appendRawCommands( os, SPACE ); - appendRawCommands( os, formatDecimal.format( 0 ) ); + appendRawCommands( os, FORMATDECIMAL.format( 0 ) ); appendRawCommands( os, SPACE ); - appendRawCommands( os, formatDecimal.format( 0 ) ); + appendRawCommands( os, FORMATDECIMAL.format( 0 ) ); appendRawCommands( os, SPACE ); - appendRawCommands( os, formatDecimal.format( height ) ); + appendRawCommands( os, FORMATDECIMAL.format( height ) ); appendRawCommands( os, SPACE ); - appendRawCommands( os, formatDecimal.format( x ) ); + appendRawCommands( os, FORMATDECIMAL.format( x ) ); appendRawCommands( os, SPACE ); - appendRawCommands( os, formatDecimal.format( y ) ); + appendRawCommands( os, FORMATDECIMAL.format( y ) ); appendRawCommands( os, SPACE ); appendRawCommands( os, CONCATENATE_MATRIX ); appendRawCommands( os, SPACE ); diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ShowColorBoxes.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ShowColorBoxes.java index edd91e8bf1f..c6e0866ec4d 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ShowColorBoxes.java +++ 
b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ShowColorBoxes.java @@ -21,6 +21,7 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.util.Matrix; /** * Creates a simple document. The example is taken from the pdf file format specification. @@ -60,6 +61,15 @@ public static void main(String[] args) throws IOException contents.addRect(10, 10, 100, 100); contents.fill(); + // draw a blue box with rect x=200, y=500, w=200, h=100 + // 105° rotation is around the bottom left corner + contents.saveGraphicsState(); + contents.setNonStrokingColor(Color.BLUE); + contents.transform(Matrix.getRotateInstance(Math.toRadians(105), 200, 500)); + contents.addRect(0, 0, 200, 100); + contents.fill(); + contents.restoreGraphicsState(); + contents.close(); doc.save(filename); } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ShowTextWithPositioning.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ShowTextWithPositioning.java new file mode 100644 index 00000000000..810ec5a477d --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ShowTextWithPositioning.java @@ -0,0 +1,168 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.examples.pdmodel; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont; +import org.apache.pdfbox.pdmodel.font.PDType0Font; +import org.apache.pdfbox.pdmodel.font.encoding.WinAnsiEncoding; +import org.apache.pdfbox.util.Matrix; + +/** + * This example shows how to justify a string using the showTextWithPositioning method. First only + * spaces are adjusted, and then every letter. + * + * @author Dan Fickling + */ +public class ShowTextWithPositioning +{ + private static final float FONT_SIZE = 20.0f; + + private ShowTextWithPositioning() + { + } + + public static void main(String[] args) throws IOException + { + doIt("Hello World, this is a test!", "justify-example.pdf"); + } + + public static void doIt(String message, String outfile) throws IOException + { + // the document + PDDocument doc = new PDDocument(); + InputStream is = PDDocument.class.getResourceAsStream("/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"); + + // Page 1 + PDFont font = PDType0Font.load(doc, is, true); + PDPage page = new PDPage(PDRectangle.A4); + doc.addPage(page); + + // Get the non-justified string width in text space units. + float stringWidth = font.getStringWidth(message) * FONT_SIZE; + + // Get the string height in text space units. + float stringHeight = font.getFontDescriptor().getFontBoundingBox().getHeight() * FONT_SIZE; + + // Get the width we have to justify in. 
+ PDRectangle pageSize = page.getMediaBox(); + + PDPageContentStream contentStream = new PDPageContentStream(doc, + page, AppendMode.OVERWRITE, false); + + contentStream.beginText(); + contentStream.setFont(font, FONT_SIZE); + + // Start at top of page. + contentStream.setTextMatrix( + Matrix.getTranslateInstance(0, pageSize.getHeight() - stringHeight / 1000f)); + + // First show non-justified. + contentStream.showText(message); + + // Move to next line. + contentStream.setTextMatrix( + Matrix.getTranslateInstance(0, pageSize.getHeight() - stringHeight / 1000f * 2)); + + // Now show word justified. + // The space we have to make up, in text space units. + float justifyWidth = pageSize.getWidth() * 1000f - stringWidth; + + List text = new ArrayList(); + String[] parts = message.split("\\s"); + + float spaceWidth = (justifyWidth / (parts.length - 1)) / FONT_SIZE; + + for (int i = 0; i < parts.length; i++) + { + if (i != 0) + { + text.add(" "); + // Positive values move to the left, negative to the right. + text.add(-spaceWidth); + } + text.add(parts[i]); + } + contentStream.showTextWithPositioning(text.toArray()); + contentStream.setTextMatrix(Matrix.getTranslateInstance(0, pageSize.getHeight() - stringHeight / 1000f * 3)); + + // Now show letter justified. 
+ text = new ArrayList(); + justifyWidth = pageSize.getWidth() * 1000f - stringWidth; + float extraLetterWidth = (justifyWidth / (message.codePointCount(0, message.length()) - 1)) / FONT_SIZE; + + for (int i = 0; i < message.length(); i += Character.charCount(message.codePointAt(i))) + { + if (i != 0) + { + text.add(-extraLetterWidth); + } + + text.add(String.valueOf(Character.toChars(message.codePointAt(i)))); + } + contentStream.showTextWithPositioning(text.toArray()); + + // PDF specification about word spacing: + // "Word spacing shall be applied to every occurrence of the single-byte character + // code 32 in a string when using a simple font or a composite font that defines + // code 32 as a single-byte code. It shall not apply to occurrences of the byte + // value 32 in multiple-byte codes. + // TrueType font with no word spacing + contentStream.setTextMatrix( + Matrix.getTranslateInstance(0, pageSize.getHeight() - stringHeight / 1000f * 4)); + font = PDTrueTypeFont.load(doc, PDDocument.class.getResourceAsStream( + "/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"), WinAnsiEncoding.INSTANCE); + contentStream.setFont(font, FONT_SIZE); + contentStream.showText(message); + + float wordSpacing = (pageSize.getWidth() * 1000f - stringWidth) / (parts.length - 1) / 1000; + + // TrueType font with word spacing + contentStream.setTextMatrix( + Matrix.getTranslateInstance(0, pageSize.getHeight() - stringHeight / 1000f * 5)); + font = PDTrueTypeFont.load(doc, PDDocument.class.getResourceAsStream( + "/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"), WinAnsiEncoding.INSTANCE); + contentStream.setFont(font, FONT_SIZE); + contentStream.setWordSpacing(wordSpacing); + contentStream.showText(message); + + // Type0 font with word spacing that has no effect + contentStream.setTextMatrix( + Matrix.getTranslateInstance(0, pageSize.getHeight() - stringHeight / 1000f * 6)); + font = PDType0Font.load(doc, PDDocument.class.getResourceAsStream( + 
"/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf")); + contentStream.setFont(font, FONT_SIZE); + contentStream.setWordSpacing(wordSpacing); + contentStream.showText(message); + + // Finish up. + contentStream.endText(); + contentStream.close(); + + doc.save(outfile); + doc.close(); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/UsingTextMatrix.java b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/UsingTextMatrix.java index f90f92c03a7..10133f7daaa 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/UsingTextMatrix.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/UsingTextMatrix.java @@ -37,7 +37,6 @@ public class UsingTextMatrix */ public UsingTextMatrix() { - super(); } /** @@ -100,7 +99,8 @@ public void doIt( String message, String outfile ) throws IOException // text scaling and translation for (int i=0;i<10;i++) { - contentStream.setTextMatrix(new Matrix(12 + (i * 6), 0, 0, 12+(i*6), 100, 100+i*50)); + contentStream.setTextMatrix(new Matrix(12f + (i * 6), 0, 0, 12f + (i * 6), + 100, 100f + i * 50)); contentStream.showText(message + " " + i); } contentStream.endText(); diff --git a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/package.html b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/package.html index 674bb7c84be..2423e3f422a 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/package.html +++ b/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/examples/src/main/java/org/apache/pdfbox/examples/printing/Printing.java b/examples/src/main/java/org/apache/pdfbox/examples/printing/Printing.java index d82382a0138..14df38d300a 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/printing/Printing.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/printing/Printing.java @@ -27,7 +27,9 @@ import javax.print.attribute.HashPrintRequestAttributeSet; import javax.print.attribute.PrintRequestAttributeSet; import javax.print.attribute.standard.PageRanges; +import javax.print.attribute.standard.Sides; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.viewerpreferences.PDViewerPreferences; import org.apache.pdfbox.printing.PDFPageable; import org.apache.pdfbox.printing.PDFPrintable; @@ -43,7 +45,7 @@ private Printing() /** * Entry point. */ - public static void main(String args[]) throws PrinterException, IOException + public static void main(String[] args) throws PrinterException, IOException { if (args.length != 1) { @@ -60,12 +62,13 @@ public static void main(String args[]) throws PrinterException, IOException //printWithDialog(document); //printWithDialogAndAttributes(document); //printWithPaper(document); + document.close(); } /** * Prints the document at its actual size. This is the recommended way to print. */ - private static void print(PDDocument document) throws IOException, PrinterException + private static void print(PDDocument document) throws PrinterException { PrinterJob job = PrinterJob.getPrinterJob(); job.setPageable(new PDFPageable(document)); @@ -75,8 +78,7 @@ private static void print(PDDocument document) throws IOException, PrinterExcept /** * Prints using custom PrintRequestAttribute values. 
*/ - private static void printWithAttributes(PDDocument document) - throws IOException, PrinterException + private static void printWithAttributes(PDDocument document) throws PrinterException { PrinterJob job = PrinterJob.getPrinterJob(); job.setPageable(new PDFPageable(document)); @@ -90,7 +92,7 @@ private static void printWithAttributes(PDDocument document) /** * Prints with a print preview dialog. */ - private static void printWithDialog(PDDocument document) throws IOException, PrinterException + private static void printWithDialog(PDDocument document) throws PrinterException { PrinterJob job = PrinterJob.getPrinterJob(); job.setPageable(new PDFPageable(document)); @@ -104,14 +106,31 @@ private static void printWithDialog(PDDocument document) throws IOException, Pri /** * Prints with a print preview dialog and custom PrintRequestAttribute values. */ - private static void printWithDialogAndAttributes(PDDocument document) - throws IOException, PrinterException + private static void printWithDialogAndAttributes(PDDocument document) throws PrinterException { PrinterJob job = PrinterJob.getPrinterJob(); job.setPageable(new PDFPageable(document)); PrintRequestAttributeSet attr = new HashPrintRequestAttributeSet(); attr.add(new PageRanges(1, 1)); // pages 1 to 1 + + PDViewerPreferences vp = document.getDocumentCatalog().getViewerPreferences(); + if (vp != null && vp.getDuplex() != null) + { + String dp = vp.getDuplex(); + if (PDViewerPreferences.DUPLEX.DuplexFlipLongEdge.toString().equals(dp)) + { + attr.add(Sides.TWO_SIDED_LONG_EDGE); + } + else if (PDViewerPreferences.DUPLEX.DuplexFlipShortEdge.toString().equals(dp)) + { + attr.add(Sides.TWO_SIDED_SHORT_EDGE); + } + else if (PDViewerPreferences.DUPLEX.Simplex.toString().equals(dp)) + { + attr.add(Sides.ONE_SIDED); + } + } if (job.printDialog(attr)) { @@ -122,8 +141,7 @@ private static void printWithDialogAndAttributes(PDDocument document) /** * Prints using a custom page size and custom margins. 
*/ - private static void printWithPaper(PDDocument document) - throws IOException, PrinterException + private static void printWithPaper(PDDocument document) throws PrinterException { PrinterJob job = PrinterJob.getPrinterJob(); job.setPageable(new PDFPageable(document)); diff --git a/examples/src/main/java/org/apache/pdfbox/examples/rendering/CustomGraphicsStreamEngine.java b/examples/src/main/java/org/apache/pdfbox/examples/rendering/CustomGraphicsStreamEngine.java index 088d946ac92..3f5aa3b5f50 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/rendering/CustomGraphicsStreamEngine.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/rendering/CustomGraphicsStreamEngine.java @@ -42,6 +42,16 @@ */ public class CustomGraphicsStreamEngine extends PDFGraphicsStreamEngine { + /** + * Constructor. + * + * @param page PDF Page + */ + protected CustomGraphicsStreamEngine(PDPage page) + { + super(page); + } + public static void main(String[] args) throws IOException { File file = new File("src/main/resources/org/apache/pdfbox/examples/rendering/", @@ -54,16 +64,6 @@ public static void main(String[] args) throws IOException doc.close(); } - /** - * Constructor. - * - * @param page PDF Page - */ - protected CustomGraphicsStreamEngine(PDPage page) - { - super(page); - } - /** * Runs the engine on the current page. 
* @@ -82,7 +82,7 @@ public void run() throws IOException @Override public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException { - System.out.printf("appendRectangle %.2f %.2f, %.2f %.2f, %.2f %.2f, %.2f %.2f\n", + System.out.printf("appendRectangle %.2f %.2f, %.2f %.2f, %.2f %.2f, %.2f %.2f%n", p0.getX(), p0.getY(), p1.getX(), p1.getY(), p2.getX(), p2.getY(), p3.getX(), p3.getY()); } @@ -102,19 +102,19 @@ public void clip(int windingRule) throws IOException @Override public void moveTo(float x, float y) throws IOException { - System.out.printf("moveTo %.2f %.2f\n", x, y); + System.out.printf("moveTo %.2f %.2f%n", x, y); } @Override public void lineTo(float x, float y) throws IOException { - System.out.printf("lineTo %.2f %.2f\n", x, y); + System.out.printf("lineTo %.2f %.2f%n", x, y); } @Override public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException { - System.out.printf("curveTo %.2f %.2f, %.2f %.2f, %.2f %.2f\n", x1, y1, x2, y2, x3, y3); + System.out.printf("curveTo %.2f %.2f, %.2f %.2f, %.2f %.2f%n", x1, y1, x2, y2, x3, y3); } @Override @@ -186,11 +186,11 @@ public void showTextStrings(COSArray array) throws IOException * Overridden from PDFStreamEngine. */ @Override - protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, - Vector displacement) throws IOException + protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, Vector displacement) + throws IOException { - System.out.print(unicode); - super.showGlyph(textRenderingMatrix, font, code, unicode, displacement); + System.out.print("showGlyph " + code); + super.showGlyph(textRenderingMatrix, font, code, displacement); } // NOTE: there are may more methods in PDFStreamEngine which can be overridden here too. 
diff --git a/examples/src/main/java/org/apache/pdfbox/examples/rendering/CustomPageDrawer.java b/examples/src/main/java/org/apache/pdfbox/examples/rendering/CustomPageDrawer.java index b61cfe280e7..a87e4fc3716 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/rendering/CustomPageDrawer.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/rendering/CustomPageDrawer.java @@ -96,15 +96,12 @@ private static class MyPageDrawer extends PageDrawer @Override protected Paint getPaint(PDColor color) throws IOException { - // if this is the non-stroking color - if (getGraphicsState().getNonStrokingColor() == color) + // if this is the non-stroking color, find red, ignoring alpha channel + if (getGraphicsState().getNonStrokingColor() == color && + color.toRGB() == (Color.RED.getRGB() & 0x00FFFFFF)) { - // find red, ignoring alpha channel - if (color.toRGB() == (Color.RED.getRGB() & 0x00FFFFFF)) - { - // replace it with blue - return Color.BLUE; - } + // replace it with blue + return Color.BLUE; } return super.getPaint(color); } @@ -113,11 +110,11 @@ protected Paint getPaint(PDColor color) throws IOException * Glyph bounding boxes. 
*/ @Override - protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, - Vector displacement) throws IOException + protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, + Vector displacement) throws IOException { // draw glyph - super.showGlyph(textRenderingMatrix, font, code, unicode, displacement); + super.showGlyph(textRenderingMatrix, font, code, displacement); // bbox in EM -> user units Shape bbox = new Rectangle2D.Float(0, 0, font.getWidth(code) / 1000, 1); @@ -182,7 +179,7 @@ public void showAnnotation(PDAnnotation annotation) throws IOException saveGraphicsState(); // 35% alpha - getGraphicsState().setNonStrokeAlphaConstants(0.35); + getGraphicsState().setNonStrokeAlphaConstant(0.35); super.showAnnotation(annotation); // restore diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/CMSProcessableInputStream.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/CMSProcessableInputStream.java index 5764af977fa..64bf51f882f 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/signature/CMSProcessableInputStream.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/CMSProcessableInputStream.java @@ -24,10 +24,13 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import org.apache.pdfbox.io.IOUtils; /** - * Wraps a InputStream into a CMSProcessable object for bouncy castle. - * It's an alternative to the CMSProcessableByteArray. + * Wraps a InputStream into a CMSProcessable object for bouncy castle. It's a memory saving + * alternative to the {@link org.bouncycastle.cms.CMSProcessableByteArray CMSProcessableByteArray} + * class. 
+ * * @author Thomas Chojecki */ class CMSProcessableInputStream implements CMSTypedData @@ -56,12 +59,7 @@ public Object getContent() public void write(OutputStream out) throws IOException, CMSException { // read the content only one time - byte[] buffer = new byte[8 * 1024]; - int read; - while ((read = in.read(buffer)) != -1) - { - out.write(buffer, 0, read); - } + IOUtils.copy(in, out); in.close(); } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateEmbeddedTimeStamp.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateEmbeddedTimeStamp.java new file mode 100644 index 00000000000..25479667b9a --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateEmbeddedTimeStamp.java @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.examples.signature; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; +import org.apache.pdfbox.util.Hex; +import org.bouncycastle.cms.CMSException; +import org.bouncycastle.cms.CMSSignedData; + +/** + * An example for timestamp-signing a PDF for PADeS-Specification. The document will only be changed + * in its existing signature by a signed timestamp (A timestamp and the Hash-Value of the document + * are signed by a Time Stamp Authority (TSA)). + * + * This method only changes the unsigned parameters of a signature, so that it is kept valid. + * + * Use case: sign offline to avoid zero-day attacks against the signing machine. Once the signature + * is there and the pdf is transferred to a network connected machine, one is likely to want to add + * a timestamp. (Ralf Hauser) + * + * @author Alexis Suter + */ +public class CreateEmbeddedTimeStamp +{ + private final String tsaUrl; + private PDDocument document; + private PDSignature signature; + private byte[] changedEncodedSignature; + + public CreateEmbeddedTimeStamp(String tsaUrl) + { + this.tsaUrl = tsaUrl; + } + + /** + * Embeds the given PDF file with signed timestamp(s). Alters the original file on disk. 
+ * + * @param file the PDF file to sign and to overwrite + * @throws IOException + */ + public void embedTimeStamp(File file) throws IOException + { + embedTimeStamp(file, file); + } + + /** + * Embeds signed timestamp(s) into existing signatures of the given document + * + * @param inFile The pdf file possibly containing signatures + * @param outFile Where the changed document will be saved + * @throws IOException + */ + public void embedTimeStamp(File inFile, File outFile) throws IOException + { + if (inFile == null || !inFile.exists()) + { + throw new FileNotFoundException("Document for signing does not exist"); + } + + // sign + PDDocument doc = PDDocument.load(inFile); + document = doc; + processTimeStamping(outFile, inFile.getAbsolutePath()); + doc.close(); + } + + /** + * Processes the time-stamping of the Signature. + * + * @param outFile Where the new file will be written to + * @param fileName of the existing file containing the pdf + * @throws IOException + */ + private void processTimeStamping(File outFile, String fileName) throws IOException + { + try + { + byte[] documentBytes; + FileInputStream fis = new FileInputStream(fileName); + documentBytes = IOUtils.toByteArray(fis); + fis.close(); + processRelevantSignatures(documentBytes); + + if (changedEncodedSignature != null) + { + FileOutputStream output = new FileOutputStream(outFile); + embedNewSignatureIntoDocument(documentBytes, output); + output.close(); + } + } + catch (IOException e) + { + throw new IOException(e); + } + catch (NoSuchAlgorithmException e) + { + throw new IOException(e); + } + catch (CMSException e) + { + throw new IOException(e); + } + } + + /** + * Create changed Signature with embedded TimeStamp from TSA + * + * @param documentBytes byte[] of the input file + * @throws IOException + * @throws CMSException + * @throws NoSuchAlgorithmException + */ + private void processRelevantSignatures(byte[] documentBytes) + throws IOException, CMSException, NoSuchAlgorithmException + { + 
signature = SigUtils.getLastRelevantSignature(document); + if (signature == null) + { + return; + } + + byte[] sigBlock = signature.getContents(documentBytes); + CMSSignedData signedData = new CMSSignedData(sigBlock); + + System.out.println("INFO: Byte Range: " + Arrays.toString(signature.getByteRange())); + + if (tsaUrl != null && tsaUrl.length() > 0) + { + ValidationTimeStamp validation = new ValidationTimeStamp(tsaUrl); + signedData = validation.addSignedTimeStamp(signedData); + } + + byte[] newEncoded = Hex.getBytes(signedData.getEncoded()); + int maxSize = signature.getByteRange()[2] - signature.getByteRange()[1]; + System.out.println( + "INFO: New Signature has Size: " + newEncoded.length + " maxSize: " + maxSize); + + if (newEncoded.length > maxSize - 2) + { + throw new IOException( + "New Signature is too big for existing Signature-Placeholder. Max Place: " + + maxSize); + } + else + { + changedEncodedSignature = newEncoded; + } + } + + /** + * Embeds the new signature into the document, by copying the rest of the document + * + * @param docBytes byte array of the document + * @param output target, where the file will be written + * @throws IOException + */ + private void embedNewSignatureIntoDocument(byte[] docBytes, OutputStream output) + throws IOException + { + int[] byteRange = signature.getByteRange(); + output.write(docBytes, byteRange[0], byteRange[1] + 1); + output.write(changedEncodedSignature); + int addingLength = byteRange[2] - byteRange[1] - 2 - changedEncodedSignature.length; + byte[] zeroes = Hex.getBytes(new byte[(addingLength + 1) / 2]); + output.write(zeroes); + output.write(docBytes, byteRange[2] - 1, byteRange[3] + 1); + } + + public static void main(String[] args) throws IOException + { + if (args.length != 3) + { + usage(); + System.exit(1); + } + + String tsaUrl = null; + for (int i = 0; i < args.length; i++) + { + if (args[i].equals("-tsa")) + { + i++; + if (i >= args.length) + { + usage(); + System.exit(1); + } + tsaUrl = args[i]; 
+ } + } + + File inFile = new File(args[0]); + System.out.println("Input File: " + args[0]); + String name = inFile.getName(); + String substring = name.substring(0, name.lastIndexOf('.')); + + File outFile = new File(inFile.getParent(), substring + "_eTs.pdf"); + System.out.println("Output File: " + outFile.getAbsolutePath()); + + // Embed TimeStamp + CreateEmbeddedTimeStamp signing = new CreateEmbeddedTimeStamp(tsaUrl); + signing.embedTimeStamp(inFile, outFile); + } + + private static void usage() + { + System.err.println("usage: java " + CreateEmbeddedTimeStamp.class.getName() + " " + + "\n" + "mandatory option:\n" + + " -tsa sign timestamp using the given TSA server\n"); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateEmptySignatureForm.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateEmptySignatureForm.java new file mode 100644 index 00000000000..61288cd677c --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateEmptySignatureForm.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.examples.signature; + +import java.io.IOException; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField; + +/** + * An example of creating an AcroForm and an empty signature field from scratch. + * + * An actual signature can be added by clicking on it in Adobe Reader. + * + */ +public final class CreateEmptySignatureForm +{ + private CreateEmptySignatureForm() + { + } + + public static void main(String[] args) throws IOException + { + // Create a new document with an empty page. + PDDocument document = new PDDocument(); + PDPage page = new PDPage(PDRectangle.A4); + document.addPage(page); + + // Adobe Acrobat uses Helvetica as a default font and + // stores that under the name '/Helv' in the resources dictionary + PDFont font = PDType1Font.HELVETICA; + PDResources resources = new PDResources(); + resources.put(COSName.getPDFName("Helv"), font); + + // Add a new AcroForm and add that to the document + PDAcroForm acroForm = new PDAcroForm(document); + document.getDocumentCatalog().setAcroForm(acroForm); + + // Add and set the resources and default appearance at the form level + acroForm.setDefaultResources(resources); + + // Acrobat sets the font size on the form level to be + // auto sized as default. 
This is done by setting the font size to '0' + String defaultAppearanceString = "/Helv 0 Tf 0 g"; + acroForm.setDefaultAppearance(defaultAppearanceString); + + // --- end of general AcroForm stuff --- + + // Create empty signature field, it will get the name "Signature1" + PDSignatureField signatureField = new PDSignatureField(acroForm); + PDAnnotationWidget widget = signatureField.getWidgets().get(0); + PDRectangle rect = new PDRectangle(50, 650, 200, 50); + widget.setRectangle(rect); + widget.setPage(page); + + // see thread from PDFBox users mailing list 17.2.2021 - 19.2.2021 + // https://mail-archives.apache.org/mod_mbox/pdfbox-users/202102.mbox/thread + widget.setPrinted(true); + + page.getAnnotations().add(widget); + + acroForm.getFields().add(signatureField); + + document.save(args[0]); + document.close(); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignature.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignature.java index cc663d46ce0..e12b30c39de 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignature.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignature.java @@ -22,38 +22,22 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.net.URL; import java.security.GeneralSecurityException; import java.security.KeyStore; import java.security.KeyStoreException; -import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.security.PrivateKey; import java.security.UnrecoverableKeyException; -import java.security.cert.Certificate; -import java.util.ArrayList; +import java.security.cert.CertificateException; import java.util.Calendar; -import java.util.Enumeration; -import java.util.List; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; +import 
org.apache.pdfbox.pdmodel.interactive.digitalsignature.ExternalSigningSupport; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; -import org.bouncycastle.asn1.ASN1Encodable; -import org.bouncycastle.asn1.ASN1EncodableVector; -import org.bouncycastle.asn1.ASN1ObjectIdentifier; -import org.bouncycastle.asn1.ASN1Primitive; -import org.bouncycastle.asn1.DERSet; -import org.bouncycastle.asn1.cms.Attribute; -import org.bouncycastle.asn1.cms.AttributeTable; -import org.bouncycastle.asn1.cms.Attributes; -import org.bouncycastle.asn1.pkcs.PKCSObjectIdentifiers; -import org.bouncycastle.cms.CMSSignedData; -import org.bouncycastle.cms.SignerInformation; -import org.bouncycastle.cms.SignerInformationStore; -import org.bouncycastle.tsp.TSPException; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions; /** - * An example for singing a PDF with bouncy castle. + * An example for signing a PDF with bouncy castle. * A keystore can be created with the java keytool, for example: * * {@code keytool -genkeypair -storepass 123456 -storetype pkcs12 -alias test -validity 365 @@ -67,32 +51,20 @@ public class CreateSignature extends CreateSignatureBase { /** - * Initialize the signature creator with a keystore and certficate password. - * @param keystore the keystore containing the signing certificate - * @param password the password for recovering the key + * Initialize the signature creator with a keystore and certificate password. 
+ * + * @param keystore the pkcs12 keystore containing the signing certificate + * @param pin the password for recovering the key * @throws KeyStoreException if the keystore has not been initialized (loaded) * @throws NoSuchAlgorithmException if the algorithm for recovering the key cannot be found * @throws UnrecoverableKeyException if the given password is wrong + * @throws CertificateException if the certificate is not valid at signing time + * @throws IOException if no certificate could be found */ - public CreateSignature(KeyStore keystore, char[] password) - throws KeyStoreException, UnrecoverableKeyException, NoSuchAlgorithmException + public CreateSignature(KeyStore keystore, char[] pin) + throws KeyStoreException, UnrecoverableKeyException, NoSuchAlgorithmException, CertificateException, IOException { - // grabs the first alias from the keystore and get the private key. An - // TODO alternative method or constructor could be used for setting a specific - // alias that should be used. - Enumeration aliases = keystore.aliases(); - String alias; - if (aliases.hasMoreElements()) - { - alias = aliases.nextElement(); - } - else - { - throw new KeyStoreException("Keystore is empty"); - } - setPrivateKey((PrivateKey) keystore.getKey(alias, password)); - Certificate[] certificateChain = keystore.getCertificateChain(alias); - setCertificate(certificateChain[0]); + super(keystore, pin); } /** @@ -120,28 +92,42 @@ public void signDetached(File inFile, File outFile) throws IOException * Signs the given PDF file. 
* @param inFile input PDF file * @param outFile output PDF file - * @param tsaClient optional TSA client + * @param tsaUrl optional TSA url * @throws IOException if the input file could not be read */ - public void signDetached(File inFile, File outFile, TSAClient tsaClient) throws IOException + public void signDetached(File inFile, File outFile, String tsaUrl) throws IOException { if (inFile == null || !inFile.exists()) { throw new FileNotFoundException("Document for signing does not exist"); } + setTsaUrl(tsaUrl); + FileOutputStream fos = new FileOutputStream(outFile); // sign - PDDocument doc = PDDocument.load(inFile); - signDetached(doc, fos, tsaClient); - doc.close(); + PDDocument doc = null; + try + { + doc = PDDocument.load(inFile); + signDetached(doc, fos); + } + finally + { + IOUtils.closeQuietly(doc); + IOUtils.closeQuietly(fos); + } } - public void signDetached(PDDocument document, OutputStream output, TSAClient tsaClient) + public void signDetached(PDDocument document, OutputStream output) throws IOException { - setTsaClient(tsaClient); + int accessPermissions = SigUtils.getMDPPermission(document); + if (accessPermissions == 1) + { + throw new IllegalStateException("No changes to the document are permitted due to DocMDP transform parameters dictionary"); + } // create signature dictionary PDSignature signature = new PDSignature(); @@ -155,69 +141,33 @@ public void signDetached(PDDocument document, OutputStream output, TSAClient tsa // the signing date, needed for valid signature signature.setSignDate(Calendar.getInstance()); - // register signature dictionary and sign interface - document.addSignature(signature, this); - - // write incremental (only for signing purpose) - document.saveIncremental(output); - } - - /** - * We just extend CMS signed Data - * - * @param signedData -Generated CMS signed data - * @return CMSSignedData - Extended CMS signed data - */ - @Override - protected CMSSignedData signTimeStamps(CMSSignedData signedData) - throws 
IOException, TSPException - { - SignerInformationStore signerStore = signedData.getSignerInfos(); - List newSigners = new ArrayList(); - - for (SignerInformation signer : signerStore.getSigners()) + // Optional: certify + if (accessPermissions == 0) { - newSigners.add(signTimeStamp(signer)); - } - - // TODO do we have to return a new store? - return CMSSignedData.replaceSigners(signedData, new SignerInformationStore(newSigners)); - } - - /** - * We are extending CMS Signature - * - * @param signer information about signer - * @return information about SignerInformation - */ - private SignerInformation signTimeStamp(SignerInformation signer) - throws IOException, TSPException - { - AttributeTable unsignedAttributes = signer.getUnsignedAttributes(); + SigUtils.setMDPPermission(document, signature, 2); + } - ASN1EncodableVector vector = new ASN1EncodableVector(); - if (unsignedAttributes != null) + if (isExternalSigning()) { - vector = unsignedAttributes.toASN1EncodableVector(); + document.addSignature(signature); + ExternalSigningSupport externalSigning = + document.saveIncrementalForExternalSigning(output); + // invoke external signature service + byte[] cmsSignature = sign(externalSigning.getContent()); + // set signature bytes received from the service + externalSigning.setSignature(cmsSignature); } - - byte[] token = getTsaClient().getTimeStampToken(signer.getSignature()); - ASN1ObjectIdentifier oid = PKCSObjectIdentifiers.id_aa_signatureTimeStampToken; - ASN1Encodable signatureTimeStamp = new Attribute(oid, new DERSet(ASN1Primitive.fromByteArray(token))); - - vector.add(signatureTimeStamp); - Attributes signedAttributes = new Attributes(vector); - - SignerInformation newSigner = SignerInformation.replaceUnsignedAttributes( - signer, new AttributeTable(signedAttributes)); - - // TODO can this actually happen? 
- if (newSigner == null) + else { - return signer; + SignatureOptions signatureOptions = new SignatureOptions(); + // Size can vary, but should be enough for purpose. + signatureOptions.setPreferredSignatureSize(SignatureOptions.DEFAULT_SIGNATURE_SIZE * 2); + // register signature dictionary and sign interface + document.addSignature(signature, this, signatureOptions); + + // write incremental (only for signing purpose) + document.saveIncremental(output); } - - return newSigner; } public static void main(String[] args) throws IOException, GeneralSecurityException @@ -229,7 +179,8 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept } String tsaUrl = null; - for(int i = 0; i < args.length; i++) + boolean externalSig = false; + for (int i = 0; i < args.length; i++) { if (args[i].equals("-tsa")) { @@ -237,9 +188,14 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept if (i >= args.length) { usage(); + System.exit(1); } tsaUrl = args[i]; } + if (args[i].equals("-e")) + { + externalSig = true; + } } // load the keystore @@ -248,23 +204,16 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept keystore.load(new FileInputStream(args[0]), password); // TODO alias command line argument - // TSA client - TSAClient tsaClient = null; - if (tsaUrl != null) - { - MessageDigest digest = MessageDigest.getInstance("SHA-256"); - tsaClient = new TSAClient(new URL(tsaUrl), null, null, digest); - } - // sign PDF CreateSignature signing = new CreateSignature(keystore, password); + signing.setExternalSigning(externalSig); File inFile = new File(args[2]); String name = inFile.getName(); String substring = name.substring(0, name.lastIndexOf('.')); File outFile = new File(inFile.getParent(), substring + "_signed.pdf"); - signing.signDetached(inFile, outFile, tsaClient); + signing.signDetached(inFile, outFile, tsaUrl); } private static void usage() @@ -272,6 +221,7 @@ private static void usage() 
System.err.println("usage: java " + CreateSignature.class.getName() + " " + " \n" + "" + "options:\n" + - " -tsa sign timestamp using the given TSA server"); + " -tsa sign timestamp using the given TSA server\n" + + " -e sign using external signature creation scenario"); } } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignatureBase.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignatureBase.java index 415728478a2..ee2c94b0fda 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignatureBase.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignatureBase.java @@ -19,13 +19,17 @@ import java.io.IOException; import java.io.InputStream; import java.security.GeneralSecurityException; +import java.security.KeyStore; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; import java.security.PrivateKey; +import java.security.UnrecoverableKeyException; import java.security.cert.Certificate; -import java.util.ArrayList; -import java.util.List; +import java.security.cert.CertificateException; +import java.security.cert.X509Certificate; +import java.util.Arrays; +import java.util.Enumeration; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface; -import org.bouncycastle.asn1.ASN1Primitive; -import org.bouncycastle.cert.X509CertificateHolder; import org.bouncycastle.cert.jcajce.JcaCertStore; import org.bouncycastle.cms.CMSException; import org.bouncycastle.cms.CMSSignedData; @@ -35,74 +39,112 @@ import org.bouncycastle.operator.OperatorCreationException; import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder; import org.bouncycastle.operator.jcajce.JcaDigestCalculatorProviderBuilder; -import org.bouncycastle.tsp.TSPException; -import org.bouncycastle.util.Store; public abstract class CreateSignatureBase implements SignatureInterface { private PrivateKey privateKey; - private Certificate 
certificate; - private TSAClient tsaClient; + private Certificate[] certificateChain; + private String tsaUrl; + private boolean externalSigning; - public void setPrivateKey(PrivateKey privateKey) + /** + * Initialize the signature creator with a keystore (pkcs12) and pin that should be used for the + * signature. + * + * @param keystore is a pkcs12 keystore. + * @param pin is the pin for the keystore / private key + * @throws KeyStoreException if the keystore has not been initialized (loaded) + * @throws NoSuchAlgorithmException if the algorithm for recovering the key cannot be found + * @throws UnrecoverableKeyException if the given password is wrong + * @throws CertificateException if the certificate is not valid at signing time + * @throws IOException if no certificate could be found + */ + public CreateSignatureBase(KeyStore keystore, char[] pin) + throws KeyStoreException, UnrecoverableKeyException, NoSuchAlgorithmException, IOException, CertificateException { - this.privateKey = privateKey; + // grabs the first alias from the keystore and get the private key. An + // alternative method or constructor could be used for setting a specific + // alias that should be used. 
+ Enumeration aliases = keystore.aliases(); + String alias; + Certificate cert = null; + while (cert == null && aliases.hasMoreElements()) + { + alias = aliases.nextElement(); + setPrivateKey((PrivateKey) keystore.getKey(alias, pin)); + Certificate[] certChain = keystore.getCertificateChain(alias); + if (certChain != null) + { + setCertificateChain(certChain); + cert = certChain[0]; + if (cert instanceof X509Certificate) + { + // avoid expired certificate + ((X509Certificate) cert).checkValidity(); + + SigUtils.checkCertificateUsage((X509Certificate) cert); + } + } + } + + if (cert == null) + { + throw new IOException("Could not find certificate"); + } } - public void setCertificate(Certificate certificate) + public final void setPrivateKey(PrivateKey privateKey) { - this.certificate = certificate; + this.privateKey = privateKey; } - public void setTsaClient(TSAClient tsaClient) + public final void setCertificateChain(final Certificate[] certificateChain) { - this.tsaClient = tsaClient; + this.certificateChain = certificateChain; } - public TSAClient getTsaClient() + public Certificate[] getCertificateChain() { - return tsaClient; + return certificateChain; } - /** - * Does nothing. Override this if needed. - * - * @param signedData Generated CMS signed data - * @return CMSSignedData Extended CMS signed data - */ - protected CMSSignedData signTimeStamps(CMSSignedData signedData) throws IOException, TSPException + public void setTsaUrl(String tsaUrl) { - return signedData; + this.tsaUrl = tsaUrl; } /** - * SignatureInterface implementation. - * + * SignatureInterface sample implementation. + *

* This method will be called from inside of the pdfbox and create the PKCS #7 signature. * The given InputStream contains the bytes that are given by the byte range. - * - * This method is for internal use only. <-- TODO this method should be private - * + *

+ * This method is for internal use only. + *

* Use your favorite cryptographic library to implement PKCS #7 signature creation. + * If you want to create the hash and the signature separately (e.g. to transfer only the hash + * to an external application), read this + * answer or this answer. + * + * @throws IOException */ @Override public byte[] sign(InputStream content) throws IOException { + // cannot be done private (interface) try { - List certList = new ArrayList(); - certList.add(certificate); - Store certs = new JcaCertStore(certList); CMSSignedDataGenerator gen = new CMSSignedDataGenerator(); - org.bouncycastle.asn1.x509.Certificate cert = org.bouncycastle.asn1.x509.Certificate.getInstance(ASN1Primitive.fromByteArray(certificate.getEncoded())); + X509Certificate cert = (X509Certificate) certificateChain[0]; ContentSigner sha1Signer = new JcaContentSignerBuilder("SHA256WithRSA").build(privateKey); - gen.addSignerInfoGenerator(new JcaSignerInfoGeneratorBuilder(new JcaDigestCalculatorProviderBuilder().build()).build(sha1Signer, new X509CertificateHolder(cert))); - gen.addCertificates(certs); + gen.addSignerInfoGenerator(new JcaSignerInfoGeneratorBuilder(new JcaDigestCalculatorProviderBuilder().build()).build(sha1Signer, cert)); + gen.addCertificates(new JcaCertStore(Arrays.asList(certificateChain))); CMSProcessableInputStream msg = new CMSProcessableInputStream(content); CMSSignedData signedData = gen.generate(msg, false); - if (tsaClient != null) + if (tsaUrl != null && tsaUrl.length() > 0) { - signedData = signTimeStamps(signedData); + ValidationTimeStamp validation = new ValidationTimeStamp(tsaUrl); + signedData = validation.addSignedTimeStamp(signedData); } return signedData.getEncoded(); } @@ -114,14 +156,27 @@ public byte[] sign(InputStream content) throws IOException { throw new IOException(e); } - catch (TSPException e) - { - throw new IOException(e); - } catch (OperatorCreationException e) { throw new IOException(e); } } + /** + * Set if external signing scenario should be used. 
+ * If {@code false}, SignatureInterface would be used for signing. + *

+ * Default: {@code false} + *

+ * @param externalSigning {@code true} if external signing should be performed + */ + public void setExternalSigning(boolean externalSigning) + { + this.externalSigning = externalSigning; + } + + public boolean isExternalSigning() + { + return externalSigning; + } } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignedTimeStamp.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignedTimeStamp.java new file mode 100644 index 00000000000..bb4f81f2a4b --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignedTimeStamp.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.examples.signature; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.security.GeneralSecurityException; +import java.security.NoSuchAlgorithmException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface; + +/** + * An example for timestamp-signing a PDF for the PAdES specification. The document will be extended by + * a signed TimeStamp (another kind of signature) (Signed TimeStamp and Hash-Value of the document + * are signed by a Time Stamp Authority (TSA)). + * + * @author Thomas Chojecki + * @author Vakhtang Koroghlishvili + * @author John Hewson + * @author Alexis Suter + */ +public class CreateSignedTimeStamp implements SignatureInterface +{ + private static final Log LOG = LogFactory.getLog(CreateSignedTimeStamp.class); + + private final String tsaUrl; + + /** + * Initialize the signed timestamp creator + * + * @param tsaUrl The url where TS-Request will be done. + */ + public CreateSignedTimeStamp(String tsaUrl) + { + this.tsaUrl = tsaUrl; + } + + /** + * Signs the given PDF file. Alters the original file on disk. + * + * @param file the PDF file to sign + * @throws IOException if the file could not be read or written + */ + public void signDetached(File file) throws IOException + { + signDetached(file, file); + } + + /** + * Signs the given PDF file. 
+ * + * @param inFile input PDF file + * @param outFile output PDF file + * @throws IOException if the input file could not be read + */ + public void signDetached(File inFile, File outFile) throws IOException + { + if (inFile == null || !inFile.exists()) + { + throw new FileNotFoundException("Document for signing does not exist"); + } + + FileOutputStream fos = new FileOutputStream(outFile); + + // sign + PDDocument doc = PDDocument.load(inFile); + signDetached(doc, fos); + doc.close(); + fos.close(); + } + + /** + * Prepares the TimeStamp-Signature and starts the saving-process. + * + * @param document given Pdf + * @param output Where the file will be written + * @throws IOException + */ + public void signDetached(PDDocument document, OutputStream output) throws IOException + { + int accessPermissions = SigUtils.getMDPPermission(document); + if (accessPermissions == 1) + { + throw new IllegalStateException( + "No changes to the document are permitted due to DocMDP transform parameters dictionary"); + } + + // create signature dictionary + PDSignature signature = new PDSignature(); + signature.setType(COSName.DOC_TIME_STAMP); + signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE); + signature.setSubFilter(COSName.getPDFName("ETSI.RFC3161")); + + // No certification allowed because /Reference not allowed in signature dictionary + // see ETSI EN 319 142-1 Part 1 and ETSI TS 102 778-4 + // http://www.etsi.org/deliver/etsi_en%5C319100_319199%5C31914201%5C01.01.00_30%5Cen_31914201v010100v.pdf + // http://www.etsi.org/deliver/etsi_ts/102700_102799/10277804/01.01.01_60/ts_10277804v010101p.pdf + + // register signature dictionary and sign interface + document.addSignature(signature, this); + + // write incremental (only for signing purpose) + document.saveIncremental(output); + } + + @Override + public byte[] sign(InputStream content) throws IOException + { + ValidationTimeStamp validation; + try + { + validation = new ValidationTimeStamp(tsaUrl); + return 
validation.getTimeStampToken(content); + } + catch (NoSuchAlgorithmException e) + { + LOG.error("Hashing-Algorithm not found for TimeStamping", e); + } + return new byte[] {}; + } + + public static void main(String[] args) throws IOException, GeneralSecurityException + { + if (args.length != 3) + { + usage(); + System.exit(1); + } + + String tsaUrl = null; + if (args[1].equals("-tsa")) + { + tsaUrl = args[2]; + } + else + { + usage(); + System.exit(1); + } + + // sign PDF + CreateSignedTimeStamp signing = new CreateSignedTimeStamp(tsaUrl); + + File inFile = new File(args[0]); + String name = inFile.getName(); + String substring = name.substring(0, name.lastIndexOf('.')); + + File outFile = new File(inFile.getParent(), substring + "_timestamped.pdf"); + signing.signDetached(inFile, outFile); + } + + private static void usage() + { + System.err.println("usage: java " + CreateSignedTimeStamp.class.getName() + " " + + "\n" + "mandatory options:\n" + + " -tsa sign timestamp using the given TSA server\n"); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignedTimestampBase.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignedTimestampBase.java new file mode 100644 index 00000000000..963a8611b59 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateSignedTimestampBase.java @@ -0,0 +1,58 @@ +/* + * Copyright 2015 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.examples.signature; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface; +import org.bouncycastle.tsp.TimeStampToken; + +public abstract class CreateSignedTimestampBase implements SignatureInterface +{ + private TSAClient tsaClient; + + public void setTsaClient(TSAClient tsaClient) + { + this.tsaClient = tsaClient; + } + + public TSAClient getTsaClient() + { + return tsaClient; + } + + /** + * SignatureInterface implementation. + * + * This method will be called from inside of the pdfbox and create the PKCS #7 signature. The given InputStream + * contains the bytes that are given by the byte range. + * + * This method is for internal use only. + * + * Use your favorite cryptographic library to implement PKCS #7 signature creation. 
+ * + * @throws IOException + */ + @Override + public byte[] sign(InputStream content) throws IOException + { + TimeStampToken timeStampToken = getTsaClient().getTimeStampToken(IOUtils.toByteArray(content)); + return timeStampToken.getEncoded(); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateVisibleSignature.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateVisibleSignature.java index 3605435884a..3ab424f5d6f 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateVisibleSignature.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateVisibleSignature.java @@ -20,51 +20,122 @@ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; import java.security.KeyStore; import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; -import java.security.PrivateKey; import java.security.UnrecoverableKeyException; import java.security.cert.CertificateException; import java.util.Calendar; -import java.util.Enumeration; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.ExternalSigningSupport; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.visible.PDVisibleSigProperties; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.visible.PDVisibleSignDesigner; -import org.bouncycastle.jce.provider.BouncyCastleProvider; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField; +import 
org.apache.pdfbox.util.Hex; /** - * This is an example for visual signing a pdf with bouncy castle. + * This is an example for visual signing a pdf. * @see CreateSignature * @author Vakhtang Koroghlishvili */ public class CreateVisibleSignature extends CreateSignatureBase { - private static final BouncyCastleProvider BCPROVIDER = new BouncyCastleProvider(); - - private SignatureOptions options; + private SignatureOptions signatureOptions; private PDVisibleSignDesigner visibleSignDesigner; private final PDVisibleSigProperties visibleSignatureProperties = new PDVisibleSigProperties(); + private boolean lateExternalSigning = false; + + public boolean isLateExternalSigning() + { + return lateExternalSigning; + } - public void setVisibleSignatureProperties(String filename, int x, int y, int zoomPercent, - FileInputStream image, int page) + /** + * Set late external signing. Enable this if you want to activate the demo code where the + * signature is kept and added in an extra step without using PDFBox methods. This is disabled + * by default. + * + * @param lateExternalSigning + */ + public void setLateExternalSigning(boolean lateExternalSigning) + { + this.lateExternalSigning = lateExternalSigning; + } + + /** + * Set visible signature designer for a new signature field. + * + * @param filename + * @param x position of the signature field + * @param y position of the signature field + * @param zoomPercent increase (positive value) or decrease (negative value) image with x percent. + * @param imageStream input stream of an image. 
+ * @param page the signature should be placed on + * @throws IOException + */ + public void setVisibleSignDesigner(String filename, int x, int y, int zoomPercent, + InputStream imageStream, int page) throws IOException { - visibleSignDesigner = new PDVisibleSignDesigner(filename, image, page); - visibleSignDesigner.xAxis(x).yAxis(y).zoom(zoomPercent).signatureFieldName("signature"); + visibleSignDesigner = new PDVisibleSignDesigner(filename, imageStream, page); + visibleSignDesigner.xAxis(x).yAxis(y).zoom(zoomPercent).adjustForRotation(); } - public void setSignatureProperties(String name, String location, String reason, int preferredSize, - int page, boolean visualSignEnabled) throws IOException + /** + * Set visible signature designer for an existing signature field. + * + * @param zoomPercent increase (positive value) or decrease (negative value) image with x percent. + * @param imageStream input stream of an image. + * @throws IOException + */ + public void setVisibleSignDesigner(int zoomPercent, InputStream imageStream) + throws IOException + { + visibleSignDesigner = new PDVisibleSignDesigner(imageStream); + visibleSignDesigner.zoom(zoomPercent); + } + + /** + * Set visible signature properties for new signature fields. + * + * @param name + * @param location + * @param reason + * @param preferredSize + * @param page + * @param visualSignEnabled + */ + public void setVisibleSignatureProperties(String name, String location, String reason, int preferredSize, + int page, boolean visualSignEnabled) { visibleSignatureProperties.signerName(name).signerLocation(location).signatureReason(reason). preferredSize(preferredSize).page(page).visualSignEnabled(visualSignEnabled). - setPdVisibleSignature(visibleSignDesigner).buildSignature(); + setPdVisibleSignature(visibleSignDesigner); + } + + /** + * Set visible signature properties for existing signature fields. 
+ * + * @param name + * @param location + * @param reason + * @param visualSignEnabled + */ + public void setVisibleSignatureProperties(String name, String location, String reason, + boolean visualSignEnabled) + { + visibleSignatureProperties.signerName(name).signerLocation(location).signatureReason(reason). + visualSignEnabled(visualSignEnabled).setPdVisibleSignature(visibleSignDesigner); } /** @@ -73,25 +144,29 @@ public void setSignatureProperties(String name, String location, String reason, * * @param keystore is a pkcs12 keystore. * @param pin is the pin for the keystore / private key + * @throws KeyStoreException if the keystore has not been initialized (loaded) + * @throws NoSuchAlgorithmException if the algorithm for recovering the key cannot be found + * @throws UnrecoverableKeyException if the given password is wrong + * @throws CertificateException if the certificate is not valid at signing time + * @throws IOException if no certificate could be found */ public CreateVisibleSignature(KeyStore keystore, char[] pin) - throws KeyStoreException, UnrecoverableKeyException, NoSuchAlgorithmException, IOException + throws KeyStoreException, UnrecoverableKeyException, NoSuchAlgorithmException, IOException, CertificateException { - // grabs the first alias from the keystore and get the private key. An - // alternative method or constructor could be used for setting a specific - // alias that should be used. - Enumeration aliases = keystore.aliases(); - String alias = null; - if (aliases.hasMoreElements()) - { - alias = aliases.nextElement(); - } - else - { - throw new IOException("Could not find alias"); - } - setPrivateKey((PrivateKey) keystore.getKey(alias, pin)); - setCertificate(keystore.getCertificateChain(alias)[0]); + super(keystore, pin); + } + + /** + * Sign pdf file and create new file that ends with "_signed.pdf". + * + * @param inputFile The source pdf document file. + * @param signedFile The file to be signed. 
+ * @param tsaUrl optional TSA url + * @throws IOException + */ + public void signPDF(File inputFile, File signedFile, String tsaUrl) throws IOException + { + this.signPDF(inputFile, signedFile, tsaUrl, null); } /** @@ -99,53 +174,187 @@ public CreateVisibleSignature(KeyStore keystore, char[] pin) * * @param inputFile The source pdf document file. * @param signedFile The file to be signed. + * @param tsaUrl optional TSA url + * @param signatureFieldName optional name of an existing (unsigned) signature field * @throws IOException */ - public void signPDF(File inputFile, File signedFile) throws IOException + public void signPDF(File inputFile, File signedFile, String tsaUrl, String signatureFieldName) throws IOException { if (inputFile == null || !inputFile.exists()) { throw new IOException("Document for signing does not exist"); } + setTsaUrl(tsaUrl); + // creating output document and prepare the IO streams. FileOutputStream fos = new FileOutputStream(signedFile); // load document PDDocument doc = PDDocument.load(inputFile); - // create signature dictionary - PDSignature signature = new PDSignature(); - signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE); // default filter + int accessPermissions = SigUtils.getMDPPermission(doc); + if (accessPermissions == 1) + { + throw new IllegalStateException("No changes to the document are permitted due to DocMDP transform parameters dictionary"); + } + // Note that PDFBox has a bug that visual signing on certified files with permission 2 + // doesn't work properly, see PDFBOX-3699. As long as this issue is open, you may want to + // be careful with such files. + + PDSignature signature; + + // sign a PDF with an existing empty signature, as created by the CreateEmptySignatureForm example. 
+ signature = findExistingSignature(doc, signatureFieldName); + + if (signature == null) + { + // create signature dictionary + signature = new PDSignature(); + } + + // Optional: certify + // can be done only if version is at least 1.5 and if not already set + // doing this on a PDF/A-1b file fails validation by Adobe preflight (PDFBOX-3821) + // PDF/A-1b requires PDF version 1.4 max, so don't increase the version on such files. + if (doc.getVersion() >= 1.5f && accessPermissions == 0) + { + SigUtils.setMDPPermission(doc, signature, 2); + } + + PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm(null); + if (acroForm != null && acroForm.getNeedAppearances()) + { + // PDFBOX-3738 NeedAppearances true results in visible signature becoming invisible + // with Adobe Reader + if (acroForm.getFields().isEmpty()) + { + // we can safely delete it if there are no fields + acroForm.getCOSObject().removeItem(COSName.NEED_APPEARANCES); + // note that if you've set MDP permissions, the removal of this item + // may result in Adobe Reader claiming that the document has been changed. + // and/or that field content won't be displayed properly. + // ==> decide what you prefer and adjust your code accordingly. 
+ } + else + { + System.out.println("/NeedAppearances is set, signature may be ignored by Adobe Reader"); + } + } + + // default filter + signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE); + // subfilter for basic and PAdES Part 2 signatures signature.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED); - signature.setName("signer name"); - signature.setLocation("signer location"); - signature.setReason("reason for signature"); + + if (visibleSignatureProperties != null) + { + // this builds the signature structures in a separate document + visibleSignatureProperties.buildSignature(); + + signature.setName(visibleSignatureProperties.getSignerName()); + signature.setLocation(visibleSignatureProperties.getSignerLocation()); + signature.setReason(visibleSignatureProperties.getSignatureReason()); + } // the signing date, needed for valid signature signature.setSignDate(Calendar.getInstance()); + // do not set SignatureInterface instance, if external signing used + SignatureInterface signatureInterface = isExternalSigning() ? 
null : this; + // register signature dictionary and sign interface if (visibleSignatureProperties != null && visibleSignatureProperties.isVisualSignEnabled()) { - options = new SignatureOptions(); - options.setVisualSignature(visibleSignatureProperties); - options.setPage(visibleSignatureProperties.getPage() - 1); - doc.addSignature(signature, this, options); + signatureOptions = new SignatureOptions(); + signatureOptions.setVisualSignature(visibleSignatureProperties.getVisibleSignature()); + signatureOptions.setPage(visibleSignatureProperties.getPage() - 1); + doc.addSignature(signature, signatureInterface, signatureOptions); } else { - doc.addSignature(signature, this); + doc.addSignature(signature, signatureInterface); } - // write incremental (only for signing purpose) - doc.saveIncremental(fos); + if (isExternalSigning()) + { + ExternalSigningSupport externalSigning = doc.saveIncrementalForExternalSigning(fos); + // invoke external signature service + byte[] cmsSignature = sign(externalSigning.getContent()); + + // Explanation of late external signing (off by default): + // If you want to add the signature in a separate step, then set an empty byte array + // and call signature.getByteRange() and remember the offset signature.getByteRange()[1]+1. + // you can write the ascii hex signature at a later time even if you don't have this + // PDDocument object anymore, with classic java file random access methods. + // If you can't remember the offset value from ByteRange because your context has changed, + // then open the file with PDFBox, find the field with findExistingSignature() or + // PDDocument.getLastSignatureDictionary() and get the ByteRange from there. + // Close the file and then write the signature as explained earlier in this comment. 
+ if (isLateExternalSigning()) + { + // this saves the file with a 0 signature + externalSigning.setSignature(new byte[0]); + + // remember the offset (add 1 because of "<") + int offset = signature.getByteRange()[1] + 1; + + // now write the signature at the correct offset without any PDFBox methods + RandomAccessFile raf = new RandomAccessFile(signedFile, "rw"); + raf.seek(offset); + raf.write(Hex.getBytes(cmsSignature)); + raf.close(); + } + else + { + // set signature bytes received from the service and save the file + externalSigning.setSignature(cmsSignature); + } + } + else + { + // write incremental (only for signing purpose) + doc.saveIncremental(fos); + } doc.close(); - // do not close options before saving, because some COSStream objects within options + // Do not close signatureOptions before saving, because some COSStream objects within // are transferred to the signed document. - IOUtils.closeQuietly(options); + // Do not allow signatureOptions to get out of scope before saving, because then the COSDocument + // in signature options might be closed by gc, which would close COSStream objects prematurely. + // See https://issues.apache.org/jira/browse/PDFBOX-3743 + IOUtils.closeQuietly(signatureOptions); + } + + // Find an existing signature (assumed to be empty). You will usually not need this.
+ private PDSignature findExistingSignature(PDDocument doc, String sigFieldName) + { + PDSignature signature = null; + PDSignatureField signatureField; + PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm(null); + if (acroForm != null) + { + signatureField = (PDSignatureField) acroForm.getField(sigFieldName); + if (signatureField != null) + { + // retrieve signature dictionary + signature = signatureField.getSignature(); + if (signature == null) + { + signature = new PDSignature(); + // after solving PDFBOX-3524 + // signatureField.setValue(signature) + // until then: + signatureField.getCOSObject().setItem(COSName.V, signature); + } + else + { + throw new IllegalStateException("The signature field " + sigFieldName + " is already signed."); + } + } + } + return signature; } /** @@ -154,38 +363,69 @@ public void signPDF(File inputFile, File signedFile) throws IOException * [1] pin * [2] document that will be signed * [3] image of visible signature + * + * @param args + * @throws java.security.KeyStoreException + * @throws java.security.cert.CertificateException + * @throws java.io.IOException + * @throws java.security.NoSuchAlgorithmException + * @throws java.security.UnrecoverableKeyException */ public static void main(String[] args) throws KeyStoreException, CertificateException, IOException, NoSuchAlgorithmException, UnrecoverableKeyException { - if (args.length != 4) + // generate with + // keytool -storepass 123456 -storetype PKCS12 -keystore file.p12 -genkey -alias client -keyalg RSA + if (args.length < 4) { usage(); System.exit(1); } - else + + String tsaUrl = null; + // External signing is needed if you are using an external signing service, e.g. to sign + // several files at once. 
+ boolean externalSig = false; + for (int i = 0; i < args.length; i++) { - File ksFile = new File(args[0]); - KeyStore keystore = KeyStore.getInstance("PKCS12", BCPROVIDER); - char[] pin = args[1].toCharArray(); - keystore.load(new FileInputStream(ksFile), pin); - - File documentFile = new File(args[2]); - - CreateVisibleSignature signing = new CreateVisibleSignature(keystore, pin.clone()); - - FileInputStream image = new FileInputStream(args[3]); - - String name = documentFile.getName(); - String substring = name.substring(0, name.lastIndexOf('.')); - File signedDocumentFile = new File(documentFile.getParent(), substring + "_signed.pdf"); - - // page is 1-based here - int page = 1; - signing.setVisibleSignatureProperties (args[2], 0, 0, -50, image, page); - signing.setSignatureProperties ("name", "location", "Security", 0, page, true); - signing.signPDF(documentFile, signedDocumentFile); + if (args[i].equals("-tsa")) + { + i++; + if (i >= args.length) + { + usage(); + System.exit(1); + } + tsaUrl = args[i]; + } + if (args[i].equals("-e")) + { + externalSig = true; + } } + + File ksFile = new File(args[0]); + KeyStore keystore = KeyStore.getInstance("PKCS12"); + char[] pin = args[1].toCharArray(); + keystore.load(new FileInputStream(ksFile), pin); + + File documentFile = new File(args[2]); + + CreateVisibleSignature signing = new CreateVisibleSignature(keystore, pin.clone()); + + InputStream imageStream = new FileInputStream(args[3]); + + String name = documentFile.getName(); + String substring = name.substring(0, name.lastIndexOf('.')); + File signedDocumentFile = new File(documentFile.getParent(), substring + "_signed.pdf"); + + // page is 1-based here + int page = 1; + signing.setVisibleSignDesigner(args[2], 0, 0, -50, imageStream, page); + imageStream.close(); + signing.setVisibleSignatureProperties("name", "location", "Security", 0, page, true); + signing.setExternalSigning(externalSig); + signing.signPDF(documentFile, signedDocumentFile, tsaUrl); } /** @@ 
-194,6 +434,9 @@ public static void main(String[] args) throws KeyStoreException, CertificateExce private static void usage() { System.err.println("Usage: java " + CreateVisibleSignature.class.getName() - + " "); + + " \n" + "" + + "options:\n" + + " -tsa sign timestamp using the given TSA server\n"+ + " -e sign using external signature creation scenario"); } } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateVisibleSignature2.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateVisibleSignature2.java new file mode 100644 index 00000000000..e34c19c5c2a --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateVisibleSignature2.java @@ -0,0 +1,584 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.examples.signature; + +import java.awt.Color; +import java.awt.geom.AffineTransform; +import java.awt.geom.Rectangle2D; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; +import java.security.KeyStore; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.security.UnrecoverableKeyException; +import java.security.cert.CertificateException; +import java.security.cert.X509Certificate; +import java.util.Calendar; +import java.util.List; + +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.common.PDStream; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.ExternalSigningSupport; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import 
org.apache.pdfbox.pdmodel.interactive.form.PDField; +import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField; +import org.apache.pdfbox.util.Hex; +import org.apache.pdfbox.util.Matrix; +import org.bouncycastle.asn1.x500.RDN; +import org.bouncycastle.asn1.x500.X500Name; +import org.bouncycastle.asn1.x500.style.BCStyle; +import org.bouncycastle.asn1.x500.style.IETFUtils; + +/** + * This is a second example for visual signing a pdf. It doesn't use the "design pattern" influenced + * PDVisibleSignDesigner, and doesn't create its complex multilevel forms described in the Adobe + * document + * Digital + * Signature Appearances, because this isn't required by the PDF specification. See the + * discussion in December 2017 in PDFBOX-3198. + * + * @author Vakhtang Koroghlishvili + * @author Tilman Hausherr + */ +public class CreateVisibleSignature2 extends CreateSignatureBase +{ + private SignatureOptions signatureOptions; + private boolean lateExternalSigning = false; + private File imageFile = null; + + /** + * Initialize the signature creator with a keystore (pkcs12) and pin that + * should be used for the signature. + * + * @param keystore is a pkcs12 keystore. 
+ * @param pin is the pin for the keystore / private key + * @throws KeyStoreException if the keystore has not been initialized (loaded) + * @throws NoSuchAlgorithmException if the algorithm for recovering the key cannot be found + * @throws UnrecoverableKeyException if the given password is wrong + * @throws CertificateException if the certificate is not valid at signing time + * @throws IOException if no certificate could be found + */ + public CreateVisibleSignature2(KeyStore keystore, char[] pin) + throws KeyStoreException, UnrecoverableKeyException, NoSuchAlgorithmException, IOException, CertificateException + { + super(keystore, pin); + } + + public File getImageFile() + { + return imageFile; + } + + public void setImageFile(File imageFile) + { + this.imageFile = imageFile; + } + + public boolean isLateExternalSigning() + { + return lateExternalSigning; + } + + /** + * Set late external signing. Enable this if you want to activate the demo code where the + * signature is kept and added in an extra step without using PDFBox methods. This is disabled + * by default. + * + * @param lateExternalSigning + */ + public void setLateExternalSigning(boolean lateExternalSigning) + { + this.lateExternalSigning = lateExternalSigning; + } + + /** + * Sign pdf file and create new file that ends with "_signed.pdf". + * + * @param inputFile The source pdf document file. + * @param signedFile The file the signed document is written to. + * @param humanRect rectangle from a human viewpoint (coordinates start at top left) + * @param tsaUrl optional TSA url + * @throws IOException + */ + public void signPDF(File inputFile, File signedFile, Rectangle2D humanRect, String tsaUrl) throws IOException + { + this.signPDF(inputFile, signedFile, humanRect, tsaUrl, null); + } + + /** + * Sign pdf file and create new file that ends with "_signed.pdf". + * + * @param inputFile The source pdf document file. + * @param signedFile The file the signed document is written to.
+ * @param humanRect rectangle from a human viewpoint (coordinates start at top left) + * @param tsaUrl optional TSA url + * @param signatureFieldName optional name of an existing (unsigned) signature field + * @throws IOException + */ + public void signPDF(File inputFile, File signedFile, Rectangle2D humanRect, String tsaUrl, String signatureFieldName) throws IOException + { + if (inputFile == null || !inputFile.exists()) + { + throw new IOException("Document for signing does not exist"); + } + + setTsaUrl(tsaUrl); + + // creating output document and prepare the IO streams. + FileOutputStream fos = new FileOutputStream(signedFile); + + PDDocument doc = PDDocument.load(inputFile); + int accessPermissions = SigUtils.getMDPPermission(doc); + if (accessPermissions == 1) + { + throw new IllegalStateException("No changes to the document are permitted due to DocMDP transform parameters dictionary"); + } + // Note that PDFBox has a bug that visual signing on certified files with permission 2 + // doesn't work properly, see PDFBOX-3699. As long as this issue is open, you may want to + // be careful with such files. + + PDSignature signature = null; + PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm(null); + PDRectangle rect = null; + + // sign a PDF with an existing empty signature, as created by the CreateEmptySignatureForm example. 
+ if (acroForm != null) + { + signature = findExistingSignature(acroForm, signatureFieldName); + if (signature != null) + { + rect = acroForm.getField(signatureFieldName).getWidgets().get(0).getRectangle(); + } + } + + if (signature == null) + { + // create signature dictionary + signature = new PDSignature(); + } + + if (rect == null) + { + rect = createSignatureRectangle(doc, humanRect); + } + + // Optional: certify + // can be done only if version is at least 1.5 and if not already set + // doing this on a PDF/A-1b file fails validation by Adobe preflight (PDFBOX-3821) + // PDF/A-1b requires PDF version 1.4 max, so don't increase the version on such files. + if (doc.getVersion() >= 1.5f && accessPermissions == 0) + { + SigUtils.setMDPPermission(doc, signature, 2); + } + + if (acroForm != null && acroForm.getNeedAppearances()) + { + // PDFBOX-3738 NeedAppearances true results in visible signature becoming invisible + // with Adobe Reader + if (acroForm.getFields().isEmpty()) + { + // we can safely delete it if there are no fields + acroForm.getCOSObject().removeItem(COSName.NEED_APPEARANCES); + // note that if you've set MDP permissions, the removal of this item + // may result in Adobe Reader claiming that the document has been changed. + // and/or that field content won't be displayed properly. + // ==> decide what you prefer and adjust your code accordingly. 
+ } + else + { + System.out.println("/NeedAppearances is set, signature may be ignored by Adobe Reader"); + } + } + + // default filter + signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE); + + // subfilter for basic and PAdES Part 2 signatures + signature.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED); + + signature.setName("Name"); + signature.setLocation("Location"); + signature.setReason("Reason"); + + // the signing date, needed for valid signature + signature.setSignDate(Calendar.getInstance()); + + // do not set SignatureInterface instance, if external signing used + SignatureInterface signatureInterface = isExternalSigning() ? null : this; + + // register signature dictionary and sign interface + signatureOptions = new SignatureOptions(); + signatureOptions.setVisualSignature(createVisualSignatureTemplate(doc, 0, rect, signature)); + signatureOptions.setPage(0); + doc.addSignature(signature, signatureInterface, signatureOptions); + + if (isExternalSigning()) + { + ExternalSigningSupport externalSigning = doc.saveIncrementalForExternalSigning(fos); + // invoke external signature service + byte[] cmsSignature = sign(externalSigning.getContent()); + + // Explanation of late external signing (off by default): + // If you want to add the signature in a separate step, then set an empty byte array + // and call signature.getByteRange() and remember the offset signature.getByteRange()[1]+1. + // you can write the ascii hex signature at a later time even if you don't have this + // PDDocument object anymore, with classic java file random access methods. + // If you can't remember the offset value from ByteRange because your context has changed, + // then open the file with PDFBox, find the field with findExistingSignature() or + // PDDocument.getLastSignatureDictionary() and get the ByteRange from there. + // Close the file and then write the signature as explained earlier in this comment. 
+ if (isLateExternalSigning()) + { + // this saves the file with a 0 signature + externalSigning.setSignature(new byte[0]); + + // remember the offset (add 1 because of "<") + int offset = signature.getByteRange()[1] + 1; + + // now write the signature at the correct offset without any PDFBox methods + RandomAccessFile raf = new RandomAccessFile(signedFile, "rw"); + raf.seek(offset); + raf.write(Hex.getBytes(cmsSignature)); + raf.close(); + } + else + { + // set signature bytes received from the service and save the file + externalSigning.setSignature(cmsSignature); + } + } + else + { + // write incremental (only for signing purpose) + doc.saveIncremental(fos); + } + doc.close(); + + // Do not close signatureOptions before saving, because some COSStream objects within + // are transferred to the signed document. + // Do not allow signatureOptions to get out of scope before saving, because then the COSDocument + // in signature options might be closed by gc, which would close COSStream objects prematurely. + // See https://issues.apache.org/jira/browse/PDFBOX-3743 + IOUtils.closeQuietly(signatureOptions); + } + + private PDRectangle createSignatureRectangle(PDDocument doc, Rectangle2D humanRect) + { + float x = (float) humanRect.getX(); + float y = (float) humanRect.getY(); + float width = (float) humanRect.getWidth(); + float height = (float) humanRect.getHeight(); + PDPage page = doc.getPage(0); + PDRectangle pageRect = page.getCropBox(); + PDRectangle rect = new PDRectangle(); + // signing should be at the same position regardless of page rotation.
+ switch (page.getRotation()) + { + case 90: + rect.setLowerLeftY(x); + rect.setUpperRightY(x + width); + rect.setLowerLeftX(y); + rect.setUpperRightX(y + height); + break; + case 180: + rect.setUpperRightX(pageRect.getWidth() - x); + rect.setLowerLeftX(pageRect.getWidth() - x - width); + rect.setLowerLeftY(y); + rect.setUpperRightY(y + height); + break; + case 270: + rect.setLowerLeftY(pageRect.getHeight() - x - width); + rect.setUpperRightY(pageRect.getHeight() - x); + rect.setLowerLeftX(pageRect.getWidth() - y - height); + rect.setUpperRightX(pageRect.getWidth() - y); + break; + case 0: + default: + rect.setLowerLeftX(x); + rect.setUpperRightX(x + width); + rect.setLowerLeftY(pageRect.getHeight() - y - height); + rect.setUpperRightY(pageRect.getHeight() - y); + break; + } + return rect; + } + + // create a template PDF document with empty signature and return it as a stream. + private InputStream createVisualSignatureTemplate(PDDocument srcDoc, int pageNum, + PDRectangle rect, PDSignature signature) throws IOException + { + PDDocument doc = new PDDocument(); + + PDPage page = new PDPage(srcDoc.getPage(pageNum).getMediaBox()); + doc.addPage(page); + PDAcroForm acroForm = new PDAcroForm(doc); + doc.getDocumentCatalog().setAcroForm(acroForm); + PDSignatureField signatureField = new PDSignatureField(acroForm); + PDAnnotationWidget widget = signatureField.getWidgets().get(0); + List acroFormFields = acroForm.getFields(); + acroForm.setSignaturesExist(true); + acroForm.setAppendOnly(true); + acroForm.getCOSObject().setDirect(true); + acroFormFields.add(signatureField); + + widget.setRectangle(rect); + + // from PDVisualSigBuilder.createHolderForm() + PDStream stream = new PDStream(doc); + PDFormXObject form = new PDFormXObject(stream); + PDResources res = new PDResources(); + form.setResources(res); + form.setFormType(1); + PDRectangle bbox = new PDRectangle(rect.getWidth(), rect.getHeight()); + float height = bbox.getHeight(); + Matrix initialScale = null; + switch 
(srcDoc.getPage(pageNum).getRotation()) + { + case 90: + form.setMatrix(AffineTransform.getQuadrantRotateInstance(1)); + initialScale = Matrix.getScaleInstance(bbox.getWidth() / bbox.getHeight(), bbox.getHeight() / bbox.getWidth()); + height = bbox.getWidth(); + break; + case 180: + form.setMatrix(AffineTransform.getQuadrantRotateInstance(2)); + break; + case 270: + form.setMatrix(AffineTransform.getQuadrantRotateInstance(3)); + initialScale = Matrix.getScaleInstance(bbox.getWidth() / bbox.getHeight(), bbox.getHeight() / bbox.getWidth()); + height = bbox.getWidth(); + break; + case 0: + default: + break; + } + form.setBBox(bbox); + PDFont font = PDType1Font.HELVETICA_BOLD; + + // from PDVisualSigBuilder.createAppearanceDictionary() + PDAppearanceDictionary appearance = new PDAppearanceDictionary(); + appearance.getCOSObject().setDirect(true); + PDAppearanceStream appearanceStream = new PDAppearanceStream(form.getCOSObject()); + appearance.setNormalAppearance(appearanceStream); + widget.setAppearance(appearance); + + PDPageContentStream cs = new PDPageContentStream(doc, appearanceStream); + + // for 90° and 270° scale ratio of width / height + // not really sure about this + // why does scale have no effect when done in the form matrix??? 
+ if (initialScale != null) + { + cs.transform(initialScale); + } + + // show background (just for debugging, to see the rect size + position) + cs.setNonStrokingColor(Color.yellow); + cs.addRect(-5000, -5000, 10000, 10000); + cs.fill(); + + if (imageFile != null) + { + // show background image + // save and restore graphics if the image is too large and needs to be scaled + cs.saveGraphicsState(); + cs.transform(Matrix.getScaleInstance(0.25f, 0.25f)); + PDImageXObject img = PDImageXObject.createFromFileByExtension(imageFile, doc); + cs.drawImage(img, 0, 0); + cs.restoreGraphicsState(); + } + + // show text + float fontSize = 10; + float leading = fontSize * 1.5f; + cs.beginText(); + cs.setFont(font, fontSize); + cs.setNonStrokingColor(Color.black); + cs.newLineAtOffset(fontSize, height - leading); + cs.setLeading(leading); + + X509Certificate cert = (X509Certificate) getCertificateChain()[0]; + + // https://stackoverflow.com/questions/2914521/ + X500Name x500Name = new X500Name(cert.getSubjectX500Principal().getName()); + RDN cn = x500Name.getRDNs(BCStyle.CN)[0]; + String name = IETFUtils.valueToString(cn.getFirst().getValue()); + + // See https://stackoverflow.com/questions/12575990 + // for better date formatting + String date = signature.getSignDate().getTime().toString(); + String reason = signature.getReason(); + + cs.showText("Signer: " + name); + cs.newLine(); + cs.showText(date); + cs.newLine(); + cs.showText("Reason: " + reason); + + cs.endText(); + + cs.close(); + + // no need to set annotations and /P entry + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + doc.save(baos); + doc.close(); + return new ByteArrayInputStream(baos.toByteArray()); + } + + // Find an existing signature (assumed to be empty). You will usually not need this. 
+ private PDSignature findExistingSignature(PDAcroForm acroForm, String sigFieldName) + { + PDSignature signature = null; + PDSignatureField signatureField; + if (acroForm != null) + { + signatureField = (PDSignatureField) acroForm.getField(sigFieldName); + if (signatureField != null) + { + // retrieve signature dictionary + signature = signatureField.getSignature(); + if (signature == null) + { + signature = new PDSignature(); + // after solving PDFBOX-3524 + // signatureField.setValue(signature) + // until then: + signatureField.getCOSObject().setItem(COSName.V, signature); + } + else + { + throw new IllegalStateException("The signature field " + sigFieldName + " is already signed."); + } + } + } + return signature; + } + + /** + * Arguments are + * [0] key store + * [1] pin + * [2] document that will be signed + * [3] image of visible signature + * + * @param args + * @throws java.security.KeyStoreException + * @throws java.security.cert.CertificateException + * @throws java.io.IOException + * @throws java.security.NoSuchAlgorithmException + * @throws java.security.UnrecoverableKeyException + */ + public static void main(String[] args) throws KeyStoreException, CertificateException, + IOException, NoSuchAlgorithmException, UnrecoverableKeyException + { + if (args.length < 3) + { + usage(); + System.exit(1); + } + + String tsaUrl = null; + // External signing is needed if you are using an external signing service, e.g. to sign + // several files at once. 
+ boolean externalSig = false; + for (int i = 0; i < args.length; i++) + { + if (args[i].equals("-tsa")) + { + i++; + if (i >= args.length) + { + usage(); + System.exit(1); + } + tsaUrl = args[i]; + } + if (args[i].equals("-e")) + { + externalSig = true; + } + } + + File ksFile = new File(args[0]); + KeyStore keystore = KeyStore.getInstance("PKCS12"); + char[] pin = args[1].toCharArray(); + keystore.load(new FileInputStream(ksFile), pin); + + File documentFile = new File(args[2]); + + CreateVisibleSignature2 signing = new CreateVisibleSignature2(keystore, pin.clone()); + + if (args.length >= 4 && !"-tsa".equals(args[3])) + { + signing.setImageFile(new File(args[3])); + } + + File signedDocumentFile; + String name = documentFile.getName(); + String substring = name.substring(0, name.lastIndexOf('.')); + signedDocumentFile = new File(documentFile.getParent(), substring + "_signed.pdf"); + + signing.setExternalSigning(externalSig); + + // Set the signature rectangle + // Although PDF coordinates start from the bottom, humans start from the top. + // So a human would want to position a signature (x,y) units from the + // top left of the displayed page, and the field has a horizontal width and a vertical height + // regardless of page rotation. + Rectangle2D humanRect = new Rectangle2D.Float(100, 200, 150, 50); + + signing.signPDF(documentFile, signedDocumentFile, humanRect, tsaUrl, "Signature1"); + } + + /** + * This will print the usage for this program. 
+ */ + private static void usage() + { + System.err.println("Usage: java " + CreateVisibleSignature2.class.getName() + + " \n" + "" + + "options:\n" + + " -tsa sign timestamp using the given TSA server\n"+ + " -e sign using external signature creation scenario"); + + // generate pkcs12-keystore-file with + // keytool -storepass 123456 -storetype PKCS12 -keystore file.p12 -genkey -alias client -keyalg RSA + } + +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java index bb8efdb6b01..8a52b25744c 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java @@ -18,25 +18,71 @@ import java.io.ByteArrayInputStream; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; +import java.security.GeneralSecurityException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.security.Security; import java.security.cert.Certificate; import java.security.cert.CertificateException; +import java.security.cert.CertificateExpiredException; import java.security.cert.CertificateFactory; +import java.security.cert.CertificateNotYetValidException; +import java.security.cert.X509Certificate; +import java.text.SimpleDateFormat; +import java.util.Arrays; import java.util.Collection; - +import java.util.HashSet; import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSObject; +import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.examples.signature.cert.CertificateVerificationException; +import org.apache.pdfbox.examples.signature.cert.CertificateVerifier; +import 
org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; +import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import org.apache.pdfbox.pdmodel.encryption.SecurityProvider; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; +import org.apache.pdfbox.util.Charsets; +import org.apache.pdfbox.util.Hex; +import org.bouncycastle.asn1.cms.Attribute; +import org.bouncycastle.asn1.cms.CMSAttributes; +import org.bouncycastle.asn1.x509.Time; +import org.bouncycastle.cert.X509CertificateHolder; +import org.bouncycastle.cert.jcajce.JcaCertStore; +import org.bouncycastle.cert.jcajce.JcaX509CertificateConverter; +import org.bouncycastle.cms.CMSException; +import org.bouncycastle.cms.CMSProcessable; +import org.bouncycastle.cms.CMSProcessableByteArray; +import org.bouncycastle.cms.CMSSignedData; +import org.bouncycastle.cms.SignerInformation; +import org.bouncycastle.cms.jcajce.JcaSimpleSignerInfoVerifierBuilder; +import org.bouncycastle.operator.OperatorCreationException; +import org.bouncycastle.tsp.TSPException; +import org.bouncycastle.tsp.TimeStampToken; +import org.bouncycastle.tsp.TimeStampTokenInfo; +import org.bouncycastle.util.CollectionStore; +import org.bouncycastle.util.Store; /** - * This will read a document from the filesystem, decrypt it and do something with the signature. + * This will get the signature(s) from the document, do some verifications and + * show the signature(s) and the certificates. This is a complex topic - the + * code here is an example and not a production-ready solution. * * @author Ben Litchfield */ public final class ShowSignature { + private final SimpleDateFormat sdf = new SimpleDateFormat("dd.MM.yyyy HH:mm:ss"); + private ShowSignature() { } @@ -47,15 +93,22 @@ private ShowSignature() * @param args The command-line arguments. 
* * @throws IOException If there is an error reading the file. - * @throws CertificateException + * @throws org.bouncycastle.tsp.TSPException + * @throws java.security.GeneralSecurityException + * @throws org.apache.pdfbox.examples.signature.cert.CertificateVerificationException */ - public static void main( String[] args ) throws IOException, CertificateException + public static void main(String[] args) throws IOException, TSPException, GeneralSecurityException, + CertificateVerificationException { + // register BouncyCastle provider, needed for "exotic" algorithms + Security.addProvider(SecurityProvider.getProvider()); + ShowSignature show = new ShowSignature(); show.showSignature( args ); } - private void showSignature( String[] args ) throws IOException, CertificateException + private void showSignature(String[] args) throws IOException, TSPException, GeneralSecurityException, + CertificateVerificationException { if( args.length != 2 ) { @@ -64,79 +117,501 @@ private void showSignature( String[] args ) throws IOException, CertificateExcep else { String password = args[0]; - String infile = args[1]; + File infile = new File(args[1]); PDDocument document = null; try { - document = PDDocument.load( new File(infile), password ); - if( !document.isEncrypted() ) + // use old-style document loading to disable leniency + // see also https://www.pdf-insecurity.org/ + RandomAccessBufferedFileInputStream raFile = new RandomAccessBufferedFileInputStream(infile); + // If your files are not too large, you can also download the PDF into a byte array + // with IOUtils.toByteArray() and pass a RandomAccessBuffer() object to the + // PDFParser constructor. + PDFParser parser = new PDFParser(raFile, password); + parser.setLenient(false); + parser.parse(); + document = parser.getPDDocument(); + for (PDSignature sig : document.getSignatureDictionaries()) { - System.err.println( "Warning: Document is not encrypted." 
); - } + COSDictionary sigDict = sig.getCOSObject(); + byte[] contents = sig.getContents(); - COSDictionary trailer = document.getDocument().getTrailer(); - COSDictionary root = (COSDictionary)trailer.getDictionaryObject( COSName.ROOT ); - COSDictionary acroForm = (COSDictionary)root.getDictionaryObject( COSName.ACRO_FORM ); - COSArray fields = (COSArray)acroForm.getDictionaryObject( COSName.FIELDS ); - for( int i=0; i certs = factory.generateCertificates(certStream); + System.out.println("certs=" + certs); + + byte[] hash = MessageDigest.getInstance("SHA1").digest(buf); + verifyPKCS7(hash, contents, sig); + } + else if (subFilter.equals("adbe.x509.rsa_sha1")) + { + // example: PDFBOX-2693.pdf + COSString certString = (COSString) sigDict.getDictionaryObject(COSName.CERT); + //TODO this could also be an array. + if (certString == null) { - if( subFilter.getName().equals( "adbe.x509.rsa_sha1" ) ) - { - COSString certString = (COSString)cert.getDictionaryObject( - COSName.getPDFName( "Cert" ) ); - byte[] certData = certString.getBytes(); - CertificateFactory factory = CertificateFactory.getInstance( "X.509" ); - ByteArrayInputStream certStream = new ByteArrayInputStream( certData ); - Collection certs = factory.generateCertificates( certStream ); - System.out.println( "certs=" + certs ); - } - else if( subFilter.getName().equals( "adbe.pkcs7.sha1" ) ) + System.err.println("The /Cert certificate string is missing in the signature dictionary"); + return; + } + byte[] certData = certString.getBytes(); + CertificateFactory factory = CertificateFactory.getInstance("X.509"); + ByteArrayInputStream certStream = new ByteArrayInputStream(certData); + Collection certs = factory.generateCertificates(certStream); + System.out.println("certs=" + certs); + + X509Certificate cert = (X509Certificate) certs.iterator().next(); + + // to verify signature, see code at + // https://stackoverflow.com/questions/43383859/ + + try + { + if (sig.getSignDate() != null) { - COSString certString 
= (COSString)cert.getDictionaryObject( - COSName.CONTENTS ); - byte[] certData = certString.getBytes(); - CertificateFactory factory = CertificateFactory.getInstance( "X.509" ); - ByteArrayInputStream certStream = new ByteArrayInputStream( certData ); - Collection certs = factory.generateCertificates( certStream ); - System.out.println( "certs=" + certs ); + cert.checkValidity(sig.getSignDate().getTime()); + System.out.println("Certificate valid at signing time"); } else { - System.err.println( "Unknown certificate type:" + subFilter ); + System.err.println("Certificate cannot be verified without signing time"); } } + catch (CertificateExpiredException ex) + { + System.err.println("Certificate expired at signing time"); + } + catch (CertificateNotYetValidException ex) + { + System.err.println("Certificate not yet valid at signing time"); + } + if (CertificateVerifier.isSelfSigned(cert)) + { + System.err.println("Certificate is self-signed, LOL!"); + } else { - throw new IOException( "Missing subfilter for cert dictionary" ); + System.out.println("Certificate is not self-signed"); + + if (sig.getSignDate() != null) + { + @SuppressWarnings("unchecked") + Store store = new JcaCertStore(certs); + SigUtils.verifyCertificateChain(store, cert, sig.getSignDate().getTime()); + } } } + else if (subFilter.equals("ETSI.RFC3161")) + { + // e.g. 
PDFBOX-1848, file_timestamped.pdf + verifyETSIdotRFC3161(buf, contents); + } else { - System.out.println( "Signature found, but no certificate" ); + System.err.println("Unknown certificate type: " + subFilter); } } + else + { + throw new IOException("Missing subfilter for cert dictionary"); + } + + int[] byteRange = sig.getByteRange(); + if (byteRange.length != 4) + { + System.err.println("Signature byteRange must have 4 items"); + } + else + { + long fileLen = infile.length(); + long rangeMax = byteRange[2] + (long) byteRange[3]; + // multiply content length with 2 (because it is in hex in the PDF) and add 2 for < and > + int contentLen = contents.length * 2 + 2; + if (fileLen != rangeMax || byteRange[0] != 0 || byteRange[1] + contentLen != byteRange[2]) + { + // a false result doesn't necessarily mean that the PDF is a fake + // see this answer why: + // https://stackoverflow.com/a/48185913/535646 + System.out.println("Signature does not cover whole document"); + } + else + { + System.out.println("Signature covers whole document"); + } + checkContentValueWithFile(infile, byteRange, contents); + } } + analyseDSS(document); + } + catch (CMSException ex) + { + throw new IOException(ex); + } + catch (OperatorCreationException ex) + { + throw new IOException(ex); } finally { - if( document != null ) + if (document != null) { document.close(); } } + System.out.println("Analyzed: " + args[1]); + } + } + + private void checkContentValueWithFile(File file, int[] byteRange, byte[] contents) throws IOException + { + // https://stackoverflow.com/questions/55049270 + // comment by mkl: check whether gap contains a hex value equal + // byte-by-byte to the Content value, to prevent attacker from using a literal string + // to allow extra space + RandomAccessBufferedFileInputStream raf = new RandomAccessBufferedFileInputStream(file); + raf.seek(byteRange[1]); + int c = raf.read(); + if (c != '<') + { + System.err.println("'<' expected at offset " + byteRange[1] + ", but got " + 
(char) c); + } + byte[] contentFromFile = raf.readFully(byteRange[2] - byteRange[1] - 2); + byte[] contentAsHex = Hex.getString(contents).getBytes(Charsets.US_ASCII); + if (contentFromFile.length != contentAsHex.length) + { + System.err.println("Raw content length from file is " + + contentFromFile.length + + ", but internal content string in hex has length " + + contentAsHex.length); + } + // Compare the two, we can't do byte comparison because of upper/lower case + // also check that it is really hex + for (int i = 0; i < contentFromFile.length; ++i) + { + try + { + if (Integer.parseInt(String.valueOf((char) contentFromFile[i]), 16) != + Integer.parseInt(String.valueOf((char) contentAsHex[i]), 16)) + { + System.err.println("Possible manipulation at file offset " + + (byteRange[1] + i + 1) + " in signature content"); + break; + } + } + catch (NumberFormatException ex) + { + System.err.println("Incorrect hex value"); + System.err.println("Possible manipulation at file offset " + + (byteRange[1] + i + 1) + " in signature content"); + break; + } + } + c = raf.read(); + if (c != '>') + { + System.err.println("'>' expected at offset " + byteRange[2] + ", but got " + (char) c); + } + raf.close(); + } + + /** + * Verify ETSI.RFC3161 TimeStampToken + * + * @param buf the byte sequence that has been signed + * @param contents the /Contents field as a COSString + * @throws CMSException + * @throws NoSuchAlgorithmException + * @throws IOException + * @throws TSPException + * @throws OperatorCreationException + * @throws CertificateVerificationException + * @throws CertificateException + */ + private void verifyETSIdotRFC3161(byte[] buf, byte[] contents) + throws CertificateException, CMSException, IOException, OperatorCreationException, + TSPException, NoSuchAlgorithmException, CertificateVerificationException + { + TimeStampToken timeStampToken = new TimeStampToken(new CMSSignedData(contents)); + TimeStampTokenInfo timeStampInfo = timeStampToken.getTimeStampInfo(); + 
System.out.println("Time stamp gen time: " + timeStampInfo.getGenTime()); + if (timeStampInfo.getTsa() != null) + { + System.out.println("Time stamp tsa name: " + timeStampInfo.getTsa().getName()); + } + + CertificateFactory factory = CertificateFactory.getInstance("X.509"); + ByteArrayInputStream certStream = new ByteArrayInputStream(contents); + Collection certs = factory.generateCertificates(certStream); + System.out.println("certs=" + certs); + + String hashAlgorithm = timeStampInfo.getMessageImprintAlgOID().getId(); + // compare the hash of the signed content with the hash in the timestamp + if (Arrays.equals(MessageDigest.getInstance(hashAlgorithm).digest(buf), + timeStampInfo.getMessageImprintDigest())) + { + System.out.println("ETSI.RFC3161 timestamp signature verified"); + } + else + { + System.err.println("ETSI.RFC3161 timestamp signature verification failed"); + } + + X509Certificate certFromTimeStamp = (X509Certificate) certs.iterator().next(); + SigUtils.checkTimeStampCertificateUsage(certFromTimeStamp); + SigUtils.validateTimestampToken(timeStampToken); + SigUtils.verifyCertificateChain(timeStampToken.getCertificates(), + certFromTimeStamp, + timeStampInfo.getGenTime()); + } + + /** + * Verify a PKCS7 signature. 
+ * + * @param byteArray the byte sequence that has been signed + * @param contents the /Contents field as a COSString + * @param sig the PDF signature (the /V dictionary) + * @throws CMSException + * @throws OperatorCreationException + * @throws IOException + * @throws GeneralSecurityException + * @throws TSPException + */ + private void verifyPKCS7(byte[] byteArray, byte[] contents, PDSignature sig) + throws CMSException, OperatorCreationException, + IOException, GeneralSecurityException, TSPException, CertificateVerificationException + { + // inspiration: + // http://stackoverflow.com/a/26702631/535646 + // http://stackoverflow.com/a/9261365/535646 + CMSProcessable signedContent = new CMSProcessableByteArray(byteArray); + CMSSignedData signedData = new CMSSignedData(signedContent, contents); + @SuppressWarnings("unchecked") + Store certificatesStore = signedData.getCertificates(); + if (certificatesStore.getMatches(null).isEmpty()) + { + throw new IOException("No certificates in signature"); + } + Collection signers = signedData.getSignerInfos().getSigners(); + if (signers.isEmpty()) + { + throw new IOException("No signers in signature"); + } + SignerInformation signerInformation = signers.iterator().next(); + @SuppressWarnings("unchecked") + Collection matches = + certificatesStore.getMatches(signerInformation.getSID()); + if (matches.isEmpty()) + { + throw new IOException("Signer '" + signerInformation.getSID().getIssuer() + + ", serial# " + signerInformation.getSID().getSerialNumber() + + " does not match any certificates"); + } + X509CertificateHolder certificateHolder = matches.iterator().next(); + X509Certificate certFromSignedData = new JcaX509CertificateConverter().getCertificate(certificateHolder); + System.out.println("certFromSignedData: " + certFromSignedData); + + SigUtils.checkCertificateUsage(certFromSignedData); + + // Embedded timestamp + TimeStampToken timeStampToken = SigUtils.extractTimeStampTokenFromSignerInformation(signerInformation); + if 
(timeStampToken != null) + { + // tested with QV_RCA1_RCA3_CPCPS_V4_11.pdf + // https://www.quovadisglobal.com/~/media/Files/Repository/QV_RCA1_RCA3_CPCPS_V4_11.ashx + // also 021496.pdf and 036351.pdf from digitalcorpora + SigUtils.validateTimestampToken(timeStampToken); + @SuppressWarnings("unchecked") // TimeStampToken.getSID() is untyped + Collection tstMatches = + timeStampToken.getCertificates().getMatches(timeStampToken.getSID()); + X509CertificateHolder tstCertHolder = tstMatches.iterator().next(); + X509Certificate certFromTimeStamp = new JcaX509CertificateConverter().getCertificate(tstCertHolder); + // merge both stores using a set to remove duplicates + HashSet certificateHolderSet = new HashSet(); + certificateHolderSet.addAll(certificatesStore.getMatches(null)); + certificateHolderSet.addAll(timeStampToken.getCertificates().getMatches(null)); + SigUtils.verifyCertificateChain(new CollectionStore(certificateHolderSet), + certFromTimeStamp, + timeStampToken.getTimeStampInfo().getGenTime()); + SigUtils.checkTimeStampCertificateUsage(certFromTimeStamp); + + // compare the hash of the signature with the hash in the timestamp + byte[] tsMessageImprintDigest = timeStampToken.getTimeStampInfo().getMessageImprintDigest(); + String hashAlgorithm = timeStampToken.getTimeStampInfo().getMessageImprintAlgOID().getId(); + byte[] sigMessageImprintDigest = MessageDigest.getInstance(hashAlgorithm).digest(signerInformation.getSignature()); + if (Arrays.equals(tsMessageImprintDigest, sigMessageImprintDigest)) + { + System.out.println("timestamp signature verified"); + } + else + { + System.err.println("timestamp signature verification failed"); + } + } + + try + { + if (sig.getSignDate() != null) + { + certFromSignedData.checkValidity(sig.getSignDate().getTime()); + System.out.println("Certificate valid at signing time"); + } + else + { + System.err.println("Certificate cannot be verified without signing time"); + } + } + catch (CertificateExpiredException ex) + { + 
System.err.println("Certificate expired at signing time"); + } + catch (CertificateNotYetValidException ex) + { + System.err.println("Certificate not yet valid at signing time"); + } + + // usually not available + if (signerInformation.getSignedAttributes() != null) + { + // From SignedMailValidator.getSignatureTime() + Attribute signingTime = signerInformation.getSignedAttributes().get(CMSAttributes.signingTime); + if (signingTime != null) + { + Time timeInstance = Time.getInstance(signingTime.getAttrValues().getObjectAt(0)); + try + { + certFromSignedData.checkValidity(timeInstance.getDate()); + System.out.println("Certificate valid at signing time: " + timeInstance.getDate()); + } + catch (CertificateExpiredException ex) + { + System.err.println("Certificate expired at signing time"); + } + catch (CertificateNotYetValidException ex) + { + System.err.println("Certificate not yet valid at signing time"); + } + } + } + + if (signerInformation.verify(new JcaSimpleSignerInfoVerifierBuilder(). + setProvider(SecurityProvider.getProvider()).build(certFromSignedData))) + { + System.out.println("Signature verified"); + } + else + { + System.out.println("Signature verification failed"); + } + + if (CertificateVerifier.isSelfSigned(certFromSignedData)) + { + System.err.println("Certificate is self-signed, LOL!"); + } + else + { + System.out.println("Certificate is not self-signed"); + + if (sig.getSignDate() != null) + { + SigUtils.verifyCertificateChain(certificatesStore, certFromSignedData, sig.getSignDate().getTime()); + } + else + { + System.err.println("Certificate cannot be verified without signing time"); + } + } + } + + /** + * Analyzes the DSS-Dictionary (Document Security Store) of the document. Which is used for + * signature validation. The DSS is defined in PAdES Part 4 - Long Term Validation. 
+ * + * @param document PDDocument, to get the DSS from + */ + private void analyseDSS(PDDocument document) throws IOException + { + PDDocumentCatalog catalog = document.getDocumentCatalog(); + COSBase dssElement = catalog.getCOSObject().getDictionaryObject("DSS"); + + if (dssElement instanceof COSDictionary) + { + COSDictionary dss = (COSDictionary) dssElement; + System.out.println("DSS Dictionary: " + dss); + COSBase certsElement = dss.getDictionaryObject("Certs"); + if (certsElement instanceof COSArray) + { + printStreamsFromArray((COSArray) certsElement, "Cert"); + } + COSBase ocspsElement = dss.getDictionaryObject("OCSPs"); + if (ocspsElement instanceof COSArray) + { + printStreamsFromArray((COSArray) ocspsElement, "Ocsp"); + } + COSBase crlElement = dss.getDictionaryObject("CRLs"); + if (crlElement instanceof COSArray) + { + printStreamsFromArray((COSArray) crlElement, "CRL"); + } + // TODO: go through VRIs (which indirectly point to the DSS-Data) + } + } + + /** + * Go through the elements of a COSArray containing each an COSStream to print in Hex. 
+ * + * @param elements COSArray of elements containing a COS Stream + * @param description to append on Print + * @throws IOException + */ + private void printStreamsFromArray(COSArray elements, String description) throws IOException + { + for (COSBase baseElem : elements) + { + COSObject streamObj = (COSObject) baseElem; + if (streamObj.getObject() instanceof COSStream) + { + COSStream cosStream = (COSStream) streamObj.getObject(); + + InputStream input = cosStream.createInputStream(); + byte[] streamBytes = IOUtils.toByteArray(input); + input.close(); + + System.out.println(description + " (" + elements.indexOf(streamObj) + "): " + + Hex.getString(streamBytes)); + } } } @@ -146,6 +621,8 @@ else if( subFilter.getName().equals( "adbe.pkcs7.sha1" ) ) private static void usage() { System.err.println( "usage: java " + ShowSignature.class.getName() + - " " ); + " " ); + // The password is for encrypted files and has nothing to do with the signature. + // (A PDF can be both encrypted and signed) } } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/SigUtils.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/SigUtils.java new file mode 100644 index 00000000000..d3db76a4dc9 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/SigUtils.java @@ -0,0 +1,347 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.examples.signature; + +import java.io.IOException; +import java.security.cert.CertificateException; +import java.security.cert.CertificateParsingException; +import java.security.cert.X509Certificate; +import java.util.Collection; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.examples.signature.cert.CertificateVerificationException; +import org.apache.pdfbox.examples.signature.cert.CertificateVerifier; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.encryption.SecurityProvider; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; +import org.bouncycastle.asn1.ASN1Object; +import org.bouncycastle.asn1.cms.Attribute; +import org.bouncycastle.asn1.cms.AttributeTable; +import org.bouncycastle.asn1.pkcs.PKCSObjectIdentifiers; +import org.bouncycastle.asn1.x509.KeyPurposeId; +import org.bouncycastle.cert.X509CertificateHolder; +import org.bouncycastle.cert.jcajce.JcaX509CertificateConverter; +import org.bouncycastle.cms.CMSException; +import org.bouncycastle.cms.CMSSignedData; +import org.bouncycastle.cms.SignerInformation; +import org.bouncycastle.cms.SignerInformationVerifier; +import org.bouncycastle.cms.jcajce.JcaSimpleSignerInfoVerifierBuilder; +import org.bouncycastle.operator.OperatorCreationException; +import org.bouncycastle.tsp.TSPException; +import org.bouncycastle.tsp.TimeStampToken; +import org.bouncycastle.util.Store; + +/** + * Utility class for the signature / timestamp examples. 
+ * + * @author Tilman Hausherr + */ +public class SigUtils +{ + private static final Log LOG = LogFactory.getLog(SigUtils.class); + + private SigUtils() + { + } + + /** + * Get the access permissions granted for this document in the DocMDP transform parameters + * dictionary. Details are described in the table "Entries in the DocMDP transform parameters + * dictionary" in the PDF specification. + * + * @param doc document. + * @return the permission value. 0 means no DocMDP transform parameters dictionary exists. Other + * return values are 1, 2 or 3. 2 is also returned if the DocMDP transform parameters dictionary + * is found but did not contain a /P entry, or if the value is outside the valid range. + */ + public static int getMDPPermission(PDDocument doc) + { + COSBase base = doc.getDocumentCatalog().getCOSObject().getDictionaryObject(COSName.PERMS); + if (base instanceof COSDictionary) + { + COSDictionary permsDict = (COSDictionary) base; + base = permsDict.getDictionaryObject(COSName.DOCMDP); + if (base instanceof COSDictionary) + { + COSDictionary signatureDict = (COSDictionary) base; + base = signatureDict.getDictionaryObject(COSName.REFERENCE); + if (base instanceof COSArray) + { + COSArray refArray = (COSArray) base; + for (int i = 0; i < refArray.size(); ++i) + { + base = refArray.getObject(i); + if (base instanceof COSDictionary) + { + COSDictionary sigRefDict = (COSDictionary) base; + if (COSName.DOCMDP.equals(sigRefDict.getDictionaryObject(COSName.TRANSFORM_METHOD))) + { + base = sigRefDict.getDictionaryObject(COSName.TRANSFORM_PARAMS); + if (base instanceof COSDictionary) + { + COSDictionary transformDict = (COSDictionary) base; + int accessPermissions = transformDict.getInt(COSName.P, 2); + if (accessPermissions < 1 || accessPermissions > 3) + { + accessPermissions = 2; + } + return accessPermissions; + } + } + } + } + } + } + } + return 0; + } + + /** + * Set the "modification detection and prevention" permissions granted for this document in the 
+ * DocMDP transform parameters dictionary. Details are described in the table "Entries in the + * DocMDP transform parameters dictionary" in the PDF specification. + * + * @param doc The document. + * @param signature The signature object. + * @param accessPermissions The permission value (1, 2 or 3). + * + * @throws IOException if a signature exists. + */ + public static void setMDPPermission(PDDocument doc, PDSignature signature, int accessPermissions) + throws IOException + { + for (PDSignature sig : doc.getSignatureDictionaries()) + { + // "Approval signatures shall follow the certification signature if one is present" + // thus we don't care about timestamp signatures + if (COSName.DOC_TIME_STAMP.equals(sig.getCOSObject().getItem(COSName.TYPE))) + { + continue; + } + if (sig.getCOSObject().containsKey(COSName.CONTENTS)) + { + throw new IOException("DocMDP transform method not allowed if an approval signature exists"); + } + } + + COSDictionary sigDict = signature.getCOSObject(); + + // DocMDP specific stuff + COSDictionary transformParameters = new COSDictionary(); + transformParameters.setItem(COSName.TYPE, COSName.TRANSFORM_PARAMS); + transformParameters.setInt(COSName.P, accessPermissions); + transformParameters.setName(COSName.V, "1.2"); + transformParameters.setNeedToBeUpdated(true); + + COSDictionary referenceDict = new COSDictionary(); + referenceDict.setItem(COSName.TYPE, COSName.SIG_REF); + referenceDict.setItem(COSName.TRANSFORM_METHOD, COSName.DOCMDP); + referenceDict.setItem(COSName.DIGEST_METHOD, COSName.getPDFName("SHA1")); + referenceDict.setItem(COSName.TRANSFORM_PARAMS, transformParameters); + referenceDict.setNeedToBeUpdated(true); + + COSArray referenceArray = new COSArray(); + referenceArray.add(referenceDict); + sigDict.setItem(COSName.REFERENCE, referenceArray); + referenceArray.setNeedToBeUpdated(true); + + // Catalog + COSDictionary catalogDict = doc.getDocumentCatalog().getCOSObject(); + COSDictionary permsDict = new COSDictionary(); 
+ catalogDict.setItem(COSName.PERMS, permsDict); + permsDict.setItem(COSName.DOCMDP, signature); + catalogDict.setNeedToBeUpdated(true); + permsDict.setNeedToBeUpdated(true); + } + + /** + * Log if the certificate is not valid for signature usage. Doing this + * anyway results in Adobe Reader failing to validate the PDF. + * + * @param x509Certificate + * @throws java.security.cert.CertificateParsingException + */ + public static void checkCertificateUsage(X509Certificate x509Certificate) + throws CertificateParsingException + { + // Check whether signer certificate is "valid for usage" + // https://stackoverflow.com/a/52765021/535646 + // https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/changes.html#id1 + boolean[] keyUsage = x509Certificate.getKeyUsage(); + if (keyUsage != null && !keyUsage[0] && !keyUsage[1]) + { + // (unclear what "signTransaction" is) + // https://tools.ietf.org/html/rfc5280#section-4.2.1.3 + LOG.error("Certificate key usage does not include " + + "digitalSignature nor nonRepudiation"); + } + List extendedKeyUsage = x509Certificate.getExtendedKeyUsage(); + if (extendedKeyUsage != null && + !extendedKeyUsage.contains(KeyPurposeId.id_kp_emailProtection.toString()) && + !extendedKeyUsage.contains(KeyPurposeId.id_kp_codeSigning.toString()) && + !extendedKeyUsage.contains(KeyPurposeId.anyExtendedKeyUsage.toString()) && + !extendedKeyUsage.contains("1.2.840.113583.1.1.5") && + // not mentioned in Adobe document, but tolerated in practice + !extendedKeyUsage.contains("1.3.6.1.4.1.311.10.3.12")) + { + LOG.error("Certificate extended key usage does not include " + + "emailProtection, nor codeSigning, nor anyExtendedKeyUsage, " + + "nor 'Adobe Authentic Documents Trust'"); + } + } + + /** + * Log if the certificate is not valid for timestamping. 
+ * + * @param x509Certificate + * @throws java.security.cert.CertificateParsingException + */ + public static void checkTimeStampCertificateUsage(X509Certificate x509Certificate) + throws CertificateParsingException + { + List extendedKeyUsage = x509Certificate.getExtendedKeyUsage(); + // https://tools.ietf.org/html/rfc5280#section-4.2.1.12 + if (extendedKeyUsage != null && + !extendedKeyUsage.contains(KeyPurposeId.id_kp_timeStamping.toString())) + { + LOG.error("Certificate extended key usage does not include timeStamping"); + } + } + + /** + * Log if the certificate is not valid for responding. + * + * @param x509Certificate + * @throws java.security.cert.CertificateParsingException + */ + public static void checkResponderCertificateUsage(X509Certificate x509Certificate) + throws CertificateParsingException + { + List extendedKeyUsage = x509Certificate.getExtendedKeyUsage(); + // https://tools.ietf.org/html/rfc5280#section-4.2.1.12 + if (extendedKeyUsage != null && + !extendedKeyUsage.contains(KeyPurposeId.id_kp_OCSPSigning.toString())) + { + LOG.error("Certificate extended key usage does not include OCSP responding"); + } + } + + /** + * Gets the last relevant signature in the document, i.e. the one with the highest offset. 
+ * + * @param document to get its last signature + * @return last signature or null when none found + * @throws IOException + */ + public static PDSignature getLastRelevantSignature(PDDocument document) throws IOException + { + SortedMap sortedMap = new TreeMap(); + for (PDSignature signature : document.getSignatureDictionaries()) + { + int sigOffset = signature.getByteRange()[1]; + sortedMap.put(sigOffset, signature); + } + if (sortedMap.size() > 0) + { + PDSignature lastSignature = sortedMap.get(sortedMap.lastKey()); + COSBase type = lastSignature.getCOSObject().getItem(COSName.TYPE); + if (type == null || COSName.SIG.equals(type) || COSName.DOC_TIME_STAMP.equals(type)) + { + return lastSignature; + } + } + return null; + } + + static public TimeStampToken extractTimeStampTokenFromSignerInformation(SignerInformation signerInformation) + throws CMSException, IOException, TSPException + { + if (signerInformation.getUnsignedAttributes() == null) + { + return null; + } + AttributeTable unsignedAttributes = signerInformation.getUnsignedAttributes(); + // https://stackoverflow.com/questions/1647759/how-to-validate-if-a-signed-jar-contains-a-timestamp + Attribute attribute = unsignedAttributes.get( + PKCSObjectIdentifiers.id_aa_signatureTimeStampToken); + if (attribute == null) + { + return null; + } + ASN1Object obj = (ASN1Object) attribute.getAttrValues().getObjectAt(0); + CMSSignedData signedTSTData = new CMSSignedData(obj.getEncoded()); + return new TimeStampToken(signedTSTData); + } + + public static void validateTimestampToken(TimeStampToken timeStampToken) + throws IOException, CertificateException, TSPException, OperatorCreationException + { + // https://stackoverflow.com/questions/42114742/ + @SuppressWarnings("unchecked") // TimeStampToken.getSID() is untyped + Collection tstMatches = + timeStampToken.getCertificates().getMatches(timeStampToken.getSID()); + X509CertificateHolder certificateHolder = tstMatches.iterator().next(); + SignerInformationVerifier siv 
= + new JcaSimpleSignerInfoVerifierBuilder().setProvider(SecurityProvider.getProvider()).build(certificateHolder); + timeStampToken.validate(siv); + } + + + /** + * Verify the certificate chain up to the root, including OCSP or CRL. However this does not + * test whether the root certificate is in a trusted list.

+ * Please post bad PDF files that succeed and good PDF files that fail in + * PDFBOX-3017. + * + * @param certificatesStore + * @param certFromSignedData + * @param signDate + * @throws CertificateVerificationException + * @throws CertificateException + */ + public static void verifyCertificateChain(Store certificatesStore, + X509Certificate certFromSignedData, Date signDate) + throws CertificateVerificationException, CertificateException + { + Collection certificateHolders = certificatesStore.getMatches(null); + Set additionalCerts = new HashSet(); + JcaX509CertificateConverter certificateConverter = new JcaX509CertificateConverter(); + for (X509CertificateHolder certHolder : certificateHolders) + { + X509Certificate certificate = certificateConverter.getCertificate(certHolder); + if (!certificate.equals(certFromSignedData)) + { + additionalCerts.add(certificate); + } + } + CertificateVerifier.verifyCertificate(certFromSignedData, additionalCerts, true, signDate); + //TODO check whether the root certificate is in our trusted list. 
+ // For the EU, get a list here: + // https://ec.europa.eu/digital-single-market/en/eu-trusted-lists-trust-service-providers + // ( getRootCertificates() is not helpful because these are SSL certificates) + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/TSAClient.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/TSAClient.java index 74b5aea87ec..59e2de9c8b2 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/signature/TSAClient.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/TSAClient.java @@ -24,6 +24,7 @@ import java.net.URLConnection; import java.security.MessageDigest; import java.security.SecureRandom; +import java.util.Random; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -52,6 +53,9 @@ public class TSAClient private final String password; private final MessageDigest digest; + // SecureRandom.getInstanceStrong() would be better, but sometimes blocks on Linux + private static final Random RANDOM = new SecureRandom(); + /** * * @param url the URL of the TSA service @@ -69,19 +73,18 @@ public TSAClient(URL url, String username, String password, MessageDigest digest /** * - * @param messageImprint imprint of message contents - * @return the encoded time stamp token + * @param content + * @return the time stamp token * @throws IOException if there was an error with the connection or data from the TSA server, * or if the time stamp response could not be validated */ - public byte[] getTimeStampToken(byte[] messageImprint) throws IOException + public TimeStampToken getTimeStampToken(byte[] content) throws IOException { digest.reset(); - byte[] hash = digest.digest(messageImprint); + byte[] hash = digest.digest(content); // 32-bit cryptographic nonce - SecureRandom random = new SecureRandom(); - int nonce = random.nextInt(); + int nonce = RANDOM.nextInt(); // generate TSA request TimeStampRequestGenerator tsaGenerator = new 
TimeStampRequestGenerator(); @@ -102,14 +105,17 @@ public byte[] getTimeStampToken(byte[] messageImprint) throws IOException { throw new IOException(e); } - - TimeStampToken token = response.getTimeStampToken(); - if (token == null) + + TimeStampToken timeStampToken = response.getTimeStampToken(); + if (timeStampToken == null) { - throw new IOException("Response does not have a time stamp token"); + // https://www.ietf.org/rfc/rfc3161.html#section-2.4.2 + throw new IOException("Response from " + url + + " does not have a time stamp token, status: " + response.getStatus() + + " (" + response.getStatusString() + ")"); } - return token.getEncoded(); + return timeStampToken; } // gets response data for the given encoded TimeStampRequest data @@ -128,7 +134,9 @@ private byte[] getTSAResponse(byte[] request) throws IOException if (username != null && password != null && !username.isEmpty() && !password.isEmpty()) { - connection.setRequestProperty(username, password); + // See https://stackoverflow.com/questions/12732422/ (needs jdk8) + // or see implementation in 3.0 + throw new UnsupportedOperationException("authentication not implemented yet"); } // read response @@ -138,6 +146,11 @@ private byte[] getTSAResponse(byte[] request) throws IOException output = connection.getOutputStream(); output.write(request); } + catch (IOException ex) + { + LOG.error("Exception when writing to " + this.url, ex); + throw ex; + } finally { IOUtils.closeQuietly(output); @@ -152,6 +165,11 @@ private byte[] getTSAResponse(byte[] request) throws IOException input = connection.getInputStream(); response = IOUtils.toByteArray(input); } + catch (IOException ex) + { + LOG.error("Exception when reading from " + this.url, ex); + throw ex; + } finally { IOUtils.closeQuietly(input); diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/ValidationTimeStamp.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/ValidationTimeStamp.java new file mode 100644 index 
00000000000..b868ed1d78f --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/ValidationTimeStamp.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.examples.signature; + +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.io.IOUtils; +import org.bouncycastle.asn1.ASN1Encodable; +import org.bouncycastle.asn1.ASN1EncodableVector; +import org.bouncycastle.asn1.ASN1ObjectIdentifier; +import org.bouncycastle.asn1.ASN1Primitive; +import org.bouncycastle.asn1.DERSet; +import org.bouncycastle.asn1.cms.Attribute; +import org.bouncycastle.asn1.cms.AttributeTable; +import org.bouncycastle.asn1.cms.Attributes; +import org.bouncycastle.asn1.pkcs.PKCSObjectIdentifiers; +import org.bouncycastle.cms.CMSSignedData; +import org.bouncycastle.cms.SignerInformation; +import org.bouncycastle.cms.SignerInformationStore; +import org.bouncycastle.tsp.TimeStampToken; + +/** + * This class wraps the TSAClient and the work that has to be 
done with it. Like Adding Signed + * TimeStamps to a signature, or creating a CMS timestamp attribute (with a signed timestamp) + * + * @author Others + * @author Alexis Suter + */ +public class ValidationTimeStamp +{ + private TSAClient tsaClient; + + /** + * @param tsaUrl The url where TS-Request will be done. + * @throws NoSuchAlgorithmException + * @throws MalformedURLException + */ + public ValidationTimeStamp(String tsaUrl) throws NoSuchAlgorithmException, MalformedURLException + { + if (tsaUrl != null) + { + MessageDigest digest = MessageDigest.getInstance("SHA-256"); + this.tsaClient = new TSAClient(new URL(tsaUrl), null, null, digest); + } + } + + /** + * Creates a signed timestamp token by the given input stream. + * + * @param content InputStream of the content to sign + * @return the byte[] of the timestamp token + * @throws IOException + */ + public byte[] getTimeStampToken(InputStream content) throws IOException + { + TimeStampToken timeStampToken = tsaClient.getTimeStampToken(IOUtils.toByteArray(content)); + return timeStampToken.getEncoded(); + } + + /** + * Extend cms signed data with TimeStamp first or to all signers + * + * @param signedData Generated CMS signed data + * @return CMSSignedData Extended CMS signed data + * @throws IOException + */ + public CMSSignedData addSignedTimeStamp(CMSSignedData signedData) + throws IOException + { + SignerInformationStore signerStore = signedData.getSignerInfos(); + List newSigners = new ArrayList(); + + for (SignerInformation signer : signerStore.getSigners()) + { + // This adds a timestamp to every signer (into his unsigned attributes) in the signature. + newSigners.add(signTimeStamp(signer)); + } + + // Because new SignerInformation is created, new SignerInfoStore has to be created + // and also be replaced in signedData. Which creates a new signedData object. 
+ return CMSSignedData.replaceSigners(signedData, new SignerInformationStore(newSigners)); + } + + /** + * Extend CMS Signer Information with the TimeStampToken into the unsigned Attributes. + * + * @param signer information about signer + * @return information about SignerInformation + * @throws IOException + */ + private SignerInformation signTimeStamp(SignerInformation signer) + throws IOException + { + AttributeTable unsignedAttributes = signer.getUnsignedAttributes(); + + ASN1EncodableVector vector = new ASN1EncodableVector(); + if (unsignedAttributes != null) + { + vector = unsignedAttributes.toASN1EncodableVector(); + } + + TimeStampToken timeStampToken = tsaClient.getTimeStampToken(signer.getSignature()); + byte[] token = timeStampToken.getEncoded(); + ASN1ObjectIdentifier oid = PKCSObjectIdentifiers.id_aa_signatureTimeStampToken; + ASN1Encodable signatureTimeStamp = new Attribute(oid, + new DERSet(ASN1Primitive.fromByteArray(token))); + + vector.add(signatureTimeStamp); + Attributes signedAttributes = new Attributes(vector); + + // There is no other way changing the unsigned attributes of the signer information. + // result is never null, new SignerInformation always returned, + // see source code of replaceUnsignedAttributes + return SignerInformation.replaceUnsignedAttributes(signer, new AttributeTable(signedAttributes)); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/CRLVerifier.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/CRLVerifier.java new file mode 100644 index 00000000000..5009c7b427b --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/CRLVerifier.java @@ -0,0 +1,358 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.pdfbox.examples.signature.cert; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.security.GeneralSecurityException; +import java.security.cert.CRLException; +import java.security.cert.CertificateException; +import java.security.cert.CertificateFactory; +import java.security.cert.X509CRL; +import java.security.cert.X509CRLEntry; +import java.security.cert.X509Certificate; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.Hashtable; +import java.util.List; +import java.util.Set; + +import javax.naming.Context; +import javax.naming.NamingException; +import javax.naming.directory.Attribute; +import javax.naming.directory.Attributes; +import javax.naming.directory.DirContext; +import javax.naming.directory.InitialDirContext; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.pdmodel.encryption.SecurityProvider; + +import org.bouncycastle.asn1.ASN1InputStream; +import org.bouncycastle.asn1.ASN1OctetString; +import org.bouncycastle.asn1.ASN1Primitive; +import org.bouncycastle.asn1.DERIA5String; +import org.bouncycastle.asn1.x509.CRLDistPoint; +import org.bouncycastle.asn1.x509.DistributionPoint; +import org.bouncycastle.asn1.x509.DistributionPointName; +import 
org.bouncycastle.asn1.x509.Extension; +import org.bouncycastle.asn1.x509.GeneralName; +import org.bouncycastle.asn1.x509.GeneralNames; + +/** + * Copied from Apache CXF 2.4.9, initial version: + * https://svn.apache.org/repos/asf/cxf/tags/cxf-2.4.9/distribution/src/main/release/samples/sts_issue_operation/src/main/java/demo/sts/provider/cert/ + * + */ +public final class CRLVerifier +{ + private static final Log LOG = LogFactory.getLog(CRLVerifier.class); + + private CRLVerifier() + { + } + + /** + * Extracts the CRL distribution points from the certificate (if available) + * and checks the certificate revocation status against the CRLs coming from + * the distribution points. Supports HTTP, HTTPS, FTP and LDAP based URLs. + * + * @param cert the certificate to be checked for revocation + * @param signDate the date when the signing took place + * @param additionalCerts set of trusted root CA certificates that will be + * used as "trust anchors" and intermediate CA certificates that will be + * used as part of the certification chain. + * @throws CertificateVerificationException if the certificate could not be verified + * @throws RevokedCertificateException if the certificate is revoked + */ + public static void verifyCertificateCRLs(X509Certificate cert, Date signDate, + Set additionalCerts) + throws CertificateVerificationException, RevokedCertificateException + { + try + { + Date now = Calendar.getInstance().getTime(); + Exception firstException = null; + List crlDistributionPointsURLs = getCrlDistributionPoints(cert); + for (String crlDistributionPointsURL : crlDistributionPointsURLs) + { + LOG.info("Checking distribution point URL: " + crlDistributionPointsURL); + X509CRL crl; + try + { + crl = downloadCRL(crlDistributionPointsURL); + } + catch (Exception ex) + { + // e.g. 
LDAP behind corporate proxy + LOG.warn("Caught " + ex.getClass().getSimpleName() + " downloading CRL, will try next distribution point if available"); + if (firstException == null) + { + firstException = ex; + } + continue; + } + + Set mergedCertSet = CertificateVerifier.downloadExtraCertificates(crl); + mergedCertSet.addAll(additionalCerts); + + // Verify CRL, see wikipedia: + // "To validate a specific CRL prior to relying on it, + // the certificate of its corresponding CA is needed" + X509Certificate crlIssuerCert = null; + for (X509Certificate possibleCert : mergedCertSet) + { + try + { + cert.verify(possibleCert.getPublicKey(), SecurityProvider.getProvider().getName()); + crlIssuerCert = possibleCert; + break; + } + catch (GeneralSecurityException ex) + { + // not the issuer + } + } + if (crlIssuerCert == null) + { + throw new CertificateVerificationException( + "Certificate for " + crl.getIssuerX500Principal() + + "not found in certificate chain, so the CRL at " + + crlDistributionPointsURL + " could not be verified"); + } + crl.verify(crlIssuerCert.getPublicKey(), SecurityProvider.getProvider().getName()); + //TODO these should be exceptions, but for that we need a test case where + // a PDF has a broken OCSP and a working CRL + if (crl.getThisUpdate().after(now)) + { + LOG.error("CRL not yet valid, thisUpdate is " + crl.getThisUpdate()); + } + if (crl.getNextUpdate().before(now)) + { + LOG.error("CRL no longer valid, nextUpdate is " + crl.getNextUpdate()); + } + + if (!crl.getIssuerX500Principal().equals(cert.getIssuerX500Principal())) + { + LOG.info("CRL issuer certificate is not identical to cert issuer, check needed"); + CertificateVerifier.verifyCertificate(crlIssuerCert, mergedCertSet, true, now); + LOG.info("CRL issuer certificate checked successfully"); + } + else + { + LOG.info("CRL issuer certificate is identical to cert issuer, no extra check needed"); + } + + checkRevocation(crl, cert, signDate, crlDistributionPointsURL); + + // 
https://tools.ietf.org/html/rfc5280#section-4.2.1.13 + // If the DistributionPointName contains multiple values, + // each name describes a different mechanism to obtain the same + // CRL. For example, the same CRL could be available for + // retrieval through both LDAP and HTTP. + // + // => thus no need to check several protocols + return; + } + if (firstException != null) + { + throw firstException; + } + } + catch (CertificateVerificationException ex) + { + throw ex; + } + catch (RevokedCertificateException ex) + { + throw ex; + } + catch (Exception ex) + { + throw new CertificateVerificationException( + "Cannot verify CRL for certificate: " + + cert.getSubjectX500Principal(), ex); + + } + } + + /** + * Check whether the certificate was revoked at signing time. + * + * @param crl certificate revocation list + * @param cert certificate to be checked + * @param signDate date the certificate was used for signing + * @param crlDistributionPointsURL URL for log message or exception text + * @throws RevokedCertificateException if the certificate was revoked at signing time + */ + public static void checkRevocation( + X509CRL crl, X509Certificate cert, Date signDate, String crlDistributionPointsURL) + throws RevokedCertificateException + { + X509CRLEntry revokedCRLEntry = crl.getRevokedCertificate(cert); + if (revokedCRLEntry != null && + revokedCRLEntry.getRevocationDate().compareTo(signDate) <= 0) + { + throw new RevokedCertificateException( + "The certificate was revoked by CRL " + + crlDistributionPointsURL + " on " + revokedCRLEntry.getRevocationDate(), + revokedCRLEntry.getRevocationDate()); + } + else if (revokedCRLEntry != null) + { + LOG.info("The certificate was revoked after signing by CRL " + + crlDistributionPointsURL + " on " + revokedCRLEntry.getRevocationDate()); + } + else + { + LOG.info("The certificate was not revoked by CRL " + crlDistributionPointsURL); + } + } + + /** + * Downloads CRL from given URL. 
Supports http, https, ftp and ldap based URLs. + */ + private static X509CRL downloadCRL(String crlURL) throws IOException, + CertificateException, CRLException, + CertificateVerificationException, NamingException + { + if (crlURL.startsWith("http://") || crlURL.startsWith("https://") + || crlURL.startsWith("ftp://")) + { + return downloadCRLFromWeb(crlURL); + } + else if (crlURL.startsWith("ldap://")) + { + return downloadCRLFromLDAP(crlURL); + } + else + { + throw new CertificateVerificationException( + "Can not download CRL from certificate " + + "distribution point: " + crlURL); + } + } + + /** + * Downloads a CRL from given LDAP url, e.g. + * ldap://ldap.infonotary.com/dc=identity-ca,dc=infonotary,dc=com + */ + private static X509CRL downloadCRLFromLDAP(String ldapURL) throws CertificateException, + NamingException, CRLException, + CertificateVerificationException + { + @SuppressWarnings({"squid:S1149"}) + Hashtable env = new Hashtable(); + env.put(Context.INITIAL_CONTEXT_FACTORY, "com.sun.jndi.ldap.LdapCtxFactory"); + env.put(Context.PROVIDER_URL, ldapURL); + + // https://docs.oracle.com/javase/jndi/tutorial/ldap/connect/create.html + // don't wait forever behind corporate proxy + env.put("com.sun.jndi.ldap.connect.timeout", "1000"); + + DirContext ctx = new InitialDirContext(env); + Attributes avals = ctx.getAttributes(""); + Attribute aval = avals.get("certificateRevocationList;binary"); + byte[] val = (byte[]) aval.get(); + if (val == null || val.length == 0) + { + throw new CertificateVerificationException("Can not download CRL from: " + ldapURL); + } + else + { + InputStream inStream = new ByteArrayInputStream(val); + CertificateFactory cf = CertificateFactory.getInstance("X.509"); + return (X509CRL) cf.generateCRL(inStream); + } + } + + /** + * Downloads a CRL from given HTTP/HTTPS/FTP URL, e.g. 
+ * http://crl.infonotary.com/crl/identity-ca.crl + */ + public static X509CRL downloadCRLFromWeb(String crlURL) + throws IOException, CertificateException, CRLException + { + InputStream crlStream = new URL(crlURL).openStream(); + try + { + return (X509CRL) CertificateFactory.getInstance("X.509").generateCRL(crlStream); + } + finally + { + crlStream.close(); + } + } + + /** + * Extracts all CRL distribution point URLs from the "CRL Distribution + * Point" extension in a X.509 certificate. If CRL distribution point + * extension is unavailable, returns an empty list. + * @param cert + * @return List of CRL distribution point URLs. + * @throws java.io.IOException + */ + public static List getCrlDistributionPoints(X509Certificate cert) + throws IOException + { + byte[] crldpExt = cert.getExtensionValue(Extension.cRLDistributionPoints.getId()); + if (crldpExt == null) + { + return new ArrayList(); + } + ASN1InputStream oAsnInStream = new ASN1InputStream(crldpExt); + ASN1Primitive derObjCrlDP = oAsnInStream.readObject(); + oAsnInStream.close(); + if (!(derObjCrlDP instanceof ASN1OctetString)) + { + LOG.warn("CRL distribution points for certificate subject " + + cert.getSubjectX500Principal().getName() + + " should be an octet string, but is " + derObjCrlDP); + return new ArrayList(); + } + ASN1OctetString dosCrlDP = (ASN1OctetString) derObjCrlDP; + byte[] crldpExtOctets = dosCrlDP.getOctets(); + ASN1InputStream oAsnInStream2 = new ASN1InputStream(crldpExtOctets); + ASN1Primitive derObj2 = oAsnInStream2.readObject(); + oAsnInStream2.close(); + CRLDistPoint distPoint = CRLDistPoint.getInstance(derObj2); + List crlUrls = new ArrayList(); + for (DistributionPoint dp : distPoint.getDistributionPoints()) + { + DistributionPointName dpn = dp.getDistributionPoint(); + // Look for URIs in fullName + if (dpn != null && dpn.getType() == DistributionPointName.FULL_NAME) + { + // Look for an URI + for (GeneralName genName : GeneralNames.getInstance(dpn.getName()).getNames()) + { + 
if (genName.getTagNo() == GeneralName.uniformResourceIdentifier) + { + String url = DERIA5String.getInstance(genName.getName()).getString(); + crlUrls.add(url); + } + } + } + } + return crlUrls; + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/CertificateVerificationException.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/CertificateVerificationException.java new file mode 100644 index 00000000000..362cd7b521b --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/CertificateVerificationException.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.pdfbox.examples.signature.cert; + +/** + * Copied from Apache CXF 2.4.9, initial version: + * https://svn.apache.org/repos/asf/cxf/tags/cxf-2.4.9/distribution/src/main/release/samples/sts_issue_operation/src/main/java/demo/sts/provider/cert/ + * + */ +public class CertificateVerificationException extends Exception +{ + private static final long serialVersionUID = 1L; + + public CertificateVerificationException(String message, Throwable cause) + { + super(message, cause); + } + + public CertificateVerificationException(String message) + { + super(message); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/CertificateVerificationResult.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/CertificateVerificationResult.java new file mode 100644 index 00000000000..61f7a503fbb --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/CertificateVerificationResult.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.pdfbox.examples.signature.cert; + +import java.security.cert.PKIXCertPathBuilderResult; + +/** + * Copied from Apache CXF 2.4.9, initial version: + * https://svn.apache.org/repos/asf/cxf/tags/cxf-2.4.9/distribution/src/main/release/samples/sts_issue_operation/src/main/java/demo/sts/provider/cert/ + * + */ +public class CertificateVerificationResult +{ + private boolean valid; + private PKIXCertPathBuilderResult result; + private Throwable exception; + + /** + * Constructs a certificate verification result for valid certificate by + * given certification path. + */ + public CertificateVerificationResult(PKIXCertPathBuilderResult result) + { + this.valid = true; + this.result = result; + } + + public CertificateVerificationResult(Throwable exception) + { + this.valid = false; + this.exception = exception; + } + + public boolean isValid() + { + return valid; + } + + public PKIXCertPathBuilderResult getResult() + { + return result; + } + + public Throwable getException() + { + return exception; + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/CertificateVerifier.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/CertificateVerifier.java new file mode 100644 index 00000000000..5141cbd03c1 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/CertificateVerifier.java @@ -0,0 +1,537 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.pdfbox.examples.signature.cert; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.security.GeneralSecurityException; +import java.security.InvalidKeyException; +import java.security.PublicKey; +import java.security.SignatureException; +import java.security.cert.CertPathBuilder; +import java.security.cert.CertPathBuilderException; +import java.security.cert.CertStore; +import java.security.cert.Certificate; +import java.security.cert.CertificateException; +import java.security.cert.CertificateFactory; +import java.security.cert.CollectionCertStoreParameters; +import java.security.cert.PKIXBuilderParameters; +import java.security.cert.PKIXCertPathBuilderResult; +import java.security.cert.TrustAnchor; +import java.security.cert.X509CertSelector; +import java.security.cert.X509Certificate; +import java.security.cert.X509Extension; +import java.util.Calendar; +import java.util.Collection; +import java.util.Date; +import java.util.Enumeration; +import java.util.HashSet; +import java.util.Set; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.encryption.SecurityProvider; +import org.bouncycastle.asn1.ASN1Encodable; +import org.bouncycastle.asn1.ASN1OctetString; +import org.bouncycastle.asn1.ASN1Primitive; +import org.bouncycastle.asn1.ASN1Sequence; +import org.bouncycastle.asn1.ASN1TaggedObject; +import org.bouncycastle.asn1.ocsp.OCSPObjectIdentifiers; +import 
org.bouncycastle.asn1.x509.Extension; +import org.bouncycastle.asn1.x509.GeneralName; +import org.bouncycastle.asn1.x509.X509ObjectIdentifiers; +import org.bouncycastle.cert.X509CertificateHolder; +import org.bouncycastle.cert.jcajce.JcaX509CertificateConverter; +import org.bouncycastle.cert.jcajce.JcaX509ExtensionUtils; +import org.bouncycastle.cert.ocsp.BasicOCSPResp; +import org.bouncycastle.cert.ocsp.OCSPException; +import org.bouncycastle.cert.ocsp.OCSPResp; + +/** + * Copied from Apache CXF 2.4.9, initial version: + * https://svn.apache.org/repos/asf/cxf/tags/cxf-2.4.9/distribution/src/main/release/samples/sts_issue_operation/src/main/java/demo/sts/provider/cert/ + * + */ +public final class CertificateVerifier +{ + private static final Log LOG = LogFactory.getLog(CertificateVerifier.class); + + private CertificateVerifier() + { + + } + + /** + * Attempts to build a certification chain for given certificate and to + * verify it. Relies on a set of root CA certificates and intermediate + * certificates that will be used for building the certification chain. The + * verification process assumes that all self-signed certificates in the set + * are trusted root CA certificates and all other certificates in the set + * are intermediate certificates. + * + * @param cert - certificate for validation + * @param additionalCerts - set of trusted root CA certificates that will be + * used as "trust anchors" and intermediate CA certificates that will be + * used as part of the certification chain. All self-signed certificates are + * considered to be trusted root CA certificates. All the rest are + * considered to be intermediate CA certificates. + * @param verifySelfSignedCert true if a self-signed certificate is accepted, false if not. + * @param signDate the date when the signing took place + * @return the certification chain (if verification is successful) + * @throws CertificateVerificationException - if the certification is not + * successful (e.g. 
certification path cannot be built or some certificate + * in the chain is expired or CRL checks are failed) + */ + public static PKIXCertPathBuilderResult verifyCertificate( + X509Certificate cert, Set additionalCerts, + boolean verifySelfSignedCert, Date signDate) + throws CertificateVerificationException + { + try + { + // Check for self-signed certificate + if (!verifySelfSignedCert && isSelfSigned(cert)) + { + throw new CertificateVerificationException("The certificate is self-signed."); + } + + Set certSet = new HashSet(additionalCerts); + + // Download extra certificates. However, each downloaded certificate can lead to + // more extra certificates, e.g. with the file from PDFBOX-4091, which has + // an incomplete chain. + // You can skip this block if you know that the certificate chain is complete + Set certsToTrySet = new HashSet(); + certsToTrySet.add(cert); + certsToTrySet.addAll(additionalCerts); + int downloadSize = 0; + while (!certsToTrySet.isEmpty()) + { + Set nextCertsToTrySet = new HashSet(); + for (X509Certificate tryCert : certsToTrySet) + { + Set downloadedExtraCertificatesSet = + CertificateVerifier.downloadExtraCertificates(tryCert); + for (X509Certificate downloadedCertificate : downloadedExtraCertificatesSet) + { + if (!certSet.contains(downloadedCertificate)) + { + nextCertsToTrySet.add(downloadedCertificate); + certSet.add(downloadedCertificate); + downloadSize++; + } + } + } + certsToTrySet = nextCertsToTrySet; + } + if (downloadSize > 0) + { + LOG.info("CA issuers: " + downloadSize + " downloaded certificate(s) are new"); + } + + // Prepare a set of trust anchors (set of root CA certificates) + // and a set of intermediate certificates + Set intermediateCerts = new HashSet(); + Set trustAnchors = new HashSet(); + for (X509Certificate additionalCert : certSet) + { + if (isSelfSigned(additionalCert)) + { + trustAnchors.add(new TrustAnchor(additionalCert, null)); + } + else + { + intermediateCerts.add(additionalCert); + } + } + + if 
(trustAnchors.isEmpty()) + { + throw new CertificateVerificationException("No root certificate in the chain"); + } + + // Attempt to build the certification chain and verify it + PKIXCertPathBuilderResult verifiedCertChain = verifyCertificate( + cert, trustAnchors, intermediateCerts, signDate); + + LOG.info("Certification chain verified successfully up to this root: " + + verifiedCertChain.getTrustAnchor().getTrustedCert().getSubjectX500Principal()); + + checkRevocations(cert, certSet, signDate); + + return verifiedCertChain; + } + catch (CertPathBuilderException certPathEx) + { + throw new CertificateVerificationException( + "Error building certification path: " + + cert.getSubjectX500Principal(), certPathEx); + } + catch (CertificateVerificationException cvex) + { + throw cvex; + } + catch (Exception ex) + { + throw new CertificateVerificationException( + "Error verifying the certificate: " + + cert.getSubjectX500Principal(), ex); + } + } + + private static void checkRevocations(X509Certificate cert, + Set additionalCerts, + Date signDate) + throws IOException, CertificateVerificationException, OCSPException, + RevokedCertificateException, GeneralSecurityException + { + if (isSelfSigned(cert)) + { + // root, we're done + return; + } + for (X509Certificate additionalCert : additionalCerts) + { + try + { + cert.verify(additionalCert.getPublicKey(), SecurityProvider.getProvider().getName()); + checkRevocationsWithIssuer(cert, additionalCert, additionalCerts, signDate); + // there can be several issuers + } + catch (GeneralSecurityException ex) + { + // not the issuer + } + } + } + + private static void checkRevocationsWithIssuer(X509Certificate cert, X509Certificate issuerCert, + Set additionalCerts, Date signDate) + throws CertificateVerificationException, IOException, RevokedCertificateException, + GeneralSecurityException, OCSPException + { + // Try checking the certificate through OCSP (faster than CRL) + String ocspURL = extractOCSPURL(cert); + if (ocspURL != 
null) + { + OcspHelper ocspHelper = new OcspHelper(cert, signDate, issuerCert, additionalCerts, ocspURL); + try + { + verifyOCSP(ocspHelper, additionalCerts); + } + catch (IOException ex) + { + // happens with 021496.pdf because OCSP responder no longer exists + LOG.warn("IOException trying OCSP, will try CRL", ex); + LOG.warn("Certificate# to check: " + cert.getSerialNumber().toString(16)); + CRLVerifier.verifyCertificateCRLs(cert, signDate, additionalCerts); + } + catch (OCSPException ex) + { + // happens with QV_RCA1_RCA3_CPCPS_V4_11.pdf + LOG.warn("OCSPException trying OCSP, will try CRL", ex); + LOG.warn("Certificate# to check: " + cert.getSerialNumber().toString(16)); + CRLVerifier.verifyCertificateCRLs(cert, signDate, additionalCerts); + } + } + else + { + LOG.info("OCSP not available, will try CRL"); + + // Check whether the certificate is revoked by the CRL + // given in its CRL distribution point extension + CRLVerifier.verifyCertificateCRLs(cert, signDate, additionalCerts); + } + + // now check the issuer + checkRevocations(issuerCert, additionalCerts, signDate); + } + + /** + * Checks whether given X.509 certificate is self-signed. + * @param cert The X.509 certificate to check. + * @return true if the certificate is self-signed, false if not. 
+ * @throws java.security.GeneralSecurityException + */ + public static boolean isSelfSigned(X509Certificate cert) throws GeneralSecurityException + { + try + { + // Try to verify certificate signature with its own public key + PublicKey key = cert.getPublicKey(); + cert.verify(key, SecurityProvider.getProvider().getName()); + return true; + } + catch (SignatureException ex) + { + // Invalid signature --> not self-signed + LOG.debug("Couldn't get signature information - returning false", ex); + return false; + } + catch (InvalidKeyException ex) + { + // Invalid signature --> not self-signed + LOG.debug("Couldn't get signature information - returning false", ex); + return false; + } + catch (IOException ex) + { + // Invalid signature --> not self-signed + LOG.debug("Couldn't get signature information - returning false", ex); + return false; + } + } + + /** + * Download extra certificates from the URI mentioned in id-ad-caIssuers in the "authority + * information access" extension. The method is lenient, i.e. catches all exceptions. + * + * @param ext an X509 object that can have extensions. + * + * @return a certificate set, never null. 
+ */ + public static Set downloadExtraCertificates(X509Extension ext) + { + // https://tools.ietf.org/html/rfc2459#section-4.2.2.1 + // https://tools.ietf.org/html/rfc3280#section-4.2.2.1 + // https://tools.ietf.org/html/rfc4325 + Set resultSet = new HashSet(); + byte[] authorityExtensionValue = ext.getExtensionValue(Extension.authorityInfoAccess.getId()); + if (authorityExtensionValue == null) + { + return resultSet; + } + ASN1Primitive asn1Prim; + try + { + asn1Prim = JcaX509ExtensionUtils.parseExtensionValue(authorityExtensionValue); + } + catch (IOException ex) + { + LOG.warn(ex.getMessage(), ex); + return resultSet; + } + if (!(asn1Prim instanceof ASN1Sequence)) + { + LOG.warn("ASN1Sequence expected, got " + asn1Prim.getClass().getSimpleName()); + return resultSet; + } + ASN1Sequence asn1Seq = (ASN1Sequence) asn1Prim; + Enumeration objects = asn1Seq.getObjects(); + while (objects.hasMoreElements()) + { + // AccessDescription + ASN1Sequence obj = (ASN1Sequence) objects.nextElement(); + ASN1Encodable oid = obj.getObjectAt(0); + if (!X509ObjectIdentifiers.id_ad_caIssuers.equals(oid)) + { + continue; + } + ASN1TaggedObject location = (ASN1TaggedObject) obj.getObjectAt(1); + ASN1OctetString uri = (ASN1OctetString) location.getObject(); + String urlString = new String(uri.getOctets()); + InputStream in = null; + try + { + LOG.info("CA issuers URL: " + urlString); + in = new URL(urlString).openStream(); + CertificateFactory certFactory = CertificateFactory.getInstance("X.509"); + Collection altCerts = certFactory.generateCertificates(in); + for (Certificate altCert : altCerts) + { + resultSet.add((X509Certificate) altCert); + } + LOG.info("CA issuers URL: " + altCerts.size() + " certificate(s) downloaded"); + } + catch (IOException ex) + { + LOG.warn(urlString + " failure: " + ex.getMessage(), ex); + } + catch (CertificateException ex) + { + LOG.warn(ex.getMessage(), ex); + } + finally + { + IOUtils.closeQuietly(in); + } + } + LOG.info("CA issuers: Downloaded " + 
resultSet.size() + " certificate(s) total"); + return resultSet; + } + + /** + * Attempts to build a certification chain for given certificate and to + * verify it. Relies on a set of root CA certificates (trust anchors) and a + * set of intermediate certificates (to be used as part of the chain). + * + * @param cert - certificate for validation + * @param trustAnchors - set of trust anchors + * @param intermediateCerts - set of intermediate certificates + * @param signDate the date when the signing took place + * @return the certification chain (if verification is successful) + * @throws GeneralSecurityException - if the verification is not successful + * (e.g. certification path cannot be built or some certificate in the chain + * is expired) + */ + private static PKIXCertPathBuilderResult verifyCertificate( + X509Certificate cert, Set trustAnchors, + Set intermediateCerts, Date signDate) + throws GeneralSecurityException + { + // Create the selector that specifies the starting certificate + X509CertSelector selector = new X509CertSelector(); + selector.setCertificate(cert); + + // Configure the PKIX certificate builder algorithm parameters + PKIXBuilderParameters pkixParams = new PKIXBuilderParameters(trustAnchors, selector); + + // Disable CRL checks (this is done manually as additional step) + pkixParams.setRevocationEnabled(false); + + // not doing this brings + // "SunCertPathBuilderException: unable to find valid certification path to requested target" + // (when using -Djava.security.debug=certpath: "critical policy qualifiers present in certificate") + // for files like 021496.pdf that have the "Adobe CDS Certificate Policy" 1.2.840.113583.1.2.1 + // CDS = "Certified Document Services" + // https://www.adobe.com/misc/pdfs/Adobe_CDS_CP.pdf + pkixParams.setPolicyQualifiersRejected(false); + // However, maybe there is still work to do: + // "If the policyQualifiersRejected flag is set to false, it is up to the application + // to validate all policy 
qualifiers in this manner in order to be PKIX compliant." + + pkixParams.setDate(signDate); + + // Specify a list of intermediate certificates + CertStore intermediateCertStore = CertStore.getInstance("Collection", + new CollectionCertStoreParameters(intermediateCerts)); + pkixParams.addCertStore(intermediateCertStore); + + // Build and verify the certification chain + // If this doesn't work although it should, it can be debugged + // by starting java with -Djava.security.debug=certpath + // see also + // https://docs.oracle.com/javase/8/docs/technotes/guides/security/troubleshooting-security.html + CertPathBuilder builder = CertPathBuilder.getInstance("PKIX"); + return (PKIXCertPathBuilderResult) builder.build(pkixParams); + } + + /** + * Extract the OCSP URL from an X.509 certificate if available. + * + * @param cert X.509 certificate + * @return the URL of the OCSP validation service + * @throws IOException + */ + private static String extractOCSPURL(X509Certificate cert) throws IOException + { + byte[] authorityExtensionValue = cert.getExtensionValue(Extension.authorityInfoAccess.getId()); + if (authorityExtensionValue != null) + { + // copied from CertInformationHelper.getAuthorityInfoExtensionValue() + // DRY refactor should be done some day + ASN1Sequence asn1Seq = (ASN1Sequence) JcaX509ExtensionUtils.parseExtensionValue(authorityExtensionValue); + Enumeration objects = asn1Seq.getObjects(); + while (objects.hasMoreElements()) + { + // AccessDescription + ASN1Sequence obj = (ASN1Sequence) objects.nextElement(); + ASN1Encodable oid = obj.getObjectAt(0); + // accessLocation + ASN1TaggedObject location = (ASN1TaggedObject) obj.getObjectAt(1); + if (X509ObjectIdentifiers.id_ad_ocsp.equals(oid) + && location.getTagNo() == GeneralName.uniformResourceIdentifier) + { + ASN1OctetString url = (ASN1OctetString) location.getObject(); + String ocspURL = new String(url.getOctets()); + LOG.info("OCSP URL: " + ocspURL); + return ocspURL; + } + } + } + return null; + } + + 
/** + * Verify whether the certificate has been revoked at signing date, and verify whether the + * certificate of the responder has been revoked now. + * + * @param ocspHelper the OCSP helper. + * @param additionalCerts + * @throws RevokedCertificateException + * @throws IOException + * @throws OCSPException + * @throws CertificateVerificationException + */ + private static void verifyOCSP(OcspHelper ocspHelper, Set additionalCerts) + throws RevokedCertificateException, IOException, OCSPException, CertificateVerificationException + { + Date now = Calendar.getInstance().getTime(); + OCSPResp ocspResponse; + ocspResponse = ocspHelper.getResponseOcsp(); + if (ocspResponse.getStatus() != OCSPResp.SUCCESSFUL) + { + throw new CertificateVerificationException("OCSP check not successful, status: " + + ocspResponse.getStatus()); + } + LOG.info("OCSP check successful"); + + BasicOCSPResp basicResponse = (BasicOCSPResp) ocspResponse.getResponseObject(); + X509Certificate ocspResponderCertificate = ocspHelper.getOcspResponderCertificate(); + if (ocspResponderCertificate.getExtensionValue(OCSPObjectIdentifiers.id_pkix_ocsp_nocheck.getId()) != null) + { + // https://tools.ietf.org/html/rfc6960#section-4.2.2.2.1 + // A CA may specify that an OCSP client can trust a responder for the + // lifetime of the responder's certificate. The CA does so by + // including the extension id-pkix-ocsp-nocheck. 
+ LOG.info("Revocation check of OCSP responder certificate skipped (id-pkix-ocsp-nocheck is set)"); + return; + } + + if (ocspHelper.getCertificateToCheck().equals(ocspResponderCertificate)) + { + LOG.info("OCSP responder certificate is identical to certificate to check"); + return; + } + + LOG.info("Check of OCSP responder certificate"); + Set additionalCerts2 = new HashSet(additionalCerts); + JcaX509CertificateConverter certificateConverter = new JcaX509CertificateConverter(); + for (X509CertificateHolder certHolder : basicResponse.getCerts()) + { + try + { + X509Certificate cert = certificateConverter.getCertificate(certHolder); + if (!ocspResponderCertificate.equals(cert)) + { + additionalCerts2.add(cert); + } + } + catch (CertificateException ex) + { + // unlikely to happen because the certificate existed as an object + LOG.error(ex, ex); + } + } + CertificateVerifier.verifyCertificate(ocspResponderCertificate, additionalCerts2, true, now); + LOG.info("Check of OCSP responder certificate done"); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/OcspHelper.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/OcspHelper.java new file mode 100644 index 00000000000..13f19e4bfbb --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/OcspHelper.java @@ -0,0 +1,633 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.signature.cert; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.security.SecureRandom; +import java.security.Security; +import java.security.cert.CertificateEncodingException; +import java.security.cert.CertificateException; +import java.security.cert.CertificateParsingException; +import java.security.cert.X509Certificate; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Date; +import java.util.Random; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.examples.signature.SigUtils; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.encryption.SecurityProvider; +import org.bouncycastle.asn1.DEROctetString; +import org.bouncycastle.asn1.DLSequence; +import org.bouncycastle.asn1.ocsp.OCSPObjectIdentifiers; +import org.bouncycastle.asn1.ocsp.OCSPResponseStatus; +import org.bouncycastle.asn1.ocsp.ResponderID; +import org.bouncycastle.asn1.oiw.OIWObjectIdentifiers; +import org.bouncycastle.asn1.x500.X500Name; +import org.bouncycastle.asn1.x509.AlgorithmIdentifier; +import org.bouncycastle.asn1.x509.Extension; +import org.bouncycastle.asn1.x509.Extensions; +import org.bouncycastle.asn1.x509.SubjectPublicKeyInfo; +import 
org.bouncycastle.cert.X509CertificateHolder; +import org.bouncycastle.cert.jcajce.JcaX509CertificateConverter; +import org.bouncycastle.cert.jcajce.JcaX509CertificateHolder; +import org.bouncycastle.cert.ocsp.BasicOCSPResp; +import org.bouncycastle.cert.ocsp.CertificateID; +import org.bouncycastle.cert.ocsp.CertificateStatus; +import org.bouncycastle.cert.ocsp.OCSPException; +import org.bouncycastle.cert.ocsp.OCSPReq; +import org.bouncycastle.cert.ocsp.OCSPReqBuilder; +import org.bouncycastle.cert.ocsp.OCSPResp; +import org.bouncycastle.cert.ocsp.RevokedStatus; +import org.bouncycastle.cert.ocsp.SingleResp; +import org.bouncycastle.operator.ContentVerifierProvider; +import org.bouncycastle.operator.DigestCalculator; +import org.bouncycastle.operator.OperatorCreationException; +import org.bouncycastle.operator.jcajce.JcaContentVerifierProviderBuilder; + +/** + * Helper Class for OCSP-Operations with bouncy castle. + * + * @author Alexis Suter + */ +public class OcspHelper +{ + private static final Log LOG = LogFactory.getLog(OcspHelper.class); + + private final X509Certificate issuerCertificate; + private final Date signDate; + private final X509Certificate certificateToCheck; + private final Set additionalCerts; + private final String ocspUrl; + private DEROctetString encodedNonce; + private X509Certificate ocspResponderCertificate; + private final JcaX509CertificateConverter certificateConverter = new JcaX509CertificateConverter(); + + // SecureRandom.getInstanceStrong() would be better, but sometimes blocks on Linux + private static final Random RANDOM = new SecureRandom(); + + /** + * @param checkCertificate Certificate to be OCSP-checked + * @param signDate the date when the signing took place + * @param issuerCertificate Certificate of the issuer + * @param additionalCerts Set of trusted root CA certificates that will be used as "trust + * anchors" and intermediate CA certificates that will be used as part of the certification + * chain. 
All self-signed certificates are considered to be trusted root CA certificates. All + * the rest are considered to be intermediate CA certificates. + * @param ocspUrl where to fetch for OCSP + */ + public OcspHelper(X509Certificate checkCertificate, Date signDate, X509Certificate issuerCertificate, + Set additionalCerts, String ocspUrl) + { + this.certificateToCheck = checkCertificate; + this.signDate = signDate; + this.issuerCertificate = issuerCertificate; + this.additionalCerts = additionalCerts; + this.ocspUrl = ocspUrl; + } + + /** + * Get the certificate to be OCSP-checked. + * + * @return The certificate to be OCSP-checked. + */ + X509Certificate getCertificateToCheck() + { + return certificateToCheck; + } + + /** + * Performs and verifies the OCSP-Request + * + * @return the OCSPResp, when the request was successful, else a corresponding exception will be + * thrown. Never returns null. + * + * @throws IOException + * @throws OCSPException + * @throws RevokedCertificateException + */ + public OCSPResp getResponseOcsp() throws IOException, OCSPException, RevokedCertificateException + { + OCSPResp ocspResponse = performRequest(); + verifyOcspResponse(ocspResponse); + return ocspResponse; + } + + /** + * Get responder certificate. This is available after {@link #getResponseOcsp()} has been + * called. This method should be used instead of {@code basicResponse.getCerts()[0]} + * + * @return The certificate of the responder. + */ + public X509Certificate getOcspResponderCertificate() + { + return ocspResponderCertificate; + } + + /** + * Verifies the status and the response itself (including nonce), but not the signature. 
+ * + * @param ocspResponse to be verified + * @throws OCSPException + * @throws RevokedCertificateException + * @throws IOException if the default security provider can't be instantiated + */ + private void verifyOcspResponse(OCSPResp ocspResponse) + throws OCSPException, RevokedCertificateException, IOException + { + verifyRespStatus(ocspResponse); + + BasicOCSPResp basicResponse = (BasicOCSPResp) ocspResponse.getResponseObject(); + if (basicResponse != null) + { + ResponderID responderID = basicResponse.getResponderId().toASN1Primitive(); + // https://tools.ietf.org/html/rfc6960#section-4.2.2.3 + // The basic response type contains: + // (...) + // either the name of the responder or a hash of the responder's + // public key as the ResponderID + // (...) + // The responder MAY include certificates in the certs field of + // BasicOCSPResponse that help the OCSP client verify the responder's + // signature. + X500Name name = responderID.getName(); + if (name != null) + { + findResponderCertificateByName(basicResponse, name); + } + else + { + byte[] keyHash = responderID.getKeyHash(); + if (keyHash != null) + { + findResponderCertificateByKeyHash(basicResponse, keyHash); + } + else + { + throw new OCSPException("OCSP: basic response must provide name or key hash"); + } + } + + if (ocspResponderCertificate == null) + { + throw new OCSPException("OCSP: certificate for responder " + name + " not found"); + } + + try + { + SigUtils.checkResponderCertificateUsage(ocspResponderCertificate); + } + catch (CertificateParsingException ex) + { + // unlikely to happen because the certificate existed as an object + LOG.error(ex, ex); + } + checkOcspSignature(ocspResponderCertificate, basicResponse); + + boolean nonceChecked = checkNonce(basicResponse); + + SingleResp[] responses = basicResponse.getResponses(); + if (responses.length != 1) + { + throw new OCSPException( + "OCSP: Received " + responses.length + " responses instead of 1!"); + } + + SingleResp resp = responses[0]; 
+ Object status = resp.getCertStatus(); + + if (!nonceChecked) + { + // https://tools.ietf.org/html/rfc5019 + // fall back to validating the OCSPResponse based on time + checkOcspResponseFresh(resp); + } + + if (status instanceof RevokedStatus) + { + RevokedStatus revokedStatus = (RevokedStatus) status; + if (revokedStatus.getRevocationTime().compareTo(signDate) <= 0) + { + throw new RevokedCertificateException( + "OCSP: Certificate is revoked since " + + revokedStatus.getRevocationTime(), + revokedStatus.getRevocationTime()); + } + LOG.info("The certificate was revoked after signing by OCSP " + ocspUrl + + " on " + revokedStatus.getRevocationTime()); + } + else if (status != CertificateStatus.GOOD) + { + throw new OCSPException("OCSP: Status of Cert is unknown"); + } + } + } + + private byte[] getKeyHashFromCertHolder(X509CertificateHolder certHolder) throws IOException + { + // https://tools.ietf.org/html/rfc2560#section-4.2.1 + // KeyHash ::= OCTET STRING -- SHA-1 hash of responder's public key + // -- (i.e., the SHA-1 hash of the value of the + // -- BIT STRING subjectPublicKey [excluding + // -- the tag, length, and number of unused + // -- bits] in the responder's certificate) + + // code below inspired by org.bouncycastle.cert.ocsp.CertificateID.createCertID() + // tested with SO52757037-Signed3-OCSP-with-KeyHash.pdf + SubjectPublicKeyInfo info = certHolder.getSubjectPublicKeyInfo(); + try + { + return MessageDigest.getInstance("SHA-1").digest(info.getPublicKeyData().getBytes()); + } + catch (NoSuchAlgorithmException ex) + { + // should not happen + LOG.error("SHA-1 Algorithm not found", ex); + return new byte[0]; + } + } + + private void findResponderCertificateByKeyHash(BasicOCSPResp basicResponse, byte[] keyHash) + throws IOException + { + X509CertificateHolder[] certHolders = basicResponse.getCerts(); + for (X509CertificateHolder certHolder : certHolders) + { + byte[] digest = getKeyHashFromCertHolder(certHolder); + if (Arrays.equals(keyHash, digest)) + 
{ + try + { + ocspResponderCertificate = certificateConverter.getCertificate(certHolder); + return; + } + catch (CertificateException ex) + { + // unlikely to happen because the certificate existed as an object + LOG.error(ex, ex); + } + break; + } + } + + // DO NOT use the certificate found in additionalCerts first. One file had a + // responder certificate in the PDF itself with SHA1withRSA algorithm, but + // the responder delivered a different (newer, more secure) certificate + // with SHA256withRSA (tried with QV_RCA1_RCA3_CPCPS_V4_11.pdf) + // https://www.quovadisglobal.com/~/media/Files/Repository/QV_RCA1_RCA3_CPCPS_V4_11.ashx + for (X509Certificate cert : additionalCerts) + { + try + { + byte[] digest = getKeyHashFromCertHolder(new X509CertificateHolder(cert.getEncoded())); + if (Arrays.equals(keyHash, digest)) + { + ocspResponderCertificate = cert; + return; + } + } + catch (CertificateEncodingException ex) + { + // unlikely to happen because the certificate existed as an object + LOG.error(ex, ex); + } + } + } + + private void findResponderCertificateByName(BasicOCSPResp basicResponse, X500Name name) + { + X509CertificateHolder[] certHolders = basicResponse.getCerts(); + for (X509CertificateHolder certHolder : certHolders) + { + if (name.equals(certHolder.getSubject())) + { + try + { + ocspResponderCertificate = certificateConverter.getCertificate(certHolder); + return; + } + catch (CertificateException ex) + { + // unlikely to happen because the certificate existed as an object + LOG.error(ex, ex); + } + } + } + + // DO NOT use the certificate found in additionalCerts first. 
One file had a + // responder certificate in the PDF itself with SHA1withRSA algorithm, but + // the responder delivered a different (newer, more secure) certificate + // with SHA256withRSA (tried with QV_RCA1_RCA3_CPCPS_V4_11.pdf) + // https://www.quovadisglobal.com/~/media/Files/Repository/QV_RCA1_RCA3_CPCPS_V4_11.ashx + for (X509Certificate cert : additionalCerts) + { + X500Name certSubjectName = new X500Name(cert.getSubjectX500Principal().getName()); + if (certSubjectName.equals(name)) + { + ocspResponderCertificate = cert; + return; + } + } + } + + private void checkOcspResponseFresh(SingleResp resp) throws OCSPException + { + // https://tools.ietf.org/html/rfc5019 + // Clients MUST check for the existence of the nextUpdate field and MUST + // ensure the current time, expressed in GMT time as described in + // Section 2.2.4, falls between the thisUpdate and nextUpdate times. If + // the nextUpdate field is absent, the client MUST reject the response. + + Date curDate = Calendar.getInstance().getTime(); + + Date thisUpdate = resp.getThisUpdate(); + if (thisUpdate == null) + { + throw new OCSPException("OCSP: thisUpdate field is missing in response (RFC 5019 2.2.4.)"); + } + Date nextUpdate = resp.getNextUpdate(); + if (nextUpdate == null) + { + throw new OCSPException("OCSP: nextUpdate field is missing in response (RFC 5019 2.2.4.)"); + } + if (curDate.compareTo(thisUpdate) < 0) + { + LOG.error(curDate + " < " + thisUpdate); + throw new OCSPException("OCSP: current date < thisUpdate field (RFC 5019 2.2.4.)"); + } + if (curDate.compareTo(nextUpdate) > 0) + { + LOG.error(curDate + " > " + nextUpdate); + throw new OCSPException("OCSP: current date > nextUpdate field (RFC 5019 2.2.4.)"); + } + LOG.info("OCSP response is fresh"); + } + + /** + * Checks whether the OCSP response is signed by the given certificate. 
+ * + * @param certificate the certificate to check the signature + * @param basicResponse OCSP response containing the signature + * @throws OCSPException when the signature is invalid or could not be checked + * @throws IOException if the default security provider can't be instantiated + */ + private void checkOcspSignature(X509Certificate certificate, BasicOCSPResp basicResponse) + throws OCSPException, IOException + { + try + { + ContentVerifierProvider verifier = new JcaContentVerifierProviderBuilder() + .setProvider(SecurityProvider.getProvider()).build(certificate); + + if (!basicResponse.isSignatureValid(verifier)) + { + throw new OCSPException("OCSP-Signature is not valid!"); + } + } + catch (OperatorCreationException e) + { + throw new OCSPException("Error checking Ocsp-Signature", e); + } + } + + /** + * Checks if the nonce in the response matches. + * + * @param basicResponse Response to be checked + * @return true if the nonce is present and matches, false if nonce is missing. + * @throws OCSPException if the nonce is different + */ + private boolean checkNonce(BasicOCSPResp basicResponse) throws OCSPException + { + Extension nonceExt = basicResponse.getExtension(OCSPObjectIdentifiers.id_pkix_ocsp_nonce); + if (nonceExt != null) + { + DEROctetString responseNonceString = (DEROctetString) nonceExt.getExtnValue(); + if (!responseNonceString.equals(encodedNonce)) + { + throw new OCSPException("Different nonce found in response!"); + } + else + { + LOG.info("Nonce is good"); + return true; + } + } + // https://tools.ietf.org/html/rfc5019 + // Clients that opt to include a nonce in the + // request SHOULD NOT reject a corresponding OCSPResponse solely on the + // basis of the nonexistent expected nonce, but MUST fall back to + // validating the OCSPResponse based on time. + return false; + } + + /** + * Performs the OCSP-Request, with given data. 
+ * + * @return the OCSPResp, that has been fetched from the ocspUrl + * @throws IOException + * @throws OCSPException + */ + private OCSPResp performRequest() throws IOException, OCSPException + { + OCSPReq request = generateOCSPRequest(); + URL url = new URL(ocspUrl); + HttpURLConnection httpConnection = (HttpURLConnection) url.openConnection(); + try + { + httpConnection.setRequestProperty("Content-Type", "application/ocsp-request"); + httpConnection.setRequestProperty("Accept", "application/ocsp-response"); + httpConnection.setDoOutput(true); + OutputStream out = httpConnection.getOutputStream(); + try + { + out.write(request.getEncoded()); + } + finally + { + IOUtils.closeQuietly(out); + } + + if (httpConnection.getResponseCode() != 200) + { + throw new IOException("OCSP: Could not access url, ResponseCode: " + + httpConnection.getResponseCode()); + } + // Get response + InputStream in = (InputStream) httpConnection.getContent(); + try + { + return new OCSPResp(in); + } + finally + { + IOUtils.closeQuietly(in); + } + } + finally + { + httpConnection.disconnect(); + } + } + + /** + * Helper method to verify response status. + * + * @param resp OCSP response + * @throws OCSPException if the response status is not ok + */ + public void verifyRespStatus(OCSPResp resp) throws OCSPException + { + String statusInfo = ""; + if (resp != null) + { + int status = resp.getStatus(); + switch (status) + { + case OCSPResponseStatus.INTERNAL_ERROR: + statusInfo = "INTERNAL_ERROR"; + LOG.error("An internal error occurred in the OCSP Server!"); + break; + case OCSPResponseStatus.MALFORMED_REQUEST: + // This happened when the "critical" flag was used for extensions + // on a responder known by the committer of this comment. 
+ statusInfo = "MALFORMED_REQUEST"; + LOG.error("Your request did not fit the RFC 2560 syntax!"); + break; + case OCSPResponseStatus.SIG_REQUIRED: + statusInfo = "SIG_REQUIRED"; + LOG.error("Your request was not signed!"); + break; + case OCSPResponseStatus.TRY_LATER: + statusInfo = "TRY_LATER"; + LOG.error("The server was too busy to answer you!"); + break; + case OCSPResponseStatus.UNAUTHORIZED: + statusInfo = "UNAUTHORIZED"; + LOG.error("The server could not authenticate you!"); + break; + case OCSPResponseStatus.SUCCESSFUL: + break; + default: + statusInfo = "UNKNOWN"; + LOG.error("Unknown OCSPResponse status code! " + status); + } + } + if (resp == null || resp.getStatus() != OCSPResponseStatus.SUCCESSFUL) + { + throw new OCSPException("OCSP response unsuccessful, status: " + statusInfo); + } + } + + /** + * Generates an OCSP request and generates the CertificateID. + * + * @return OCSP request, ready to fetch data + * @throws OCSPException + * @throws IOException + */ + private OCSPReq generateOCSPRequest() throws OCSPException, IOException + { + Security.addProvider(SecurityProvider.getProvider()); + + // Generate the ID for the certificate we are looking for + CertificateID certId; + try + { + certId = new CertificateID(new SHA1DigestCalculator(), + new JcaX509CertificateHolder(issuerCertificate), + certificateToCheck.getSerialNumber()); + } + catch (CertificateEncodingException e) + { + throw new IOException("Error creating CertificateID with the Certificate encoding", e); + } + + // https://tools.ietf.org/html/rfc2560#section-4.1.2 + // Support for any specific extension is OPTIONAL. The critical flag + // SHOULD NOT be set for any of them. 
+ + Extension responseExtension = new Extension(OCSPObjectIdentifiers.id_pkix_ocsp_response, + false, new DLSequence(OCSPObjectIdentifiers.id_pkix_ocsp_basic).getEncoded()); + + encodedNonce = new DEROctetString(new DEROctetString(create16BytesNonce())); + Extension nonceExtension = new Extension(OCSPObjectIdentifiers.id_pkix_ocsp_nonce, false, + encodedNonce); + + OCSPReqBuilder builder = new OCSPReqBuilder(); + builder.setRequestExtensions( + new Extensions(new Extension[] { responseExtension, nonceExtension })); + builder.addRequest(certId); + return builder.build(); + } + + private byte[] create16BytesNonce() + { + byte[] nonce = new byte[16]; + RANDOM.nextBytes(nonce); + return nonce; + } + + /** + * Class to create SHA-1 Digest, used for creation of CertificateID. + */ + private static class SHA1DigestCalculator implements DigestCalculator + { + private final ByteArrayOutputStream bOut = new ByteArrayOutputStream(); + + @Override + public AlgorithmIdentifier getAlgorithmIdentifier() + { + return new AlgorithmIdentifier(OIWObjectIdentifiers.idSHA1); + } + + @Override + public OutputStream getOutputStream() + { + return bOut; + } + + @Override + public byte[] getDigest() + { + byte[] bytes = bOut.toByteArray(); + bOut.reset(); + + try + { + MessageDigest md = MessageDigest.getInstance("SHA-1"); + return md.digest(bytes); + } + catch (NoSuchAlgorithmException ex) + { + // should not happen + LOG.error("SHA-1 Algorithm not found", ex); + return new byte[0]; + } + } + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/RevokedCertificateException.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/RevokedCertificateException.java new file mode 100644 index 00000000000..6a4d7160367 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/cert/RevokedCertificateException.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.signature.cert; + +import java.util.Date; + +/** + * Exception to handle a revoked Certificate explicitly + * + * @author Alexis Suter + */ +public class RevokedCertificateException extends Exception +{ + private static final long serialVersionUID = 3543946618794126654L; + + private final Date revocationTime; + + public RevokedCertificateException(String message) + { + super(message); + this.revocationTime = null; + } + + public RevokedCertificateException(String message, Date revocationTime) + { + super(message); + this.revocationTime = revocationTime; + } + + public Date getRevocationTime() + { + return revocationTime; + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/package.html b/examples/src/main/java/org/apache/pdfbox/examples/signature/package.html index cc31cd070e4..0cb1675eb7c 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/signature/package.html +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/validation/AddValidationInformation.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/validation/AddValidationInformation.java new file mode 100644 index 00000000000..c2139f757ba --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/validation/AddValidationInformation.java @@ -0,0 +1,684 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.examples.signature.validation; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.lang.reflect.InvocationTargetException; +import java.security.GeneralSecurityException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.security.Security; +import java.security.cert.CertificateEncodingException; +import java.security.cert.X509CRL; +import java.security.cert.X509Certificate; +import java.util.Calendar; +import java.util.HashSet; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.cos.COSUpdateInfo; +import org.apache.pdfbox.examples.signature.SigUtils; +import org.apache.pdfbox.examples.signature.cert.CRLVerifier; +import org.apache.pdfbox.examples.signature.cert.CertificateVerificationException; +import org.apache.pdfbox.examples.signature.cert.OcspHelper; +import org.apache.pdfbox.examples.signature.cert.RevokedCertificateException; +import org.apache.pdfbox.examples.signature.validation.CertInformationCollector.CertSignatureInformation; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import org.apache.pdfbox.pdmodel.encryption.SecurityProvider; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; +import org.apache.pdfbox.util.Hex; +import org.bouncycastle.asn1.ocsp.OCSPObjectIdentifiers; +import org.bouncycastle.cert.ocsp.BasicOCSPResp; +import org.bouncycastle.cert.ocsp.OCSPException; +import org.bouncycastle.cert.ocsp.OCSPResp; +import org.bouncycastle.cms.CMSException; 
+import org.bouncycastle.cms.CMSSignedData; +import org.bouncycastle.tsp.TSPException; +import org.bouncycastle.tsp.TimeStampToken; +import org.bouncycastle.tsp.TimeStampTokenInfo; + +/** + * An example for adding Validation Information to a signed PDF, inspired by ETSI TS 102 778-4 + * V1.1.2 (2009-12), Part 4: PAdES Long Term - PAdES-LTV Profile. This procedure appends the + * Validation Information of the last signature (more precise its signer(s)) to a copy of the + * document. The signature and the signed data will not be touched and stay valid. + *

+ * See also Bachelor thesis (in German) about LTV + * + * @author Alexis Suter + */ +public class AddValidationInformation +{ + private static final Log LOG = LogFactory.getLog(AddValidationInformation.class); + + private CertInformationCollector certInformationHelper; + private COSArray correspondingOCSPs; + private COSArray correspondingCRLs; + private COSDictionary vriBase; + private COSArray ocsps; + private COSArray crls; + private COSArray certs; + private PDDocument document; + private final Set foundRevocationInformation = new HashSet(); + private Calendar signDate; + private final Set ocspChecked = new HashSet(); + //TODO foundRevocationInformation and ocspChecked have a similar purpose. One of them should likely + // be removed and the code improved. When doing so, keep in mind that ocspChecked was added last, + // because of a problem with freetsa. + + /** + * Signs the given PDF file. + * + * @param inFile input PDF file + * @param outFile output PDF file + * @throws IOException if the input file could not be read + */ + public void validateSignature(File inFile, File outFile) throws IOException + { + if (inFile == null || !inFile.exists()) + { + String err = "Document for signing "; + if (null == inFile) + { + err += "is null"; + } + else + { + err += "does not exist: " + inFile.getAbsolutePath(); + } + throw new FileNotFoundException(err); + } + + PDDocument doc = PDDocument.load(inFile); + FileOutputStream fos = new FileOutputStream(outFile); + int accessPermissions = SigUtils.getMDPPermission(doc); + if (accessPermissions == 1) + { + System.out.println("PDF is certified to forbid changes, " + + "some readers may report the document as invalid despite that " + + "the PDF specification allows DSS additions"); + } + document = doc; + doValidation(inFile.getAbsolutePath(), fos); + fos.close(); + doc.close(); + } + + /** + * Fetches certificate information from the last signature of the document and appends a DSS + * with the validation information to 
the document. + * + * @param filename in file to extract signature + * @param output where to write the changed document + * @throws IOException + */ + private void doValidation(String filename, OutputStream output) throws IOException + { + certInformationHelper = new CertInformationCollector(); + CertSignatureInformation certInfo = null; + try + { + PDSignature signature = SigUtils.getLastRelevantSignature(document); + if (signature != null) + { + certInfo = certInformationHelper.getLastCertInfo(signature, filename); + signDate = signature.getSignDate(); + if ("ETSI.RFC3161".equals(signature.getSubFilter())) + { + byte[] contents = signature.getContents(); + TimeStampToken timeStampToken = new TimeStampToken(new CMSSignedData(contents)); + TimeStampTokenInfo timeStampInfo = timeStampToken.getTimeStampInfo(); + signDate = Calendar.getInstance(); + signDate.setTime(timeStampInfo.getGenTime()); + } + } + } + catch (CertificateProccessingException e) + { + throw new IOException("An Error occurred processing the Signature", e); + } + catch (CMSException e) + { + throw new IOException("An Error occurred processing the Signature", e); + } + catch (TSPException e) + { + throw new IOException("An Error occurred processing the Signature", e); + } + if (certInfo == null) + { + throw new IOException( + "No Certificate information or signature found in the given document"); + } + + PDDocumentCatalog docCatalog = document.getDocumentCatalog(); + COSDictionary catalog = docCatalog.getCOSObject(); + catalog.setNeedToBeUpdated(true); + + COSDictionary dss = getOrCreateDictionaryEntry(COSDictionary.class, catalog, "DSS"); + + addExtensions(docCatalog); + + vriBase = getOrCreateDictionaryEntry(COSDictionary.class, dss, "VRI"); + + ocsps = getOrCreateDictionaryEntry(COSArray.class, dss, "OCSPs"); + + crls = getOrCreateDictionaryEntry(COSArray.class, dss, "CRLs"); + + certs = getOrCreateDictionaryEntry(COSArray.class, dss, "Certs"); + + addRevocationData(certInfo); + + 
addAllCertsToCertArray(); + + // write incremental + document.saveIncremental(output); + } + + /** + * Gets or creates a dictionary entry. If existing checks for the type and sets need to be + * updated. + * + * @param clazz the class of the dictionary entry, must implement COSUpdateInfo + * @param parent where to find the element + * @param name of the element + * @return a Element of given class, new or existing + * @throws IOException when the type of the element is wrong + */ + private static T getOrCreateDictionaryEntry(Class clazz, + COSDictionary parent, String name) throws IOException + { + T result; + COSBase element = parent.getDictionaryObject(name); + if (element != null && clazz.isInstance(element)) + { + result = clazz.cast(element); + result.setNeedToBeUpdated(true); + } + else if (element != null) + { + throw new IOException("Element " + name + " from dictionary is not of type " + + clazz.getCanonicalName()); + } + else + { + try + { + result = clazz.getDeclaredConstructor().newInstance(); + } + catch (InstantiationException ex) + { + throw new IOException("Failed to create new instance of " + clazz.getCanonicalName(), ex); + } + catch (IllegalAccessException ex) + { + throw new IOException("Failed to create new instance of " + clazz.getCanonicalName(), ex); + } + catch (NoSuchMethodException ex) + { + throw new IOException("Failed to create new instance of " + clazz.getCanonicalName(), ex); + } + catch (SecurityException ex) + { + throw new IOException("Failed to create new instance of " + clazz.getCanonicalName(), ex); + } + catch (IllegalArgumentException ex) + { + throw new IOException("Failed to create new instance of " + clazz.getCanonicalName(), ex); + } + catch (InvocationTargetException ex) + { + throw new IOException("Failed to create new instance of " + clazz.getCanonicalName(), ex); + } + result.setDirect(false); + parent.setItem(COSName.getPDFName(name), result); + } + return result; + } + + /** + * Fetches and adds revocation 
information based on the certInfo to the DSS. + * + * @param certInfo Certificate information from CertInformationHelper containing certificate + * chains. + * @throws IOException + */ + private void addRevocationData(CertSignatureInformation certInfo) throws IOException + { + COSDictionary vri = new COSDictionary(); + vriBase.setItem(certInfo.getSignatureHash(), vri); + + updateVRI(certInfo, vri); + + if (certInfo.getTsaCerts() != null) + { + // Don't add RevocationInfo from tsa to VRI's + correspondingOCSPs = null; + correspondingCRLs = null; + addRevocationDataRecursive(certInfo.getTsaCerts()); + } + } + + /** + * Tries to get Revocation Data (first OCSP, else CRL) from the given Certificate Chain. + * + * @param certInfo from which to fetch revocation data. Will work recursively through its + * chains. + * @throws IOException when failed to fetch an revocation data. + */ + private void addRevocationDataRecursive(CertSignatureInformation certInfo) throws IOException + { + if (certInfo.isSelfSigned()) + { + return; + } + // To avoid getting same revocation information twice. 
+ boolean isRevocationInfoFound = foundRevocationInformation.contains(certInfo.getCertificate()); + if (!isRevocationInfoFound) + { + if (certInfo.getOcspUrl() != null && certInfo.getIssuerCertificate() != null) + { + isRevocationInfoFound = fetchOcspData(certInfo); + } + if (!isRevocationInfoFound && certInfo.getCrlUrl() != null) + { + fetchCrlData(certInfo); + isRevocationInfoFound = true; + } + + if (certInfo.getOcspUrl() == null && certInfo.getCrlUrl() == null) + { + LOG.info("No revocation information for cert " + certInfo.getCertificate().getSubjectX500Principal()); + } + else if (!isRevocationInfoFound) + { + throw new IOException("Could not fetch Revocation Info for Cert: " + + certInfo.getCertificate().getSubjectX500Principal()); + } + } + + if (certInfo.getAlternativeCertChain() != null) + { + addRevocationDataRecursive(certInfo.getAlternativeCertChain()); + } + + if (certInfo.getCertChain() != null && certInfo.getCertChain().getCertificate() != null) + { + addRevocationDataRecursive(certInfo.getCertChain()); + } + } + + /** + * Tries to fetch and add OCSP Data to its containers. + * + * @param certInfo the certificate info, for it to check OCSP data. + * @return true when the OCSP data has successfully been fetched and added + * @throws IOException when Certificate is revoked. + */ + private boolean fetchOcspData(CertSignatureInformation certInfo) throws IOException + { + try + { + addOcspData(certInfo); + return true; + } + catch (OCSPException e) + { + LOG.warn("Failed fetching Ocsp", e); + return false; + } + catch (CertificateProccessingException e) + { + LOG.warn("Failed fetching Ocsp", e); + return false; + } + catch (IOException e) + { + LOG.warn("Failed fetching Ocsp", e); + return false; + } + catch (RevokedCertificateException e) + { + throw new IOException(e); + } + } + + /** + * Tries to fetch and add CRL Data to its containers. + * + * @param certInfo the certificate info, for it to check CRL data. 
+ * @throws IOException when failed to fetch, because no validation data could be fetched for + * data. + */ + private void fetchCrlData(CertSignatureInformation certInfo) throws IOException + { + try + { + addCrlRevocationInfo(certInfo); + } + catch (GeneralSecurityException e) + { + LOG.warn("Failed fetching CRL", e); + throw new IOException(e); + } + catch (RevokedCertificateException e) + { + LOG.warn("Failed fetching CRL", e); + throw new IOException(e); + } + catch (IOException e) + { + LOG.warn("Failed fetching CRL", e); + throw new IOException(e); + } + catch (CertificateVerificationException e) + { + LOG.warn("Failed fetching CRL", e); + throw new IOException(e); + } + } + + /** + * Fetches and adds OCSP data to storage for the given Certificate. + * + * @param certInfo the certificate info, for it to check OCSP data. + * @throws IOException + * @throws OCSPException + * @throws CertificateProccessingException + * @throws RevokedCertificateException + */ + private void addOcspData(CertSignatureInformation certInfo) throws IOException, OCSPException, + CertificateProccessingException, RevokedCertificateException + { + if (ocspChecked.contains(certInfo.getCertificate())) + { + // This certificate has been OCSP-checked before + return; + } + OcspHelper ocspHelper = new OcspHelper( + certInfo.getCertificate(), + signDate.getTime(), + certInfo.getIssuerCertificate(), + new HashSet(certInformationHelper.getCertificateSet()), + certInfo.getOcspUrl()); + OCSPResp ocspResp = ocspHelper.getResponseOcsp(); + ocspChecked.add(certInfo.getCertificate()); + BasicOCSPResp basicResponse = (BasicOCSPResp) ocspResp.getResponseObject(); + X509Certificate ocspResponderCertificate = ocspHelper.getOcspResponderCertificate(); + certInformationHelper.addAllCertsFromHolders(basicResponse.getCerts()); + byte[] signatureHash; + try + { + signatureHash = MessageDigest.getInstance("SHA-1").digest(basicResponse.getSignature()); + } + catch (NoSuchAlgorithmException ex) + { + throw new 
CertificateProccessingException(ex); + } + String signatureHashHex = Hex.getString(signatureHash); + + if (!vriBase.containsKey(signatureHashHex)) + { + COSArray savedCorrespondingOCSPs = correspondingOCSPs; + COSArray savedCorrespondingCRLs = correspondingCRLs; + + COSDictionary vri = new COSDictionary(); + vriBase.setItem(signatureHashHex, vri); + CertSignatureInformation ocspCertInfo = certInformationHelper.getCertInfo(ocspResponderCertificate); + + updateVRI(ocspCertInfo, vri); + + correspondingOCSPs = savedCorrespondingOCSPs; + correspondingCRLs = savedCorrespondingCRLs; + } + + byte[] ocspData = ocspResp.getEncoded(); + + COSStream ocspStream = writeDataToStream(ocspData); + ocsps.add(ocspStream); + if (correspondingOCSPs != null) + { + correspondingOCSPs.add(ocspStream); + } + foundRevocationInformation.add(certInfo.getCertificate()); + } + + /** + * Fetches and adds CRL data to storage for the given Certificate. + * + * @param certInfo the certificate info, for it to check CRL data. 
+ * @throws IOException + * @throws RevokedCertificateException + * @throws GeneralSecurityException + * @throws CertificateVerificationException + */ + private void addCrlRevocationInfo(CertSignatureInformation certInfo) + throws IOException, RevokedCertificateException, GeneralSecurityException, + CertificateVerificationException + { + X509CRL crl = CRLVerifier.downloadCRLFromWeb(certInfo.getCrlUrl()); + X509Certificate issuerCertificate = certInfo.getIssuerCertificate(); + + // find the issuer certificate (usually issuer of signature certificate) + for (X509Certificate certificate : certInformationHelper.getCertificateSet()) + { + if (certificate.getSubjectX500Principal().equals(crl.getIssuerX500Principal())) + { + issuerCertificate = certificate; + break; + } + } + crl.verify(issuerCertificate.getPublicKey(), SecurityProvider.getProvider().getName()); + CRLVerifier.checkRevocation(crl, certInfo.getCertificate(), signDate.getTime(), certInfo.getCrlUrl()); + COSStream crlStream = writeDataToStream(crl.getEncoded()); + crls.add(crlStream); + if (correspondingCRLs != null) + { + correspondingCRLs.add(crlStream); + + byte[] signatureHash; + try + { + signatureHash = MessageDigest.getInstance("SHA-1").digest(crl.getSignature()); + } + catch (NoSuchAlgorithmException ex) + { + throw new CertificateVerificationException(ex.getMessage(), ex); + } + String signatureHashHex = Hex.getString(signatureHash); + + if (!vriBase.containsKey(signatureHashHex)) + { + COSArray savedCorrespondingOCSPs = correspondingOCSPs; + COSArray savedCorrespondingCRLs = correspondingCRLs; + + COSDictionary vri = new COSDictionary(); + vriBase.setItem(signatureHashHex, vri); + + CertSignatureInformation crlCertInfo; + try + { + crlCertInfo = certInformationHelper.getCertInfo(issuerCertificate); + } + catch (CertificateProccessingException ex) + { + throw new CertificateVerificationException(ex.getMessage(), ex); + } + + updateVRI(crlCertInfo, vri); + + correspondingOCSPs = 
savedCorrespondingOCSPs; + correspondingCRLs = savedCorrespondingCRLs; + } + } + foundRevocationInformation.add(certInfo.getCertificate()); + } + + private void updateVRI(CertSignatureInformation certInfo, COSDictionary vri) throws IOException + { + if (certInfo.getCertificate().getExtensionValue(OCSPObjectIdentifiers.id_pkix_ocsp_nocheck.getId()) == null) + { + correspondingOCSPs = new COSArray(); + correspondingCRLs = new COSArray(); + addRevocationDataRecursive(certInfo); + if (correspondingOCSPs.size() > 0) + { + vri.setItem("OCSP", correspondingOCSPs); + } + if (correspondingCRLs.size() > 0) + { + vri.setItem("CRL", correspondingCRLs); + } + } + + COSArray correspondingCerts = new COSArray(); + CertSignatureInformation ci = certInfo; + do + { + X509Certificate cert = ci.getCertificate(); + try + { + COSStream certStream = writeDataToStream(cert.getEncoded()); + correspondingCerts.add(certStream); + certs.add(certStream); // may lead to duplicate certificates. Important? + } + catch (CertificateEncodingException ex) + { + // should not happen because these are existing certificates + LOG.error(ex, ex); + } + + if (cert.getExtensionValue(OCSPObjectIdentifiers.id_pkix_ocsp_nocheck.getId()) != null) + { + break; + } + ci = ci.getCertChain(); + } + while (ci != null); + vri.setItem(COSName.CERT, correspondingCerts); + + vri.setDate(COSName.TU, Calendar.getInstance()); + } + + /** + * Adds all certs to the certs-array. Make sure, all certificates are inside the + * certificateStore of certInformationHelper + * + * @throws IOException + */ + private void addAllCertsToCertArray() throws IOException + { + try + { + for (X509Certificate cert : certInformationHelper.getCertificateSet()) + { + COSStream stream = writeDataToStream(cert.getEncoded()); + certs.add(stream); + } + } + catch (CertificateEncodingException e) + { + throw new IOException(e); + } + } + + /** + * Creates a Flate encoded COSStream object with the given data. 
+ * + * @param data to write into the COSStream + * @return COSStream a COSStream object that can be added to the document + * @throws IOException + */ + private COSStream writeDataToStream(byte[] data) throws IOException + { + COSStream stream = document.getDocument().createCOSStream(); + OutputStream os = null; + try + { + os = stream.createOutputStream(COSName.FLATE_DECODE); + os.write(data); + } + finally + { + IOUtils.closeQuietly(os); + } + return stream; + } + + /** + * Adds Extensions to the document catalog. So that the use of DSS is identified. Described in + * PAdES Part 4, Chapter 4.4. + * + * @param catalog to add Extensions into + */ + private void addExtensions(PDDocumentCatalog catalog) + { + COSDictionary dssExtensions = new COSDictionary(); + dssExtensions.setDirect(true); + catalog.getCOSObject().setItem("Extensions", dssExtensions); + + COSDictionary adbeExtension = new COSDictionary(); + adbeExtension.setDirect(true); + dssExtensions.setItem("ADBE", adbeExtension); + + adbeExtension.setName("BaseVersion", "1.7"); + adbeExtension.setInt("ExtensionLevel", 5); + + catalog.setVersion("1.7"); + } + + public static void main(String[] args) throws IOException, GeneralSecurityException + { + if (args.length != 1) + { + usage(); + System.exit(1); + } + + // register BouncyCastle provider, needed for "exotic" algorithms + Security.addProvider(SecurityProvider.getProvider()); + + // add ocspInformation + AddValidationInformation addOcspInformation = new AddValidationInformation(); + + File inFile = new File(args[0]); + String name = inFile.getName(); + String substring = name.substring(0, name.lastIndexOf('.')); + + File outFile = new File(inFile.getParent(), substring + "_LTV.pdf"); + addOcspInformation.validateSignature(inFile, outFile); + } + + private static void usage() + { + System.err.println("usage: java " + AddValidationInformation.class.getName() + " " + + "\n"); + } +} diff --git 
a/examples/src/main/java/org/apache/pdfbox/examples/signature/validation/CertInformationCollector.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/validation/CertInformationCollector.java new file mode 100644 index 00000000000..0ca013f0782 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/validation/CertInformationCollector.java @@ -0,0 +1,474 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.examples.signature.validation; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.security.GeneralSecurityException; +import java.security.cert.CertificateException; +import java.security.cert.CertificateFactory; +import java.security.cert.X509Certificate; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.examples.signature.cert.CertificateVerifier; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.encryption.SecurityProvider; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; +import org.bouncycastle.asn1.ASN1Encodable; +import org.bouncycastle.asn1.ASN1Object; +import org.bouncycastle.asn1.cms.Attribute; +import org.bouncycastle.asn1.cms.AttributeTable; +import org.bouncycastle.asn1.pkcs.PKCSObjectIdentifiers; +import org.bouncycastle.asn1.x509.Extension; +import org.bouncycastle.cert.X509CertificateHolder; +import org.bouncycastle.cert.jcajce.JcaX509CertificateConverter; +import org.bouncycastle.cms.CMSException; +import org.bouncycastle.cms.CMSSignedData; +import org.bouncycastle.cms.SignerInformation; +import org.bouncycastle.util.Store; + +/** + * This class helps to extract data/information from a signature. The information is held in + * CertSignatureInformation. Some information is needed for validation processing of the + * participating certificates. 
+ * + * @author Alexis Suter + * + */ +public class CertInformationCollector +{ + private static final Log LOG = LogFactory.getLog(CertInformationCollector.class); + + private static final int MAX_CERTIFICATE_CHAIN_DEPTH = 5; + + private final Set certificateSet = new HashSet(); + private final Set urlSet = new HashSet(); + + private final JcaX509CertificateConverter certConverter = new JcaX509CertificateConverter(); + + private CertSignatureInformation rootCertInfo; + + /** + * Gets the certificate information of a signature. + * + * @param signature the signature of the document. + * @param fileName of the document. + * @return the CertSignatureInformation containing all certificate information + * @throws CertificateProccessingException when there is an error processing the certificates + * @throws IOException on a data processing error + */ + public CertSignatureInformation getLastCertInfo(PDSignature signature, String fileName) + throws CertificateProccessingException, IOException + { + FileInputStream documentInput = null; + try + { + documentInput = new FileInputStream(fileName); + byte[] signatureContent = signature.getContents(documentInput); + return getCertInfo(signatureContent); + } + finally + { + IOUtils.closeQuietly(documentInput); + } + } + + /** + * Processes one signature and its including certificates. 
+ * + * @param signatureContent the byte[]-Content of the signature + * @return the CertSignatureInformation for this signature + * @throws IOException + * @throws CertificateProccessingException + */ + private CertSignatureInformation getCertInfo(byte[] signatureContent) + throws CertificateProccessingException, IOException + { + rootCertInfo = new CertSignatureInformation(); + + rootCertInfo.signatureHash = CertInformationHelper.getSha1Hash(signatureContent); + + try + { + CMSSignedData signedData = new CMSSignedData(signatureContent); + SignerInformation signerInformation = processSignerStore(signedData, rootCertInfo); + addTimestampCerts(signerInformation); + } + catch (CMSException e) + { + LOG.error("Error occurred getting Certificate Information from Signature", e); + throw new CertificateProccessingException(e); + } + return rootCertInfo; + } + + /** + * Processes an embedded signed timestamp, that has been placed into a signature. The + * certificates and its chain(s) will be processed the same way as the signature itself. + * + * @param signerInformation of the signature, to get unsigned attributes from it. 
+ * @throws IOException + * @throws CertificateProccessingException + */ + private void addTimestampCerts(SignerInformation signerInformation) + throws IOException, CertificateProccessingException + { + AttributeTable unsignedAttributes = signerInformation.getUnsignedAttributes(); + if (unsignedAttributes == null) + { + return; + } + Attribute tsAttribute = unsignedAttributes + .get(PKCSObjectIdentifiers.id_aa_signatureTimeStampToken); + if (tsAttribute == null) + { + return; + } + ASN1Encodable obj0 = tsAttribute.getAttrValues().getObjectAt(0); + if (!(obj0 instanceof ASN1Object)) + { + return; + } + ASN1Object tsSeq = (ASN1Object) obj0; + + try + { + CMSSignedData signedData = new CMSSignedData(tsSeq.getEncoded("DER")); + rootCertInfo.tsaCerts = new CertSignatureInformation(); + processSignerStore(signedData, rootCertInfo.tsaCerts); + } + catch (CMSException e) + { + throw new IOException("Error parsing timestamp token", e); + } + } + + /** + * Processes a signer store and goes through the signers certificate-chain. Adds the found data + * to the certInfo. Handles only the first signer, although multiple would be possible, but is + * not yet practicable. + * + * @param signedData data from which to get the SignerInformation + * @param certInfo where to add certificate information + * @return Signer Information of the processed certificatesStore for further usage. 
+ * @throws IOException on data-processing error + * @throws CertificateProccessingException on a specific error with a certificate + */ + private SignerInformation processSignerStore( + CMSSignedData signedData, CertSignatureInformation certInfo) + throws IOException, CertificateProccessingException + { + Collection signers = signedData.getSignerInfos().getSigners(); + SignerInformation signerInformation = signers.iterator().next(); + + Store certificatesStore = signedData.getCertificates(); + @SuppressWarnings("unchecked") + Collection matches = + certificatesStore.getMatches(signerInformation.getSID()); + + X509Certificate certificate = getCertFromHolder(matches.iterator().next()); + certificateSet.add(certificate); + + Collection allCerts = certificatesStore.getMatches(null); + addAllCerts(allCerts); + traverseChain(certificate, certInfo, MAX_CERTIFICATE_CHAIN_DEPTH); + return signerInformation; + } + + /** + * Traverse through the Cert-Chain of the given Certificate and add it to the CertInfo + * recursively. 
+ * + * @param certificate Actual Certificate to be processed + * @param certInfo where to add the Certificate (and chain) information + * @param maxDepth Max depth from this point to go through CertChain (could be infinite) + * @throws IOException on data-processing error + * @throws CertificateProccessingException on a specific error with a certificate + */ + private void traverseChain(X509Certificate certificate, CertSignatureInformation certInfo, + int maxDepth) throws IOException, CertificateProccessingException + { + certInfo.certificate = certificate; + + // Certificate Authority Information Access + // As described in https://tools.ietf.org/html/rfc3280.html#section-4.2.2.1 + byte[] authorityExtensionValue = certificate.getExtensionValue(Extension.authorityInfoAccess.getId()); + if (authorityExtensionValue != null) + { + CertInformationHelper.getAuthorityInfoExtensionValue(authorityExtensionValue, certInfo); + } + + if (certInfo.issuerUrl != null) + { + getAlternativeIssuerCertificate(certInfo, maxDepth); + } + + // As described in https://tools.ietf.org/html/rfc3280.html#section-4.2.1.14 + byte[] crlExtensionValue = certificate.getExtensionValue(Extension.cRLDistributionPoints.getId()); + if (crlExtensionValue != null) + { + certInfo.crlUrl = CertInformationHelper.getCrlUrlFromExtensionValue(crlExtensionValue); + } + + try + { + certInfo.isSelfSigned = CertificateVerifier.isSelfSigned(certificate); + } + catch (GeneralSecurityException ex) + { + throw new CertificateProccessingException(ex); + } + if (maxDepth <= 0 || certInfo.isSelfSigned) + { + return; + } + + for (X509Certificate issuer : certificateSet) + { + try + { + certificate.verify(issuer.getPublicKey(), SecurityProvider.getProvider().getName()); + LOG.info("Found the right Issuer Cert! 
for Cert: " + certificate.getSubjectX500Principal() + + "\n" + issuer.getSubjectX500Principal()); + certInfo.issuerCertificate = issuer; + certInfo.certChain = new CertSignatureInformation(); + traverseChain(issuer, certInfo.certChain, maxDepth - 1); + break; + } + catch (GeneralSecurityException ex) + { + // not the issuer + } + } + if (certInfo.issuerCertificate == null) + { + throw new IOException( + "No Issuer Certificate found for Cert: '" + + certificate.getSubjectX500Principal() + "', i.e. Cert '" + + certificate.getIssuerX500Principal() + "' is missing in the chain"); + } + } + + /** + * Get alternative certificate chain, from the Authority Information (a url). If the chain is + * not included in the signature, this is the main chain. Otherwise there might be a second + * chain. Exceptions which happen on this chain will be logged and ignored, because the cert + * might not be available at the time or other reasons. + * + * @param certInfo base Certificate Information, on which to put the alternative Certificate + * @param maxDepth Maximum depth to dig through the chain from here on. 
+ * @throws CertificateProccessingException on a specific error with a certificate + */ + private void getAlternativeIssuerCertificate(CertSignatureInformation certInfo, int maxDepth) + throws CertificateProccessingException + { + if (urlSet.contains(certInfo.issuerUrl)) + { + return; + } + urlSet.add(certInfo.issuerUrl); + LOG.info("Get alternative issuer certificate from: " + certInfo.issuerUrl); + try + { + URL certUrl = new URL(certInfo.issuerUrl); + CertificateFactory certFactory = CertificateFactory.getInstance("X.509"); + InputStream in = certUrl.openStream(); + + X509Certificate altIssuerCert = (X509Certificate) certFactory.generateCertificate(in); + certificateSet.add(altIssuerCert); + + certInfo.alternativeCertChain = new CertSignatureInformation(); + traverseChain(altIssuerCert, certInfo.alternativeCertChain, maxDepth - 1); + in.close(); + } + catch (IOException e) + { + LOG.error("Error getting alternative issuer certificate from " + certInfo.issuerUrl, e); + } + catch (CertificateException e) + { + LOG.error("Error getting alternative issuer certificate from " + certInfo.issuerUrl, e); + } + } + + /** + * Gets the X509Certificate out of the X509CertificateHolder. + * + * @param certificateHolder to get the certificate from + * @return a X509Certificate or null when there was an Error with the Certificate + * @throws CertificateProccessingException on failed conversion from X509CertificateHolder to + * X509Certificate + */ + private X509Certificate getCertFromHolder(X509CertificateHolder certificateHolder) + throws CertificateProccessingException + { + try + { + return certConverter.getCertificate(certificateHolder); + } + catch (CertificateException e) + { + LOG.error("Certificate Exception getting Certificate from certHolder.", e); + throw new CertificateProccessingException(e); + } + } + + /** + * Adds multiple Certificates out of a Collection of X509CertificateHolder into certificateSet. 
+ * + * @param certHolders Collection of X509CertificateHolder + */ + private void addAllCerts(Collection certHolders) + { + for (X509CertificateHolder certificateHolder : certHolders) + { + try + { + X509Certificate certificate = getCertFromHolder(certificateHolder); + certificateSet.add(certificate); + } + catch (CertificateProccessingException e) + { + LOG.warn("Certificate Exception getting Certificate from certHolder.", e); + } + } + } + + /** + * Gets a list of X509Certificate out of an array of X509CertificateHolder. The certificates + * will be added to certificateSet. + * + * @param certHolders Array of X509CertificateHolder + * @throws CertificateProccessingException when one of the Certificates could not be parsed. + */ + public void addAllCertsFromHolders(X509CertificateHolder[] certHolders) + throws CertificateProccessingException + { + addAllCerts(Arrays.asList(certHolders)); + } + + /** + * Traverse a certificate. + * + * @param certificate + * @return + * @throws CertificateProccessingException + */ + CertSignatureInformation getCertInfo(X509Certificate certificate) throws CertificateProccessingException + { + try + { + CertSignatureInformation certSignatureInformation = new CertSignatureInformation(); + traverseChain(certificate, certSignatureInformation, MAX_CERTIFICATE_CHAIN_DEPTH); + return certSignatureInformation; + } + catch (IOException ex) + { + throw new CertificateProccessingException(ex); + } + } + + /** + * Get the set of all processed certificates until now. + * + * @return a set of serial numbers to certificates. 
+ */ + public Set getCertificateSet() + { + return certificateSet; + } + + /** + * Data class to hold Signature, Certificate (and its chain(s)) and revocation Information + */ + public static class CertSignatureInformation + { + private X509Certificate certificate; + private String signatureHash; + private boolean isSelfSigned = false; + private String ocspUrl; + private String crlUrl; + private String issuerUrl; + private X509Certificate issuerCertificate; + private CertSignatureInformation certChain; + private CertSignatureInformation tsaCerts; + private CertSignatureInformation alternativeCertChain; + + public String getOcspUrl() + { + return ocspUrl; + } + + public void setOcspUrl(String ocspUrl) + { + this.ocspUrl = ocspUrl; + } + + public void setIssuerUrl(String issuerUrl) + { + this.issuerUrl = issuerUrl; + } + + public String getCrlUrl() + { + return crlUrl; + } + + public X509Certificate getCertificate() + { + return certificate; + } + + public boolean isSelfSigned() + { + return isSelfSigned; + } + + public X509Certificate getIssuerCertificate() + { + return issuerCertificate; + } + + public String getSignatureHash() + { + return signatureHash; + } + + public CertSignatureInformation getCertChain() + { + return certChain; + } + + public CertSignatureInformation getTsaCerts() + { + return tsaCerts; + } + + public CertSignatureInformation getAlternativeCertChain() + { + return alternativeCertChain; + } + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/validation/CertInformationHelper.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/validation/CertInformationHelper.java new file mode 100644 index 00000000000..112708a7210 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/validation/CertInformationHelper.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.signature.validation; + +import java.io.IOException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Enumeration; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.examples.signature.validation.CertInformationCollector.CertSignatureInformation; +import org.apache.pdfbox.util.Hex; +import org.bouncycastle.asn1.ASN1Encodable; +import org.bouncycastle.asn1.ASN1OctetString; +import org.bouncycastle.asn1.ASN1Sequence; +import org.bouncycastle.asn1.ASN1TaggedObject; +import org.bouncycastle.asn1.x509.GeneralName; +import org.bouncycastle.asn1.x509.X509ObjectIdentifiers; +import org.bouncycastle.cert.jcajce.JcaX509ExtensionUtils; + +public class CertInformationHelper +{ + private static final Log LOG = LogFactory.getLog(CertInformationHelper.class); + + private CertInformationHelper() + { + } + + /** + * Gets the SHA-1-Hash has of given byte[]-content. 
+ * + * @param content to be hashed + * @return SHA-1 hash String + */ + protected static String getSha1Hash(byte[] content) + { + try + { + MessageDigest md = MessageDigest.getInstance("SHA-1"); + return Hex.getString(md.digest(content)); + } + catch (NoSuchAlgorithmException e) + { + LOG.error("No SHA-1 Algorithm found", e); + } + return null; + } + + /** + * Extracts authority information access extension values from the given data. The Data + * structure has to be implemented as described in RFC 2459, 4.2.2.1. + * + * @param extensionValue byte[] of the extension value. + * @param certInfo where to put the found values + * @throws IOException when there is a problem with the extensionValue + */ + protected static void getAuthorityInfoExtensionValue(byte[] extensionValue, + CertSignatureInformation certInfo) throws IOException + { + ASN1Sequence asn1Seq = (ASN1Sequence) JcaX509ExtensionUtils.parseExtensionValue(extensionValue); + Enumeration objects = asn1Seq.getObjects(); + while (objects.hasMoreElements()) + { + // AccessDescription + ASN1Sequence obj = (ASN1Sequence) objects.nextElement(); + ASN1Encodable oid = obj.getObjectAt(0); + // accessLocation + ASN1TaggedObject location = (ASN1TaggedObject) obj.getObjectAt(1); + + if (X509ObjectIdentifiers.id_ad_ocsp.equals(oid) + && location.getTagNo() == GeneralName.uniformResourceIdentifier) + { + ASN1OctetString url = (ASN1OctetString) location.getObject(); + certInfo.setOcspUrl(new String(url.getOctets())); + } + else if (X509ObjectIdentifiers.id_ad_caIssuers.equals(oid)) + { + ASN1OctetString uri = (ASN1OctetString) location.getObject(); + certInfo.setIssuerUrl(new String(uri.getOctets())); + } + } + } + + /** + * Gets the first CRL URL from given extension value. Structure has to be + * built as in 4.2.1.14 CRL Distribution Points of RFC 2459. 
+ * + * @param extensionValue to get the extension value from + * @return first CRL- URL or null + * @throws IOException when there is a problem with the extensionValue + */ + protected static String getCrlUrlFromExtensionValue(byte[] extensionValue) throws IOException + { + ASN1Sequence asn1Seq = (ASN1Sequence) JcaX509ExtensionUtils.parseExtensionValue(extensionValue); + Enumeration objects = asn1Seq.getObjects(); + + while (objects.hasMoreElements()) + { + Object obj = objects.nextElement(); + if (obj instanceof ASN1Sequence) + { + String url = extractCrlUrlFromSequence((ASN1Sequence) obj); + if (url != null) + { + return url; + } + } + } + return null; + } + + private static String extractCrlUrlFromSequence(ASN1Sequence sequence) + { + ASN1TaggedObject taggedObject = (ASN1TaggedObject) sequence.getObjectAt(0); + taggedObject = (ASN1TaggedObject) taggedObject.getObject(); + if (taggedObject.getObject() instanceof ASN1TaggedObject) + { + taggedObject = (ASN1TaggedObject) taggedObject.getObject(); + } + else if (taggedObject.getObject() instanceof ASN1Sequence) + { + // multiple URLs (we take the first) + ASN1Sequence seq = (ASN1Sequence) taggedObject.getObject(); + if (seq.getObjectAt(0) instanceof ASN1TaggedObject) + { + taggedObject = (ASN1TaggedObject) seq.getObjectAt(0); + } + else + { + return null; + } + } + else + { + return null; + } + if (taggedObject.getObject() instanceof ASN1OctetString) + { + ASN1OctetString uri = (ASN1OctetString) taggedObject.getObject(); + String url = new String(uri.getOctets()); + + // return first http(s)-Url for crl + if (url.startsWith("http")) + { + return url; + } + } + // else happens with http://blogs.adobe.com/security/SampleSignedPDFDocument.pdf + return null; + } +} \ No newline at end of file diff --git a/examples/src/main/java/org/apache/pdfbox/examples/signature/validation/CertificateProccessingException.java b/examples/src/main/java/org/apache/pdfbox/examples/signature/validation/CertificateProccessingException.java 
new file mode 100644 index 00000000000..96399c6f769 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/signature/validation/CertificateProccessingException.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.signature.validation; + +/** + * Class to wrap around Certificate Processing exceptions + * + * @author Alexis Suter + */ +public class CertificateProccessingException extends Exception +{ + private static final long serialVersionUID = 814859842830313903L; + + public CertificateProccessingException(Throwable cause) + { + super(cause); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/util/AddWatermarkText.java b/examples/src/main/java/org/apache/pdfbox/examples/util/AddWatermarkText.java new file mode 100644 index 00000000000..ff8aa588631 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/util/AddWatermarkText.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.util; + +import java.awt.Color; +import java.io.File; +import java.io.IOException; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.graphics.blend.BlendMode; +import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; +import org.apache.pdfbox.util.Matrix; + +/** + * Add a diagonal watermark text to each page of a PDF. 
+ * + * @author Tilman Hausherr + */ +public class AddWatermarkText +{ + private AddWatermarkText() + { + } + + public static void main(String[] args) throws IOException + { + if (args.length != 3) + { + usage(); + } + else + { + File srcFile = new File(args[0]); + File dstFile = new File(args[1]); + String text = args[2]; + + PDDocument doc = PDDocument.load(srcFile); + for (PDPage page : doc.getPages()) + { + PDFont font = PDType1Font.HELVETICA; + addWatermarkText(doc, page, font, text); + } + doc.save(dstFile); + doc.close(); + } + } + + private static void addWatermarkText(PDDocument doc, PDPage page, PDFont font, String text) + throws IOException + { + PDPageContentStream cs + = new PDPageContentStream(doc, page, PDPageContentStream.AppendMode.APPEND, true, true); + + float fontHeight = 100; // arbitrary for short text + float width = page.getMediaBox().getWidth(); + float height = page.getMediaBox().getHeight(); + + int rotation = page.getRotation(); + switch (rotation) + { + case 90: + width = page.getMediaBox().getHeight(); + height = page.getMediaBox().getWidth(); + cs.transform(Matrix.getRotateInstance(Math.toRadians(90), height, 0)); + break; + case 180: + cs.transform(Matrix.getRotateInstance(Math.toRadians(180), width, height)); + break; + case 270: + width = page.getMediaBox().getHeight(); + height = page.getMediaBox().getWidth(); + cs.transform(Matrix.getRotateInstance(Math.toRadians(270), 0, width)); + break; + default: + break; + } + + float stringWidth = font.getStringWidth(text) / 1000 * fontHeight; + float diagonalLength = (float) Math.sqrt(width * width + height * height); + float angle = (float) Math.atan2(height, width); + float x = (diagonalLength - stringWidth) / 2; // "horizontal" position in rotated world + float y = -fontHeight / 4; // 4 is a trial-and-error thing, this lowers the text a bit + cs.transform(Matrix.getRotateInstance(angle, 0, 0)); + cs.setFont(font, fontHeight); + // cs.setRenderingMode(RenderingMode.STROKE) // for 
"hollow" effect + + PDExtendedGraphicsState gs = new PDExtendedGraphicsState(); + gs.setNonStrokingAlphaConstant(0.2f); + gs.setStrokingAlphaConstant(0.2f); + gs.setBlendMode(BlendMode.MULTIPLY); + gs.setLineWidth(3f); + cs.setGraphicsStateParameters(gs); + + cs.setNonStrokingColor(Color.red); + cs.setStrokingColor(Color.red); + + cs.beginText(); + cs.newLineAtOffset(x, y); + cs.showText(text); + cs.endText(); + cs.close(); + } + + /** + * This will print the usage. + */ + private static void usage() + { + System.err.println("Usage: java " + AddWatermarkText.class.getName() + " "); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/util/DrawPrintTextLocations.java b/examples/src/main/java/org/apache/pdfbox/examples/util/DrawPrintTextLocations.java index 26c9ff4696a..584ef4e1030 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/util/DrawPrintTextLocations.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/util/DrawPrintTextLocations.java @@ -35,13 +35,20 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDCIDFontType2; import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDSimpleFont; +import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont; +import org.apache.pdfbox.pdmodel.font.PDType0Font; +import org.apache.pdfbox.pdmodel.font.PDType3CharProc; import org.apache.pdfbox.pdmodel.font.PDType3Font; +import org.apache.pdfbox.pdmodel.font.PDVectorFont; import org.apache.pdfbox.pdmodel.interactive.pagenavigation.PDThreadBead; import org.apache.pdfbox.rendering.PDFRenderer; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.TextPosition; import org.apache.pdfbox.util.Matrix; +import org.apache.pdfbox.util.Vector; /** * This is an example on how to get some x/y coordinates of text and to show them in a rendered @@ -52,11 +59,12 @@ */ public class 
DrawPrintTextLocations extends PDFTextStripper { - private BufferedImage image; + private AffineTransform flipAT; + private AffineTransform rotateAT; + private AffineTransform transAT; private final String filename; static final int SCALE = 4; private Graphics2D g2d; - private final PDDocument document; /** * Instantiate a new PDFTextStripper object. @@ -67,7 +75,7 @@ public class DrawPrintTextLocations extends PDFTextStripper */ public DrawPrintTextLocations(PDDocument document, String filename) throws IOException { - this.document = document; + this.document = document; // must initialize here, base class initializes too late this.filename = filename; } @@ -109,14 +117,137 @@ public static void main(String[] args) throws IOException } } + @Override + protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, Vector displacement) + throws IOException + { + super.showGlyph(textRenderingMatrix, font, code, displacement); + + // in cyan: + // show actual glyph bounds. This must be done here and not in writeString(), + // because writeString processes only the glyphs with unicode, + // see e.g. 
the file in PDFBOX-3274 + Shape cyanShape = calculateGlyphBounds(textRenderingMatrix, font, code); + + if (cyanShape != null) + { + cyanShape = flipAT.createTransformedShape(cyanShape); + cyanShape = rotateAT.createTransformedShape(cyanShape); + cyanShape = transAT.createTransformedShape(cyanShape); + + g2d.setColor(Color.CYAN); + g2d.draw(cyanShape); + } + } + + // this calculates the real (except for type 3 fonts) individual glyph bounds + private Shape calculateGlyphBounds(Matrix textRenderingMatrix, PDFont font, int code) throws IOException + { + GeneralPath path = null; + AffineTransform at = textRenderingMatrix.createAffineTransform(); + at.concatenate(font.getFontMatrix().createAffineTransform()); + if (font instanceof PDType3Font) + { + // It is difficult to calculate the real individual glyph bounds for type 3 fonts + // because these are not vector fonts, the content stream could contain almost anything + // that is found in page content streams. + PDType3Font t3Font = (PDType3Font) font; + PDType3CharProc charProc = t3Font.getCharProc(code); + if (charProc != null) + { + BoundingBox fontBBox = t3Font.getBoundingBox(); + PDRectangle glyphBBox = charProc.getGlyphBBox(); + if (glyphBBox != null) + { + // PDFBOX-3850: glyph bbox could be larger than the font bbox + glyphBBox.setLowerLeftX(Math.max(fontBBox.getLowerLeftX(), glyphBBox.getLowerLeftX())); + glyphBBox.setLowerLeftY(Math.max(fontBBox.getLowerLeftY(), glyphBBox.getLowerLeftY())); + glyphBBox.setUpperRightX(Math.min(fontBBox.getUpperRightX(), glyphBBox.getUpperRightX())); + glyphBBox.setUpperRightY(Math.min(fontBBox.getUpperRightY(), glyphBBox.getUpperRightY())); + path = glyphBBox.toGeneralPath(); + } + } + } + else if (font instanceof PDVectorFont) + { + PDVectorFont vectorFont = (PDVectorFont) font; + path = vectorFont.getPath(code); + + if (font instanceof PDTrueTypeFont) + { + PDTrueTypeFont ttFont = (PDTrueTypeFont) font; + int unitsPerEm = ttFont.getTrueTypeFont().getHeader().getUnitsPerEm(); 
+ at.scale(1000d / unitsPerEm, 1000d / unitsPerEm); + } + if (font instanceof PDType0Font) + { + PDType0Font t0font = (PDType0Font) font; + if (t0font.getDescendantFont() instanceof PDCIDFontType2) + { + int unitsPerEm = ((PDCIDFontType2) t0font.getDescendantFont()).getTrueTypeFont().getHeader().getUnitsPerEm(); + at.scale(1000d / unitsPerEm, 1000d / unitsPerEm); + } + } + } + else if (font instanceof PDSimpleFont) + { + PDSimpleFont simpleFont = (PDSimpleFont) font; + + // these two lines do not always work, e.g. for the TT fonts in file 032431.pdf + // which is why PDVectorFont is tried first. + String name = simpleFont.getEncoding().getName(code); + path = simpleFont.getPath(name); + } + else + { + // shouldn't happen, please open issue in JIRA + System.out.println("Unknown font class: " + font.getClass()); + } + if (path == null) + { + return null; + } + return at.createTransformedShape(path.getBounds2D()); + } + private void stripPage(int page) throws IOException { PDFRenderer pdfRenderer = new PDFRenderer(document); - image = pdfRenderer.renderImage(page, SCALE); - + BufferedImage image = pdfRenderer.renderImage(page, SCALE); PDPage pdPage = document.getPage(page); PDRectangle cropBox = pdPage.getCropBox(); + // flip y-axis + flipAT = new AffineTransform(); + flipAT.translate(0, pdPage.getBBox().getHeight()); + flipAT.scale(1, -1); + + // page may be rotated + rotateAT = new AffineTransform(); + int rotation = pdPage.getRotation(); + if (rotation != 0) + { + PDRectangle mediaBox = pdPage.getMediaBox(); + switch (rotation) + { + case 90: + rotateAT.translate(mediaBox.getHeight(), 0); + break; + case 270: + rotateAT.translate(0, mediaBox.getWidth()); + break; + case 180: + rotateAT.translate(mediaBox.getWidth(), mediaBox.getHeight()); + break; + default: + break; + } + rotateAT.rotate(Math.toRadians(rotation)); + } + + // cropbox + transAT = AffineTransform.getTranslateInstance(-cropBox.getLowerLeftX(), cropBox.getLowerLeftY()); + g2d = image.createGraphics(); 
g2d.setStroke(new BasicStroke(0.1f)); g2d.scale(SCALE, SCALE); @@ -132,12 +263,14 @@ private void stripPage(int page) throws IOException List pageArticles = pdPage.getThreadBeads(); for (PDThreadBead bead : pageArticles) { + if (bead == null) + { + continue; + } PDRectangle r = bead.getRectangle(); - GeneralPath p = r.transform(Matrix.getTranslateInstance(-cropBox.getLowerLeftX(), cropBox.getLowerLeftY())); - AffineTransform flip = new AffineTransform(); - flip.translate(0, pdPage.getBBox().getHeight()); - flip.scale(1, -1); - Shape s = flip.createTransformedShape(p); + Shape s = r.toGeneralPath().createTransformedShape(transAT); + s = flipAT.createTransformedShape(s); + s = rotateAT.createTransformedShape(s); g2d.setColor(Color.green); g2d.draw(s); } @@ -164,16 +297,21 @@ protected void writeString(String string, List textPositions) thro + text.getWidthOfSpace() + " width=" + text.getWidthDirAdj() + "]" + text.getUnicode()); + // glyph space -> user space + // note: text.getTextMatrix() is *not* the Text Matrix, it's the Text Rendering Matrix + AffineTransform at = text.getTextMatrix().createAffineTransform(); + // in red: // show rectangles with the "height" (not a real height, but used for text extraction // heuristics, it is 1/2 of the bounding box height and starts at y=0) - Rectangle2D.Float rect = new Rectangle2D.Float( - text.getXDirAdj(), - (text.getYDirAdj() - text.getHeightDir()), - text.getWidthDirAdj(), - text.getHeightDir()); + Rectangle2D.Float rect = new Rectangle2D.Float(0, 0, + text.getWidthDirAdj() / text.getTextMatrix().getScalingFactorX(), + text.getHeightDir() / text.getTextMatrix().getScalingFactorY()); + Shape s = at.createTransformedShape(rect); + s = flipAT.createTransformedShape(s); + s = rotateAT.createTransformedShape(s); g2d.setColor(Color.red); - g2d.draw(rect); + g2d.draw(s); // in blue: // show rectangle with the real vertical bounds, based on the font bounding box y values @@ -185,9 +323,6 @@ protected void writeString(String 
string, List textPositions) thro float xadvance = font.getWidth(text.getCharacterCodes()[0]); // todo: should iterate all chars rect = new Rectangle2D.Float(0, bbox.getLowerLeftY(), xadvance, bbox.getHeight()); - // glyph space -> user space - // note: text.getTextMatrix() is *not* the Text Matrix, it's the Text Rendering Matrix - AffineTransform at = text.getTextMatrix().createAffineTransform(); if (font instanceof PDType3Font) { // bbox and font matrix are unscaled @@ -198,41 +333,12 @@ protected void writeString(String string, List textPositions) thro // bbox and font matrix are already scaled to 1000 at.scale(1/1000f, 1/1000f); } - Shape s = at.createTransformedShape(rect); + s = at.createTransformedShape(rect); + s = flipAT.createTransformedShape(s); + s = rotateAT.createTransformedShape(s); - // flip y-axis - AffineTransform flip = new AffineTransform(); - flip.translate(0, getCurrentPage().getBBox().getHeight()); - flip.scale(1, -1); - s = flip.createTransformedShape(s); - - AffineTransform transform = g2d.getTransform(); - int rotation = getCurrentPage().getRotation(); - if (rotation != 0) - { - PDRectangle mediaBox = getCurrentPage().getMediaBox(); - switch (rotation) - { - case 90: - g2d.translate(mediaBox.getHeight(), 0); - break; - case 270: - g2d.translate(0, mediaBox.getWidth()); - break; - case 180: - g2d.translate(mediaBox.getWidth(), mediaBox.getHeight()); - break; - default: - break; - } - g2d.rotate(Math.toRadians(rotation)); - } g2d.setColor(Color.blue); g2d.draw(s); - if (rotation != 0) - { - g2d.setTransform(transform); - } } } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/util/ExtractTextSimple.java b/examples/src/main/java/org/apache/pdfbox/examples/util/ExtractTextSimple.java new file mode 100644 index 00000000000..6561522cdbf --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/util/ExtractTextSimple.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.util; + +import java.io.File; +import java.io.IOException; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.encryption.AccessPermission; +import org.apache.pdfbox.text.PDFTextStripper; + +/** + * This is a simple text extraction example to get started. For more advance usage, see the + * ExtractTextByArea and the DrawPrintTextLocations examples in this subproject, as well as the + * ExtractText tool in the tools subproject. + * + * @author Tilman Hausherr + */ +public class ExtractTextSimple +{ + private ExtractTextSimple() + { + // example class should not be instantiated + } + + /** + * This will print the documents text page by page. + * + * @param args The command line arguments. + * + * @throws IOException If there is an error parsing or extracting the document. 
+ */ + public static void main(String[] args) throws IOException + { + if (args.length != 1) + { + usage(); + } + + PDDocument document = PDDocument.load(new File(args[0])); + AccessPermission ap = document.getCurrentAccessPermission(); + if (!ap.canExtractContent()) + { + throw new IOException("You do not have permission to extract text"); + } + + PDFTextStripper stripper = new PDFTextStripper(); + + // This example uses sorting, but in some cases it is more useful to switch it off, + // e.g. in some files with columns where the PDF content stream respects the + // column order. + stripper.setSortByPosition(true); + + for (int p = 1; p <= document.getNumberOfPages(); ++p) + { + // Set the page interval to extract. If you don't, then all pages would be extracted. + stripper.setStartPage(p); + stripper.setEndPage(p); + + // let the magic happen + String text = stripper.getText(document); + + // do some nice output with a header + String pageStr = String.format("page %d:", p); + System.out.println(pageStr); + for (int i = 0; i < pageStr.length(); ++i) + { + System.out.print("-"); + } + System.out.println(); + System.out.println(text.trim()); + System.out.println(); + + // If the extracted text is empty or gibberish, please try extracting text + // with Adobe Reader first before asking for help. Also read the FAQ + // on the website: + // https://pdfbox.apache.org/2.0/faq.html#text-extraction + } + document.close(); + } + + /** + * This will print the usage for this document. 
+ */ + private static void usage() + { + System.err.println("Usage: java " + ExtractTextSimple.class.getName() + " "); + System.exit(-1); + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/util/PDFHighlighter.java b/examples/src/main/java/org/apache/pdfbox/examples/util/PDFHighlighter.java index d11a518660a..4a64d2acccc 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/util/PDFHighlighter.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/util/PDFHighlighter.java @@ -54,7 +54,6 @@ public class PDFHighlighter extends PDFTextStripper */ public PDFHighlighter() throws IOException { - super(); super.setLineSeparator( "" ); super.setWordSeparator( "" ); super.setShouldSeparateByBeads( false ); diff --git a/examples/src/main/java/org/apache/pdfbox/examples/util/PDFMergerExample.java b/examples/src/main/java/org/apache/pdfbox/examples/util/PDFMergerExample.java new file mode 100644 index 00000000000..bbfc638c545 --- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/util/PDFMergerExample.java @@ -0,0 +1,174 @@ +/* + * Copyright 2016 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.examples.util; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.io.MemoryUsageSetting; +import org.apache.pdfbox.multipdf.PDFMergerUtility; +import org.apache.pdfbox.pdmodel.PDDocumentInformation; +import org.apache.pdfbox.pdmodel.common.PDMetadata; +import org.apache.xmpbox.XMPMetadata; +import org.apache.xmpbox.schema.DublinCoreSchema; +import org.apache.xmpbox.schema.PDFAIdentificationSchema; +import org.apache.xmpbox.schema.XMPBasicSchema; +import org.apache.xmpbox.type.BadFieldValueException; +import org.apache.xmpbox.xml.XmpSerializer; + +import java.util.Calendar; +import java.util.List; +import javax.xml.transform.TransformerException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.io.IOUtils; + +/** + * + * This example demonstrates the use of the new methods {@link PDFMergerUtility#setDestinationDocumentInformation(org.apache.pdfbox.pdmodel.PDDocumentInformation) + * } and {@link PDFMergerUtility#setDestinationMetadata(org.apache.pdfbox.pdmodel.common.PDMetadata) + * } that were added in April 2016. These allow you to control the metadata in a merge without having + * to reopen the result file. + * + * @author Alexander Kriegisch + */ +public class PDFMergerExample +{ + private static final Log LOG = LogFactory.getLog(PDFMergerExample.class); + + /** + * Creates a compound PDF document from a list of input documents. + *

+ * The merged document is PDF/A-1b compliant, provided the source documents are as well. It + * contains document properties title, creator and subject, currently hard-coded. + * + * @param sources list of source PDF document streams. + * @return compound PDF document as a readable input stream. + * @throws IOException if anything goes wrong during PDF merge. + */ + public InputStream merge(final List sources) throws IOException + { + String title = "My title"; + String creator = "Alexander Kriegisch"; + String subject = "Subject with umlauts ÄÖÜ"; + + ByteArrayOutputStream mergedPDFOutputStream = null; + COSStream cosStream = null; + try + { + // If you're merging in a servlet, you can modify this example to use the outputStream only + // as the response as shown here: http://stackoverflow.com/a/36894346/535646 + mergedPDFOutputStream = new ByteArrayOutputStream(); + cosStream = new COSStream(); + + PDFMergerUtility pdfMerger = createPDFMergerUtility(sources, mergedPDFOutputStream); + + // PDF and XMP properties must be identical, otherwise document is not PDF/A compliant + PDDocumentInformation pdfDocumentInfo = createPDFDocumentInfo(title, creator, subject); + PDMetadata xmpMetadata = createXMPMetadata(cosStream, title, creator, subject); + pdfMerger.setDestinationDocumentInformation(pdfDocumentInfo); + pdfMerger.setDestinationMetadata(xmpMetadata); + + LOG.info("Merging " + sources.size() + " source documents into one PDF"); + pdfMerger.mergeDocuments(MemoryUsageSetting.setupMainMemoryOnly()); + LOG.info("PDF merge successful, size = {" + mergedPDFOutputStream.size() + "} bytes"); + + return new ByteArrayInputStream(mergedPDFOutputStream.toByteArray()); + } + catch (BadFieldValueException e) + { + throw new IOException("PDF merge problem", e); + } + catch (TransformerException e) + { + throw new IOException("PDF merge problem", e); + } + finally + { + for (InputStream source : sources) + { + IOUtils.closeQuietly(source); + } + IOUtils.closeQuietly(cosStream); 
+ IOUtils.closeQuietly(mergedPDFOutputStream); + } + } + + private PDFMergerUtility createPDFMergerUtility(List sources, ByteArrayOutputStream mergedPDFOutputStream) + { + LOG.info("Initialising PDF merge utility"); + PDFMergerUtility pdfMerger = new PDFMergerUtility(); + pdfMerger.addSources(sources); + pdfMerger.setDestinationStream(mergedPDFOutputStream); + return pdfMerger; + } + + private PDDocumentInformation createPDFDocumentInfo(String title, String creator, String subject) + { + LOG.info("Setting document info (title, author, subject) for merged PDF"); + PDDocumentInformation documentInformation = new PDDocumentInformation(); + documentInformation.setTitle(title); + documentInformation.setCreator(creator); + documentInformation.setSubject(subject); + return documentInformation; + } + + private PDMetadata createXMPMetadata(COSStream cosStream, String title, String creator, String subject) + throws BadFieldValueException, TransformerException, IOException + { + LOG.info("Setting XMP metadata (title, author, subject) for merged PDF"); + XMPMetadata xmpMetadata = XMPMetadata.createXMPMetadata(); + + // PDF/A-1b properties + PDFAIdentificationSchema pdfaSchema = xmpMetadata.createAndAddPFAIdentificationSchema(); + pdfaSchema.setPart(1); + pdfaSchema.setConformance("B"); + + // Dublin Core properties + DublinCoreSchema dublinCoreSchema = xmpMetadata.createAndAddDublinCoreSchema(); + dublinCoreSchema.setTitle(title); + dublinCoreSchema.addCreator(creator); + dublinCoreSchema.setDescription(subject); + + // XMP Basic properties + XMPBasicSchema basicSchema = xmpMetadata.createAndAddXMPBasicSchema(); + Calendar creationDate = Calendar.getInstance(); + basicSchema.setCreateDate(creationDate); + basicSchema.setModifyDate(creationDate); + basicSchema.setMetadataDate(creationDate); + basicSchema.setCreatorTool(creator); + + // Create and return XMP data structure in XML format + ByteArrayOutputStream xmpOutputStream = null; + OutputStream cosXMPStream = null; + try + { 
+ xmpOutputStream = new ByteArrayOutputStream(); + cosXMPStream = cosStream.createOutputStream(); + new XmpSerializer().serialize(xmpMetadata, xmpOutputStream, true); + cosXMPStream.write(xmpOutputStream.toByteArray()); + return new PDMetadata(cosStream); + } + finally + { + IOUtils.closeQuietly(xmpOutputStream); + IOUtils.closeQuietly(cosXMPStream); + } + } +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java b/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java index 8cf5fb1b479..6f6b23ca080 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java @@ -26,6 +26,7 @@ import org.apache.pdfbox.util.Matrix; import org.apache.pdfbox.contentstream.operator.DrawObject; import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.contentstream.PDFStreamEngine; import java.io.File; @@ -110,7 +111,7 @@ public static void main( String[] args ) throws IOException protected void processOperator( Operator operator, List operands) throws IOException { String operation = operator.getName(); - if( "Do".equals(operation) ) + if (OperatorName.DRAW_OBJECT.equals(operation)) { COSName objectName = (COSName) operands.get( 0 ); PDXObject xobject = getResources().getXObject( objectName ); diff --git a/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java b/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java index d109e5e995e..ce9f2ddf259 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java +++ b/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java @@ -16,19 +16,26 @@ */ package org.apache.pdfbox.examples.util; +import java.io.File; +import java.io.IOException; import java.io.OutputStream; +import java.util.ArrayList; 
+import java.util.List; + +import org.apache.pdfbox.contentstream.PDContentStream; +import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdfwriter.ContentStreamWriter; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDStream; -import org.apache.pdfbox.contentstream.operator.Operator; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; +import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; +import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern; +import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern; /** * This is an example on how to remove all text from PDF document. @@ -42,7 +49,7 @@ public final class RemoveAllText */ private RemoveAllText() { - //example class should not be instantiated + // example class should not be instantiated } /** @@ -52,60 +59,104 @@ private RemoveAllText() * * @throws IOException If there is an error parsing the document. */ - public static void main( String[] args ) throws IOException + public static void main(String[] args) throws IOException { - if( args.length != 2 ) + if (args.length != 2) { usage(); } else { - PDDocument document = null; - try + PDDocument document = PDDocument.load(new File(args[0])); + if (document.isEncrypted()) { - document = PDDocument.load( new File(args[0]) ); - if( document.isEncrypted() ) - { - System.err.println( "Error: Encrypted documents are not supported for this example." 
); - System.exit( 1 ); - } - for( PDPage page : document.getPages() ) - { - PDFStreamParser parser = new PDFStreamParser(page); - parser.parse(); - List tokens = parser.getTokens(); - List newTokens = new ArrayList(); - for (Object token : tokens) - { - if( token instanceof Operator) - { - Operator op = (Operator)token; - if( op.getName().equals( "TJ") || op.getName().equals( "Tj" )) - { - //remove the one argument to this operator - newTokens.remove( newTokens.size() -1 ); - continue; - } - } - newTokens.add( token ); - } - PDStream newContents = new PDStream( document ); - OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE); - ContentStreamWriter writer = new ContentStreamWriter( out ); - writer.writeTokens( newTokens ); - out.close(); - page.setContents( newContents ); - } - document.save( args[1] ); + System.err.println( + "Error: Encrypted documents are not supported for this example."); + System.exit(1); } - finally + for (PDPage page : document.getPages()) { - if( document != null ) + List newTokens = createTokensWithoutText(page); + PDStream newContents = new PDStream(document); + writeTokensToStream(newContents, newTokens); + page.setContents(newContents); + processResources(page.getResources()); + } + document.save(args[1]); + document.close(); + } + } + + private static void processResources(PDResources resources) throws IOException + { + for (COSName name : resources.getXObjectNames()) + { + PDXObject xobject = resources.getXObject(name); + if (xobject instanceof PDFormXObject) + { + PDFormXObject formXObject = (PDFormXObject) xobject; + writeTokensToStream(formXObject.getContentStream(), + createTokensWithoutText(formXObject)); + processResources(formXObject.getResources()); + } + } + for (COSName name : resources.getPatternNames()) + { + PDAbstractPattern pattern = resources.getPattern(name); + if (pattern instanceof PDTilingPattern) + { + PDTilingPattern tilingPattern = (PDTilingPattern) pattern; + 
writeTokensToStream(tilingPattern.getContentStream(), + createTokensWithoutText(tilingPattern)); + processResources(tilingPattern.getResources()); + } + } + } + + private static void writeTokensToStream(PDStream newContents, List newTokens) throws IOException + { + OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE); + ContentStreamWriter writer = new ContentStreamWriter(out); + writer.writeTokens(newTokens); + out.close(); + } + + private static List createTokensWithoutText(PDContentStream contentStream) throws IOException + { + PDFStreamParser parser = new PDFStreamParser(contentStream); + Object token = parser.parseNextToken(); + List newTokens = new ArrayList(); + while (token != null) + { + if (token instanceof Operator) + { + Operator op = (Operator) token; + String opName = op.getName(); + if (OperatorName.SHOW_TEXT_ADJUSTED.equals(opName) + || OperatorName.SHOW_TEXT.equals(opName) + || OperatorName.SHOW_TEXT_LINE.equals(opName)) { - document.close(); + // remove the argument to this operator + newTokens.remove(newTokens.size() - 1); + + token = parser.parseNextToken(); + continue; + } + else if (OperatorName.SHOW_TEXT_LINE_AND_SPACE.equals(opName)) + { + // remove the 3 arguments to this operator + newTokens.remove(newTokens.size() - 1); + newTokens.remove(newTokens.size() - 1); + newTokens.remove(newTokens.size() - 1); + + token = parser.parseNextToken(); + continue; } } + newTokens.add(token); + token = parser.parseNextToken(); } + return newTokens; } /** @@ -113,7 +164,8 @@ public static void main( String[] args ) throws IOException */ private static void usage() { - System.err.println( "Usage: java " + RemoveAllText.class.getName() + " " ); + System.err.println( + "Usage: java " + RemoveAllText.class.getName() + " "); } } diff --git a/examples/src/main/java/org/apache/pdfbox/examples/util/SplitBooklet.java b/examples/src/main/java/org/apache/pdfbox/examples/util/SplitBooklet.java new file mode 100644 index 00000000000..a05c9ab028a 
--- /dev/null +++ b/examples/src/main/java/org/apache/pdfbox/examples/util/SplitBooklet.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.util; + +import java.io.File; +import java.io.IOException; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.common.PDRectangle; + +/** + * Split a booklet. Based on the discussion from + * PDFBOX-5078, see there for + * example files, more sample code, and a link to a project to create booklets. + * + * @author Tilman Hausherr + */ +public class SplitBooklet +{ + /** + * Default constructor. 
+ */ + private SplitBooklet() + { + // example class should not be instantiated + } + + public static void main(String[] args) throws IOException + { + if (args.length < 2) + { + usage(); + System.exit(-1); + } + PDDocument document = PDDocument.load(new File(args[0])); + PDDocument outdoc = new PDDocument(); + for (PDPage page : document.getPages()) + { + PDRectangle cropBoxORIG = page.getCropBox(); + + // make sure to have new objects + PDRectangle cropBoxLEFT = new PDRectangle(cropBoxORIG.getCOSArray()); + PDRectangle cropBoxRIGHT = new PDRectangle(cropBoxORIG.getCOSArray()); + + if (page.getRotation() == 90 || page.getRotation() == 270) + { + cropBoxLEFT.setUpperRightY(cropBoxORIG.getLowerLeftY() + cropBoxORIG.getHeight() / 2); + cropBoxRIGHT.setLowerLeftY(cropBoxORIG.getLowerLeftY() + cropBoxORIG.getHeight() / 2); + } + else + { + cropBoxLEFT.setUpperRightX(cropBoxORIG.getLowerLeftX() + cropBoxORIG.getWidth() / 2); + cropBoxRIGHT.setLowerLeftX(cropBoxORIG.getLowerLeftX() + cropBoxORIG.getWidth() / 2); + } + + if (page.getRotation() == 180 || page.getRotation() == 270) + { + PDPage pageRIGHT = outdoc.importPage(page); + pageRIGHT.setCropBox(cropBoxRIGHT); + PDPage pageLEFT = outdoc.importPage(page); + pageLEFT.setCropBox(cropBoxLEFT); + } + else + { + PDPage pageLEFT = outdoc.importPage(page); + pageLEFT.setCropBox(cropBoxLEFT); + PDPage pageRIGHT = outdoc.importPage(page); + pageRIGHT.setCropBox(cropBoxRIGHT); + } + } + + outdoc.save(args[1]); + outdoc.close(); + document.close(); // must be after saving the destination document + } + + private static void usage() + { + System.err.println("Usage: java " + SplitBooklet.class.getName() + " "); + } + +} diff --git a/examples/src/main/java/org/apache/pdfbox/examples/util/package.html b/examples/src/main/java/org/apache/pdfbox/examples/util/package.html index e4abc7ac5e6..e466bb4a24e 100644 --- a/examples/src/main/java/org/apache/pdfbox/examples/util/package.html +++ 
b/examples/src/main/java/org/apache/pdfbox/examples/util/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB Color Space Profile.icm b/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB Color Space Profile.icm deleted file mode 100644 index 7f9d18d097d..00000000000 Binary files a/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB Color Space Profile.icm and /dev/null differ diff --git a/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB Color Space Profile.icm.LICENSE.txt b/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB Color Space Profile.icm.LICENSE.txt deleted file mode 100644 index 9b817e33924..00000000000 --- a/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB Color Space Profile.icm.LICENSE.txt +++ /dev/null @@ -1,14 +0,0 @@ -Obtained from: http://www.srgb.com/usingsrgb.html - -The file "sRGB Color Space Profile.icm" is: -Copyright (c) 1998 Hewlett-Packard Company - -To anyone who acknowledges that the file "sRGB Color Space Profile.icm" -is provided "AS IS" WITH NO EXPRESS OR IMPLIED WARRANTY: -permission to use, copy and distribute this file for any purpose is hereby -granted without fee, provided that the file is not changed including the HP -copyright notice tag, and that the name of Hewlett-Packard Company not be -used in advertising or publicity pertaining to distribution of the software -without specific, written prior permission. Hewlett-Packard Company makes -no representations about the suitability of this software for any purpose. 
- diff --git a/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.icc b/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.icc new file mode 100644 index 00000000000..6f3efbf02db Binary files /dev/null and b/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.icc differ diff --git a/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.icc.COPYING b/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.icc.COPYING new file mode 100644 index 00000000000..2c71be612da --- /dev/null +++ b/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.icc.COPYING @@ -0,0 +1,45 @@ +The profiles in the base directory are provided according to different licenses. + + +Group A +sRGB, LCMSLAB.ICM, LCMSXYZ.ICM, the compatibleWithAdobeRGB.icc and the +Gray.icc, CineonLog_M*.icc, CineLogCurve.icc profiles are all zlib licensed. +Even though it is highly recommended to rename them before editing. + + +Group B +The eciRGB*.icc profiles come with their license in license.rtf. + + +Group C +PhotoGamutRGB_avg6c.icc is licensed to be distributed freely. Modifications +are not allowed. + + + +Additionally all profiles come with the following disclaimer. The provided +ICC Profiles in the package are called DATA in the folling statement. + + + NO WARRANTY + + BECAUSE THE DATA IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE DATA, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE DATA "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE DATA IS WITH YOU. SHOULD THE +DATA PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE DATA AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE DATA (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE DATA TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + diff --git a/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.icc.LICENSE-ZLIB b/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.icc.LICENSE-ZLIB new file mode 100644 index 00000000000..3b357a1d687 --- /dev/null +++ b/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.icc.LICENSE-ZLIB @@ -0,0 +1,23 @@ +The zlib/libpng License + +Copyright (c) 2008 Kai-Uwe Behrmann + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. 
+ diff --git a/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.icc.README b/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.icc.README new file mode 100644 index 00000000000..7c29edcdacb --- /dev/null +++ b/examples/src/main/resources/org/apache/pdfbox/resources/pdfa/sRGB.icc.README @@ -0,0 +1,12 @@ +Included are profiles created by Marti Maria (littleCMS) : + + # CIE*Lab + # CIE*XYZ + # sRGB + +Various contributors: + + # LStar-RGB from ColorSolutions + # Photogamut-RGB from the Photogamut workgroup + # Cineon and Gray from Kai-Uwe Behrmann + # compatibleWithAdobeRGB1998 from Graeme Gill diff --git a/examples/src/main/resources/org/apache/pdfbox/resources/ttf/Lohit-Bengali.ttf b/examples/src/main/resources/org/apache/pdfbox/resources/ttf/Lohit-Bengali.ttf new file mode 100644 index 00000000000..fa0f51627e7 Binary files /dev/null and b/examples/src/main/resources/org/apache/pdfbox/resources/ttf/Lohit-Bengali.ttf differ diff --git a/examples/src/test/java/org/apache/pdfbox/examples/interactive/form/TestCreateSimpleForms.java b/examples/src/test/java/org/apache/pdfbox/examples/interactive/form/TestCreateSimpleForms.java new file mode 100644 index 00000000000..2c750c4d59c --- /dev/null +++ b/examples/src/test/java/org/apache/pdfbox/examples/interactive/form/TestCreateSimpleForms.java @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.interactive.form; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceCharacteristicsDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceEntry; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import org.apache.pdfbox.pdmodel.interactive.form.PDCheckBox; +import org.apache.pdfbox.pdmodel.interactive.form.PDRadioButton; +import org.apache.pdfbox.pdmodel.interactive.form.PDTextField; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test of some of the form examples.
+ * + * @author Tilman Hausherr + */ +public class TestCreateSimpleForms +{ + public TestCreateSimpleForms() + { + } + + /** + * Test of CreateSimpleForm + * + * @throws java.io.IOException + */ + @Test + public void testCreateSimpleForm() throws IOException + { + CreateSimpleForm.main(null); + PDDocument doc = PDDocument.load(new File("target/SimpleForm.pdf")); + new PDFRenderer(doc).renderImage(0); + PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm(); + PDTextField textBox = (PDTextField) acroForm.getField("SampleField"); + Assert.assertEquals("Sample field content", textBox.getValue()); + try + { + textBox.setValue("Łódź"); + Assert.fail("should have failed with IllegalArgumentException"); + } + catch (IllegalArgumentException ex) + { + Assert.assertTrue(ex.getMessage().contains("U+0141 ('Lslash') is not available")); + } + + PDFont font = getFontFromWidgetResources(textBox, "Helv"); + Assert.assertEquals("Helvetica", font.getName()); + Assert.assertTrue(font.isStandard14()); + doc.close(); + } + + @Test + public void testAddBorderToField() throws IOException + { + CreateSimpleForm.main(null); + + PDDocument doc = PDDocument.load(new File("target/SimpleForm.pdf")); + PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm(); + PDTextField textBox = (PDTextField) acroForm.getField("SampleField"); + PDAnnotationWidget widget = textBox.getWidgets().get(0); + PDAppearanceCharacteristicsDictionary appearanceCharacteristics = widget.getAppearanceCharacteristics(); + PDColor borderColour = appearanceCharacteristics.getBorderColour(); + PDColor backgroundColour = appearanceCharacteristics.getBackground(); + Assert.assertEquals(PDDeviceRGB.INSTANCE, borderColour.getColorSpace()); + Assert.assertEquals(PDDeviceRGB.INSTANCE, backgroundColour.getColorSpace()); + Assert.assertArrayEquals(new float[]{0,1,0}, borderColour.getComponents(), 0); + Assert.assertArrayEquals(new float[]{1,1,0}, backgroundColour.getComponents(), 0); + doc.close(); + + 
AddBorderToField.main(null); + + PDDocument doc2 = PDDocument.load(new File("target/AddBorderToField.pdf")); + new PDFRenderer(doc2).renderImage(0); + PDAcroForm acroForm2 = doc2.getDocumentCatalog().getAcroForm(); + PDTextField textBox2 = (PDTextField) acroForm2.getField("SampleField"); + PDAnnotationWidget widget2 = textBox2.getWidgets().get(0); + PDAppearanceCharacteristicsDictionary appearanceCharacteristics2 = widget2.getAppearanceCharacteristics(); + PDColor borderColour2 = appearanceCharacteristics2.getBorderColour(); + Assert.assertEquals(PDDeviceRGB.INSTANCE, borderColour2.getColorSpace()); + Assert.assertArrayEquals(new float[]{1,0,0}, borderColour2.getComponents(), 0); + + doc2.close(); + } + + /** + * Test of CreateSimpleFormWithEmbeddedFont + * + * @throws java.io.IOException + */ + @Test + public void testCreateSimpleFormWithEmbeddedFont() throws IOException + { + CreateSimpleFormWithEmbeddedFont.main(null); + PDDocument doc = PDDocument.load(new File("target/SimpleFormWithEmbeddedFont.pdf")); + new PDFRenderer(doc).renderImage(0); + PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm(); + PDTextField textBox = (PDTextField) acroForm.getField("SampleField"); + Assert.assertEquals("Sample field İ", textBox.getValue()); + textBox.setValue("Łódź"); + PDFont font = getFontFromWidgetResources(textBox, "F1"); + Assert.assertEquals("LiberationSans", font.getName()); + doc.close(); + } + + /** + * Test of CreateMultiWidgetsForm + * + * @throws java.io.IOException + */ + @Test + public void testCreateMultiWidgetsForm() throws IOException + { + CreateMultiWidgetsForm.main(null); + + PDDocument doc = PDDocument.load(new File("target/MultiWidgetsForm.pdf")); + Assert.assertEquals(2, doc.getNumberOfPages()); + new PDFRenderer(doc).renderImage(0); + new PDFRenderer(doc).renderImage(1); + PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm(); + PDTextField textBox = (PDTextField) acroForm.getField("SampleField"); + Assert.assertEquals("Sample
field", textBox.getValue()); + List widgets = textBox.getWidgets(); + Assert.assertEquals(2, widgets.size()); + PDAnnotationWidget w1 = widgets.get(0); + PDAnnotationWidget w2 = widgets.get(1); + PDPage page1 = w1.getPage(); + PDPage page2 = w2.getPage(); + Assert.assertNotEquals(page1.getCOSObject(), page2.getCOSObject()); + Assert.assertEquals(page1, doc.getPage(0)); + Assert.assertEquals(page2, doc.getPage(1)); + Assert.assertEquals(page1.getAnnotations().get(0), w1); + Assert.assertEquals(page2.getAnnotations().get(0), w2); + Assert.assertNotEquals(w1, w2); + PDAppearanceCharacteristicsDictionary appearanceCharacteristics1 = w1.getAppearanceCharacteristics(); + PDAppearanceCharacteristicsDictionary appearanceCharacteristics2 = w2.getAppearanceCharacteristics(); + PDColor backgroundColor1 = appearanceCharacteristics1.getBackground(); + PDColor backgroundColor2 = appearanceCharacteristics2.getBackground(); + PDColor borderColour1 = appearanceCharacteristics1.getBorderColour(); + PDColor borderColour2 = appearanceCharacteristics2.getBorderColour(); + Assert.assertEquals(PDDeviceRGB.INSTANCE, backgroundColor1.getColorSpace()); + Assert.assertEquals(PDDeviceRGB.INSTANCE, backgroundColor2.getColorSpace()); + Assert.assertEquals(PDDeviceRGB.INSTANCE, borderColour1.getColorSpace()); + Assert.assertEquals(PDDeviceRGB.INSTANCE, borderColour2.getColorSpace()); + Assert.assertArrayEquals(new float[]{1,1,0}, backgroundColor1.getComponents(), 0); + Assert.assertArrayEquals(new float[]{0,1,0}, backgroundColor2.getComponents(), 0); + Assert.assertArrayEquals(new float[]{0,1,0}, borderColour1.getComponents(), 0); + Assert.assertArrayEquals(new float[]{1,0,0}, borderColour2.getComponents(), 0); + doc.close(); + } + + @Test + public void testCreateCheckBox() throws IOException + { + CreateCheckBox.main(null); + PDDocument doc1 = PDDocument.load(new File("target/CheckBoxSample.pdf")); + new PDFRenderer(doc1).renderImage(0); + PDAcroForm acroForm1 = 
doc1.getDocumentCatalog().getAcroForm(); + PDCheckBox checkbox1 = (PDCheckBox) acroForm1.getField("MyCheckBox"); + Assert.assertEquals("Yes", checkbox1.getOnValue()); + Assert.assertEquals("Off", checkbox1.getValue()); + checkbox1.check(); + Assert.assertEquals("Yes", checkbox1.getValue()); + doc1.save("target/CheckBoxSample-modified.pdf"); + doc1.close(); + + PDDocument doc2 = PDDocument.load(new File("target/CheckBoxSample-modified.pdf")); + new PDFRenderer(doc2).renderImage(0); + PDAcroForm acroForm2 = doc2.getDocumentCatalog().getAcroForm(); + PDCheckBox checkbox2 = (PDCheckBox) acroForm2.getField("MyCheckBox"); + Assert.assertEquals("Yes", checkbox2.getValue()); + doc2.close(); + } + + @Test + public void testRadioButtons() throws IOException + { + CreateRadioButtons.main(null); + PDDocument doc1 = PDDocument.load(new File("target/RadioButtonsSample.pdf")); + new PDFRenderer(doc1).renderImage(0); + PDAcroForm acroForm1 = doc1.getDocumentCatalog().getAcroForm(); + PDRadioButton radioButton1 = (PDRadioButton) acroForm1.getField("MyRadioButton"); + Assert.assertEquals(3, radioButton1.getWidgets().size()); + Assert.assertEquals("c", radioButton1.getValue()); + Assert.assertEquals(1, radioButton1.getSelectedExportValues().size()); + Assert.assertEquals("c", radioButton1.getSelectedExportValues().get(0)); + Assert.assertEquals(3, radioButton1.getExportValues().size()); + Assert.assertEquals("a", radioButton1.getExportValues().get(0)); + Assert.assertEquals("b", radioButton1.getExportValues().get(1)); + Assert.assertEquals("c", radioButton1.getExportValues().get(2)); + radioButton1.setValue("b"); + doc1.save("target/RadioButtonsSample-modified.pdf"); + doc1.close(); + + PDDocument doc2 = PDDocument.load(new File("target/RadioButtonsSample-modified.pdf")); + new PDFRenderer(doc2).renderImage(0); + PDAcroForm acroForm2 = doc2.getDocumentCatalog().getAcroForm(); + PDRadioButton radioButton2 = (PDRadioButton) acroForm2.getField("MyRadioButton"); + 
Assert.assertEquals("b", radioButton2.getValue()); + Assert.assertEquals(1, radioButton2.getSelectedExportValues().size()); + Assert.assertEquals("b", radioButton2.getSelectedExportValues().get(0)); + Assert.assertEquals(3, radioButton2.getExportValues().size()); + doc2.close(); + } + + private PDFont getFontFromWidgetResources(PDTextField textBox, String fontResourceName) throws IOException + { + PDAnnotationWidget widget = textBox.getWidgets().get(0); + PDAppearanceDictionary appearance = widget.getAppearance(); + PDAppearanceEntry normalAppearance = appearance.getNormalAppearance(); + PDAppearanceStream appearanceStream = normalAppearance.getAppearanceStream(); + PDResources resources = appearanceStream.getResources(); + return resources.getFont(COSName.getPDFName(fontResourceName)); + } +} diff --git a/examples/src/test/java/org/apache/pdfbox/examples/pdfa/CreatePDFATest.java b/examples/src/test/java/org/apache/pdfbox/examples/pdfa/CreatePDFATest.java index 86e7640da4e..a7fecf45e01 100644 --- a/examples/src/test/java/org/apache/pdfbox/examples/pdfa/CreatePDFATest.java +++ b/examples/src/test/java/org/apache/pdfbox/examples/pdfa/CreatePDFATest.java @@ -18,7 +18,10 @@ import junit.framework.TestCase; import java.io.File; +import java.io.FileInputStream; +import java.security.KeyStore; import org.apache.pdfbox.examples.pdmodel.CreatePDFA; +import org.apache.pdfbox.examples.signature.CreateSignature; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.common.PDMetadata; @@ -52,12 +55,21 @@ public void testCreatePDFA() throws Exception { System.out.println("testCreatePDFA"); String pdfaFilename = outDir + "/PDFA.pdf"; + String signedPdfaFilename = outDir + "/PDFA_signed.pdf"; + String keystorePath = "src/test/resources/org/apache/pdfbox/examples/signature/keystore.p12"; String message = "The quick brown fox jumps over the lazy dog äöüÄÖÜß @°^²³ {[]}"; String dir = 
"../pdfbox/src/main/resources/org/apache/pdfbox/resources/ttf/"; String fontfile = dir + "LiberationSans-Regular.ttf"; CreatePDFA.main(new String[] { pdfaFilename, message, fontfile }); - - PreflightParser preflightParser = new PreflightParser(new File(pdfaFilename)); + + // sign PDF - because we want to make sure that the signed PDF is also PDF/A-1b + KeyStore keystore = KeyStore.getInstance("PKCS12"); + keystore.load(new FileInputStream(keystorePath), "123456".toCharArray()); + CreateSignature signing = new CreateSignature(keystore, "123456".toCharArray()); + signing.signDetached(new File(pdfaFilename), new File(signedPdfaFilename)); + + // Verify that it is PDF/A-1b + PreflightParser preflightParser = new PreflightParser(new File(signedPdfaFilename)); preflightParser.parse(); PreflightDocument preflightDocument = preflightParser.getPreflightDocument(); preflightDocument.validate(); @@ -74,7 +86,7 @@ public void testCreatePDFA() throws Exception PDDocumentCatalog catalog = document.getDocumentCatalog(); PDMetadata meta = catalog.getMetadata(); DomXmpParser xmpParser = new DomXmpParser(); - XMPMetadata metadata = xmpParser.parse(meta.createInputStream()); + XMPMetadata metadata = xmpParser.parse(meta.toByteArray()); DublinCoreSchema dc = metadata.getDublinCoreSchema(); assertEquals(pdfaFilename, dc.getTitle()); document.close(); diff --git a/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestCreateGradientShadingPDF.java b/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestCreateGradientShadingPDF.java new file mode 100644 index 00000000000..02f4d7b1080 --- /dev/null +++ b/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestCreateGradientShadingPDF.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.pdmodel; + +import java.awt.Color; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.junit.Assert; +import org.junit.Test; + +/** + * + * @author Tilman Hausherr + */ +public class TestCreateGradientShadingPDF +{ + @Test + public void testCreateGradientShading() throws IOException + { + String filename = "target/GradientShading.pdf"; + + CreateGradientShadingPDF creator = new CreateGradientShadingPDF(); + creator.create(filename); + + PDDocument document = PDDocument.load(new File(filename)); + Set set = new HashSet(); + BufferedImage bim = new PDFRenderer(document).renderImage(0); + for (int x = 0; x < bim.getWidth(); ++x) + { + for (int y = 0; y < bim.getHeight(); ++y) + { + set.add(new Color(bim.getRGB(x, y))); + } + } + Assert.assertTrue(set.size() > 10000); // 10258 different colors on windows 10 + document.close(); + } +} diff --git a/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestCreateSignature.java b/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestCreateSignature.java index 8be4830187c..6fb5e354b1b 100644 --- a/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestCreateSignature.java +++ 
b/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestCreateSignature.java @@ -1,241 +1,1066 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.examples.pdmodel; - -import junit.framework.TestCase; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.RandomAccessFile; -import java.net.URL; -import java.security.GeneralSecurityException; -import java.security.KeyStore; -import java.security.MessageDigest; -import java.security.cert.Certificate; -import java.security.cert.X509Certificate; -import java.util.Collection; -import org.apache.pdfbox.cos.COSArray; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSString; - -import org.apache.pdfbox.examples.signature.CreateSignature; -import org.apache.pdfbox.examples.signature.CreateVisibleSignature; -import org.apache.pdfbox.examples.signature.TSAClient; -import org.apache.pdfbox.io.IOUtils; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.wink.client.MockHttpServer; -import org.bouncycastle.cert.X509CertificateHolder; -import 
org.bouncycastle.cert.jcajce.JcaX509CertificateConverter; -import org.bouncycastle.cms.CMSException; -import org.bouncycastle.cms.CMSProcessableByteArray; -import org.bouncycastle.cms.CMSSignedData; -import org.bouncycastle.cms.SignerInformation; -import org.bouncycastle.operator.OperatorCreationException; -import org.bouncycastle.tsp.TSPValidationException; -import org.bouncycastle.util.Store; - -/** - * Test for CreateSignature - */ -public class TestCreateSignature extends TestCase -{ - private final String inDir = "src/test/resources/org/apache/pdfbox/examples/signature/"; - private final String outDir = "target/test-output/"; - private final String keystorePath = inDir + "keystore.p12"; - private final String jpegPath = inDir + "stamp.jpg"; - private final String password = "123456"; - private Certificate certificate; - - @Override - protected void setUp() throws Exception - { - super.setUp(); - new File("target/test-output").mkdirs(); - - KeyStore keystore = KeyStore.getInstance("PKCS12"); - keystore.load(new FileInputStream(keystorePath), password.toCharArray()); - certificate = keystore.getCertificateChain(keystore.aliases().nextElement())[0]; - } - - /** - * Signs a PDF using the "adbe.pkcs7.detached" SubFilter with the SHA-256 digest. 
- * - * @throws IOException - * @throws GeneralSecurityException - */ - public void testDetachedSHA256() - throws IOException, CMSException, OperatorCreationException, GeneralSecurityException - { - // load the keystore - KeyStore keystore = KeyStore.getInstance("PKCS12"); - keystore.load(new FileInputStream(keystorePath), password.toCharArray()); - - // sign PDF - CreateSignature signing = new CreateSignature(keystore, password.toCharArray()); - signing.signDetached(new File(inDir + "sign_me.pdf"), new File(outDir + "signed.pdf")); - - checkSignature(new File(outDir + "signed.pdf")); - } - - /** - * Signs a PDF using the "adbe.pkcs7.detached" SubFilter with the SHA-256 digest and a signed - * timestamp from a Time Stamping Authority (TSA) server. - * - * This is not a complete test because we don't have the ability to return a valid response, so - * we return a cached response which is well-formed, but does not match the timestamp or nonce - * in the request. This allows us to test the basic TSA mechanism and test the nonce, which is a - * good start. 
- * - * @throws IOException - * @throws GeneralSecurityException - */ - public void testDetachedSHA256WithTSA() - throws IOException, CMSException, OperatorCreationException, GeneralSecurityException - { - // mock TSA response content - InputStream input = new FileInputStream(inDir + "tsa_response.asn1"); - byte[] content = IOUtils.toByteArray(input); - input.close(); - - // mock TSA server (RFC 3161) - MockHttpServer mockServer = new MockHttpServer(15371); - mockServer.startServer(); - String tsaUrl = "http://localhost:" + mockServer.getServerPort() + "/"; - MockHttpServer.MockHttpServerResponse response = new MockHttpServer.MockHttpServerResponse(); - response.setMockResponseContent(content); - response.setMockResponseContentType("application/timestamp-reply"); - response.setMockResponseCode(200); - mockServer.setMockHttpServerResponses(response); - - // TSA client - MessageDigest digest = MessageDigest.getInstance("SHA-256"); - TSAClient tsaClient = new TSAClient(new URL(tsaUrl), null, null, digest); - - // load the keystore - KeyStore keystore = KeyStore.getInstance("PKCS12"); - keystore.load(new FileInputStream(keystorePath), password.toCharArray()); - - // sign PDF (will fail due to nonce and timestamp differing) - try - { - String inPath = inDir + "sign_me_tsa.pdf"; - String outPath = outDir + "signed_tsa.pdf"; - CreateSignature signing = new CreateSignature(keystore, password.toCharArray()); - signing.signDetached(new File(inPath), new File(outPath), tsaClient); - } - catch (IOException e) - { - assertTrue(e.getCause() instanceof TSPValidationException); - } - - // TODO verify the signed PDF file - // TODO create a file signed with TSA - } - - /** - * Test creating visual signature. 
- * - * @throws IOException - * @throws GeneralSecurityException - */ - public void testCreateVisibleSignature() - throws IOException, CMSException, OperatorCreationException, GeneralSecurityException - { - // load the keystore - KeyStore keystore = KeyStore.getInstance("PKCS12"); - keystore.load(new FileInputStream(keystorePath), password.toCharArray()); - - // sign PDF - String inPath = inDir + "sign_me.pdf"; - FileInputStream fis = new FileInputStream(jpegPath); - CreateVisibleSignature signing = new CreateVisibleSignature(keystore, password.toCharArray()); - signing.setVisibleSignatureProperties(inPath, 0, 0, -50, fis, 1); - signing.setSignatureProperties("name", "location", "Security", 0, 1, true); - File destFile = new File(outDir + "signed_visible.pdf"); - signing.signPDF(new File(inPath), destFile); - - checkSignature(destFile); - } - - //TODO expand this into a full verify (if possible) - // This check fails with a file created with the code before PDFBOX-3011 was solved. - private void checkSignature(File file) - throws IOException, CMSException, OperatorCreationException, GeneralSecurityException - { - PDDocument document = PDDocument.load(file); - COSDictionary trailer = document.getDocument().getTrailer(); - COSDictionary root = (COSDictionary) trailer.getDictionaryObject(COSName.ROOT); - COSDictionary acroForm = (COSDictionary) root.getDictionaryObject(COSName.ACRO_FORM); - COSArray fields = (COSArray) acroForm.getDictionaryObject(COSName.FIELDS); - COSDictionary sig = null; - for (int i = 0; i < fields.size(); i++) - { - COSDictionary field = (COSDictionary) fields.getObject(i); - if (COSName.SIG.equals(field.getCOSName(COSName.FT))) - { - sig = (COSDictionary) field.getDictionaryObject(COSName.V); - - COSString contents = (COSString) sig.getDictionaryObject(COSName.CONTENTS); - COSArray byteRange = (COSArray) sig.getDictionaryObject(COSName.BYTERANGE); - - RandomAccessFile raf = new RandomAccessFile(file, "r"); - - byte[] buf = new 
byte[byteRange.getInt(1) + byteRange.getInt(3)]; - raf.seek(byteRange.getInt(0)); - raf.readFully(buf, 0, byteRange.getInt(1)); - raf.seek(byteRange.getInt(2)); - raf.readFully(buf, byteRange.getInt(1), byteRange.getInt(3)); - raf.close(); - - // inspiration: - // http://stackoverflow.com/a/26702631/535646 - // http://stackoverflow.com/a/9261365/535646 - CMSSignedData signedData = new CMSSignedData(new CMSProcessableByteArray(buf), contents.getBytes()); - Store certificatesStore = signedData.getCertificates(); - Collection signers = signedData.getSignerInfos().getSigners(); - SignerInformation signerInformation = signers.iterator().next(); - - Collection matches = certificatesStore.getMatches(signerInformation.getSID()); - X509CertificateHolder certificateHolder = (X509CertificateHolder) matches.iterator().next(); - X509Certificate certFromSignedData = new JcaX509CertificateConverter().getCertificate(certificateHolder); - - assertEquals(certificate, certFromSignedData); - - // code below doesn't work - maybe because the signature can indeed not be verified? - -// if (signerInformation.verify(new JcaSimpleSignerInfoVerifierBuilder().build(certFromSignedData))) -// { -// System.out.println("Signature verified"); -// } -// else -// { -// System.out.println("Signature verification failed"); -// } - - break; - } - } - if (sig == null) - { - fail("no signature found"); - } - document.close(); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.pdmodel; + +import java.awt.geom.Rectangle2D; +import java.awt.image.BufferedImage; +import java.awt.image.DataBufferInt; +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.security.GeneralSecurityException; +import java.security.KeyStore; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.security.SecureRandom; +import java.security.Security; +import java.security.UnrecoverableKeyException; +import java.security.cert.Certificate; +import java.security.cert.CertificateException; +import java.security.cert.CertificateFactory; +import java.security.cert.X509CRL; +import java.security.cert.X509Certificate; +import java.text.MessageFormat; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Collection; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.examples.interactive.form.CreateSimpleForm; +import org.apache.pdfbox.examples.signature.CreateEmbeddedTimeStamp; +import org.apache.pdfbox.examples.signature.CreateEmptySignatureForm; +import 
org.apache.pdfbox.examples.signature.CreateSignature; +import org.apache.pdfbox.examples.signature.CreateSignedTimeStamp; +import org.apache.pdfbox.examples.signature.CreateVisibleSignature; +import org.apache.pdfbox.examples.signature.CreateVisibleSignature2; +import org.apache.pdfbox.examples.signature.SigUtils; +import org.apache.pdfbox.examples.signature.cert.CertificateVerificationException; +import org.apache.pdfbox.examples.signature.validation.AddValidationInformation; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.PDPageTree; +import org.apache.pdfbox.pdmodel.encryption.SecurityProvider; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.ExternalSigningSupport; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; +import org.apache.pdfbox.pdmodel.interactive.form.PDField; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.util.Hex; + +import org.apache.wink.client.MockHttpServer; + +import org.bouncycastle.asn1.ocsp.OCSPResponseStatus; +import org.bouncycastle.cert.X509CertificateHolder; +import org.bouncycastle.cert.jcajce.JcaX509CertificateConverter; +import org.bouncycastle.cert.ocsp.BasicOCSPResp; +import org.bouncycastle.cert.ocsp.OCSPException; +import org.bouncycastle.cert.ocsp.OCSPResp; +import org.bouncycastle.cms.CMSException; +import org.bouncycastle.cms.CMSProcessableByteArray; +import org.bouncycastle.cms.CMSSignedData; +import org.bouncycastle.cms.SignerInformation; +import org.bouncycastle.cms.jcajce.JcaSimpleSignerInfoVerifierBuilder; +import org.bouncycastle.crypto.prng.FixedSecureRandom; +import org.bouncycastle.operator.ContentVerifierProvider; +import org.bouncycastle.operator.OperatorCreationException; +import 
org.bouncycastle.operator.jcajce.JcaContentVerifierProviderBuilder; +import org.bouncycastle.tsp.TSPException; +import org.bouncycastle.tsp.TSPValidationException; +import org.bouncycastle.tsp.TimeStampToken; +import org.bouncycastle.tsp.TimeStampTokenInfo; +import org.bouncycastle.util.CollectionStore; +import org.bouncycastle.util.Selector; +import org.bouncycastle.util.Store; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * Test for CreateSignature. Each test case will run twice: once with SignatureInterface + * and once using external signature creation scenario. + */ +@RunWith(Parameterized.class) +public class TestCreateSignature +{ + private static CertificateFactory certificateFactory = null; + private static KeyStore keyStore = null; + private static final String inDir = "src/test/resources/org/apache/pdfbox/examples/signature/"; + private static final String outDir = "target/test-output/"; + private static final String keystorePath = inDir + "keystore.p12"; + private static final String jpegPath = inDir + "stamp.jpg"; + private static final String password = "123456"; + private static Certificate certificate; + private static String tsa; + + @Parameterized.Parameter + public boolean externallySign; + + /** + * Values for {@link #externallySign} test parameter to specify if signing should be conducted + * using externally signing scenario ({@code true}) or SignatureInterface ({@code false}). 
+ */ + @Parameterized.Parameters + public static Collection signingTypes() + { + return Arrays.asList(false, true); + } + + @BeforeClass + public static void init() throws Exception + { + Security.addProvider(SecurityProvider.getProvider()); + certificateFactory = CertificateFactory.getInstance("X.509"); + + // load the keystore + keyStore = KeyStore.getInstance("PKCS12"); + keyStore.load(new FileInputStream(keystorePath), password.toCharArray()); + + new File("target/test-output").mkdirs(); + + certificate = keyStore.getCertificateChain(keyStore.aliases().nextElement())[0]; + tsa = System.getProperty("org.apache.pdfbox.examples.pdmodel.tsa"); + } + + /** + * Signs a PDF using the "adbe.pkcs7.detached" SubFilter with the SHA-256 digest. + * + * @throws IOException + * @throws GeneralSecurityException + * @throws CMSException + * @throws OperatorCreationException + * @throws TSPException + * @throws CertificateVerificationException + */ + @Test + public void testDetachedSHA256() + throws IOException, CMSException, OperatorCreationException, GeneralSecurityException, + TSPException, CertificateVerificationException + { + // sign PDF + CreateSignature signing = new CreateSignature(keyStore, password.toCharArray()); + signing.setExternalSigning(externallySign); + + final String fileName = getOutputFileName("signed{0}.pdf"); + final String fileName2 = getOutputFileName("signed{0}-late-tsa.pdf"); + signing.signDetached(new File(inDir + "sign_me.pdf"), new File(outDir + fileName)); + + checkSignature(new File(inDir, "sign_me.pdf"), new File(outDir, fileName), false); + + // Also test CreateEmbeddedTimeStamp if tsa URL is available + if (tsa == null || tsa.isEmpty()) + { + System.err.println("No TSA URL defined, test skipped"); + return; + } + + CreateEmbeddedTimeStamp tsaSigning = new CreateEmbeddedTimeStamp(tsa); + tsaSigning.embedTimeStamp(new File(outDir, fileName), new File(outDir, fileName2)); + checkSignature(new File(outDir, fileName), new File(outDir, fileName2), 
true); + } + + /** + * Signs a PDF using the "adbe.pkcs7.detached" SubFilter with the SHA-256 digest and a signed + * timestamp from a Time Stamping Authority (TSA) server. + * + * This is not a complete test because we don't have the ability to return a valid response, so + * we return a cached response which is well-formed, but does not match the timestamp or nonce + * in the request. This allows us to test the basic TSA mechanism and test the nonce, which is a + * good start. + * + * @throws IOException + * @throws GeneralSecurityException + * @throws CMSException + * @throws OperatorCreationException + * @throws TSPException + * @throws CertificateVerificationException + */ + @Test + public void testDetachedSHA256WithTSA() + throws IOException, CMSException, OperatorCreationException, GeneralSecurityException, + TSPException, CertificateVerificationException + { + // mock TSA response content + InputStream input = new FileInputStream(inDir + "tsa_response.asn1"); + byte[] content = IOUtils.toByteArray(input); + input.close(); + + // mock TSA server (RFC 3161) + MockHttpServer mockServer = new MockHttpServer(15371); + mockServer.startServer(); + String brokenMockTSA = "http://localhost:" + mockServer.getServerPort() + "/"; + MockHttpServer.MockHttpServerResponse response = new MockHttpServer.MockHttpServerResponse(); + response.setMockResponseContent(content); + response.setMockResponseContentType("application/timestamp-reply"); + response.setMockResponseCode(200); + mockServer.setMockHttpServerResponses(response); + + String inPath = inDir + "sign_me_tsa.pdf"; + String outPath = outDir + getOutputFileName("signed{0}_tsa.pdf"); + + // sign PDF (will fail due to nonce and timestamp differing) + CreateSignature signing1 = new CreateSignature(keyStore, password.toCharArray()); + signing1.setExternalSigning(externallySign); + try + { + signing1.signDetached(new File(inPath), new File(outPath), brokenMockTSA); + Assert.fail("This should have failed"); + } + catch 
(IOException e) + { + Assert.assertTrue(e.getCause() instanceof TSPValidationException); + new File(outPath).delete(); + } + + mockServer.stopServer(); + + if (tsa == null || tsa.isEmpty()) + { + System.err.println("No TSA URL defined, test skipped"); + return; + } + + CreateSignature signing2 = new CreateSignature(keyStore, password.toCharArray()); + signing2.setExternalSigning(externallySign); + signing2.signDetached(new File(inPath), new File(outPath), tsa); + checkSignature(new File(inPath), new File(outPath), true); + System.out.println("TSA test successful"); + } + + /** + * Test timestamp only signature (ETSI.RFC3161). + * + * @throws IOException + * @throws CMSException + * @throws OperatorCreationException + * @throws GeneralSecurityException + * @throws TSPException + * @throws CertificateVerificationException + */ + @Test + public void testCreateSignedTimeStamp() + throws IOException, CMSException, OperatorCreationException, GeneralSecurityException, + TSPException, CertificateVerificationException, OCSPException + { + if (externallySign) + { + return; // runs only once, independent of externallySign + } + if (tsa == null || tsa.isEmpty()) + { + System.err.println("No TSA URL defined, test skipped"); + return; + } + final String fileName = "timestamped.pdf"; + CreateSignedTimeStamp signing = new CreateSignedTimeStamp(tsa); + signing.signDetached(new File(inDir + "sign_me.pdf"), new File(outDir + fileName)); + + PDDocument doc = PDDocument.load(new File(outDir + fileName)); + PDSignature signature = doc.getLastSignatureDictionary(); + byte[] signedFileContent = + signature.getSignedContent(new FileInputStream(new File(outDir, fileName))); + byte[] contents = signature.getContents(); + TimeStampToken timeStampToken = new TimeStampToken(new CMSSignedData(contents)); + ByteArrayInputStream certStream = new ByteArrayInputStream(contents); + Collection certs = certificateFactory.generateCertificates(certStream); + + String hashAlgorithm = 
timeStampToken.getTimeStampInfo().getMessageImprintAlgOID().getId(); + // compare the hash of the signed content with the hash in the timestamp + Assert.assertArrayEquals(MessageDigest.getInstance(hashAlgorithm).digest(signedFileContent), + timeStampToken.getTimeStampInfo().getMessageImprintDigest()); + + X509Certificate certFromTimeStamp = (X509Certificate) certs.iterator().next(); + SigUtils.checkTimeStampCertificateUsage(certFromTimeStamp); + SigUtils.validateTimestampToken(timeStampToken); + SigUtils.verifyCertificateChain(timeStampToken.getCertificates(), + certFromTimeStamp, + timeStampToken.getTimeStampInfo().getGenTime()); + + doc.close(); + + File inFile = new File(outDir, fileName); + String name = inFile.getName(); + String substring = name.substring(0, name.lastIndexOf('.')); + + File outFile = new File(outDir, substring + "_LTV.pdf"); + AddValidationInformation addValidationInformation = new AddValidationInformation(); + addValidationInformation.validateSignature(inFile, outFile); + + checkLTV(outFile); + } + + /** + * Test creating visual signature. 
+ * + * @throws IOException + * @throws CMSException + * @throws OperatorCreationException + * @throws GeneralSecurityException + * @throws TSPException + * @throws CertificateVerificationException + */ + @Test + public void testCreateVisibleSignature() + throws IOException, CMSException, OperatorCreationException, GeneralSecurityException, + TSPException, CertificateVerificationException + { + // sign PDF + String inPath = inDir + "sign_me_visible.pdf"; + FileInputStream fis = new FileInputStream(jpegPath); + CreateVisibleSignature signing = new CreateVisibleSignature(keyStore, password.toCharArray()); + signing.setVisibleSignDesigner(inPath, 0, 0, -50, fis, 1); + signing.setVisibleSignatureProperties("name", "location", "Security", 0, 1, true); + signing.setExternalSigning(externallySign); + + File destFile = new File(outDir + getOutputFileName("signed{0}_visible.pdf")); + signing.signPDF(new File(inPath), destFile, null); + fis.close(); + + checkSignature(new File(inPath), destFile, false); + } + + /** + * Test creating visual signature with the modernized example. 
+ * + * @throws IOException + * @throws CMSException + * @throws OperatorCreationException + * @throws GeneralSecurityException + * @throws TSPException + * @throws CertificateVerificationException + */ + @Test + public void testCreateVisibleSignature2() + throws IOException, CMSException, OperatorCreationException, GeneralSecurityException, + TSPException, CertificateVerificationException + { + // sign PDF + String inPath = inDir + "sign_me_visible.pdf"; + File destFile; + + CreateVisibleSignature2 signing = new CreateVisibleSignature2(keyStore, password.toCharArray()); + Rectangle2D humanRect = new Rectangle2D.Float(100, 200, 150, 50); + signing.setImageFile(new File(jpegPath)); + signing.setExternalSigning(externallySign); + destFile = new File(outDir + getOutputFileName("signed{0}_visible2.pdf")); + signing.signPDF(new File(inPath), destFile, humanRect, null); + + checkSignature(new File(inPath), destFile, false); + } + + /** + * Test when visually signing externally on an existing signature field on a file which has + * been signed before. 
+ * + * @throws IOException + * @throws NoSuchAlgorithmException + * @throws CertificateException + * @throws UnrecoverableKeyException + * @throws CMSException + * @throws OperatorCreationException + * @throws GeneralSecurityException + * @throws TSPException + * @throws CertificateVerificationException + */ + @Test + public void testPDFBox3978() throws IOException, NoSuchAlgorithmException, + CertificateException, UnrecoverableKeyException, + CMSException, OperatorCreationException, GeneralSecurityException, + TSPException, CertificateVerificationException + { + String filename = outDir + "EmptySignatureForm.pdf"; + String filenameSigned1 = outDir + "EmptySignatureForm-signed1.pdf"; + String filenameSigned2 = outDir + "EmptySignatureForm-signed2.pdf"; + + if (!externallySign) + { + return; + } + + // create file with empty signature + CreateEmptySignatureForm.main(new String[]{filename}); + + // sign PDF + CreateSignature signing1 = new CreateSignature(keyStore, password.toCharArray()); + signing1.setExternalSigning(false); + signing1.signDetached(new File(filename), new File(filenameSigned1)); + + checkSignature(new File(filename), new File(filenameSigned1), false); + + PDDocument doc1 = PDDocument.load(new File(filenameSigned1)); + List signatureDictionaries = doc1.getSignatureDictionaries(); + Assert.assertEquals(1, signatureDictionaries.size()); + doc1.close(); + + // do visual signing in the field + FileInputStream fis = new FileInputStream(jpegPath); + CreateVisibleSignature signing2 = new CreateVisibleSignature(keyStore, password.toCharArray()); + signing2.setVisibleSignDesigner(filenameSigned1, 0, 0, -50, fis, 1); + signing2.setVisibleSignatureProperties("name", "location", "Security", 0, 1, true); + signing2.setExternalSigning(true); + signing2.signPDF(new File(filenameSigned1), new File(filenameSigned2), null, "Signature1"); + fis.close(); + + checkSignature(new File(filenameSigned1), new File(filenameSigned2), false); + + PDDocument doc2 = 
PDDocument.load(new File(filenameSigned2)); + signatureDictionaries = doc2.getSignatureDictionaries(); + Assert.assertEquals(2, signatureDictionaries.size()); + doc2.close(); + } + + private String getOutputFileName(String filePattern) + { + return MessageFormat.format(filePattern,(externallySign ? "_ext" : "")); + } + + // This check fails with a file created with the code before PDFBOX-3011 was solved. + private void checkSignature(File origFile, File signedFile, boolean checkTimeStamp) + throws IOException, CMSException, OperatorCreationException, GeneralSecurityException, + TSPException, CertificateVerificationException + { + PDDocument document = PDDocument.load(origFile); + // get string representation of pages COSObject + String origPageKey = document.getDocumentCatalog().getCOSObject().getItem(COSName.PAGES).toString(); + document.close(); + + document = PDDocument.load(signedFile); + + // early detection of problems in the page structure + int p = 0; + PDPageTree pageTree = document.getPages(); + for (PDPage page : document.getPages()) + { + Assert.assertEquals(p, pageTree.indexOf(page)); + ++p; + } + + // PDFBOX-4261: check that object number stays the same + Assert.assertEquals(origPageKey, document.getDocumentCatalog().getCOSObject().getItem(COSName.PAGES).toString()); + + List signatureDictionaries = document.getSignatureDictionaries(); + if (signatureDictionaries.isEmpty()) + { + Assert.fail("no signature found"); + } + for (PDSignature sig : document.getSignatureDictionaries()) + { + byte[] contents = sig.getContents(); + + byte[] buf = sig.getSignedContent(new FileInputStream(signedFile)); + + // verify that getSignedContent() brings the same content + // regardless whether from an InputStream or from a byte array + FileInputStream fis2 = new FileInputStream(signedFile); + byte[] buf2 = sig.getSignedContent(IOUtils.toByteArray(fis2)); + Assert.assertArrayEquals(buf, buf2); + fis2.close(); + + // verify that all getContents() methods returns the same 
content + FileInputStream fis3 = new FileInputStream(signedFile); + byte[] contents2 = sig.getContents(IOUtils.toByteArray(fis3)); + Assert.assertArrayEquals(contents, contents2); + fis3.close(); + byte[] contents3 = sig.getContents(new FileInputStream(signedFile)); + Assert.assertArrayEquals(contents, contents3); + + // inspiration: + // http://stackoverflow.com/a/26702631/535646 + // http://stackoverflow.com/a/9261365/535646 + CMSSignedData signedData = new CMSSignedData(new CMSProcessableByteArray(buf), contents); + Store certificatesStore = signedData.getCertificates(); + Collection signers = signedData.getSignerInfos().getSigners(); + SignerInformation signerInformation = signers.iterator().next(); + Collection matches = certificatesStore.getMatches((Selector) signerInformation.getSID()); + X509CertificateHolder certificateHolder = (X509CertificateHolder) matches.iterator().next(); + Assert.assertArrayEquals(certificate.getEncoded(), certificateHolder.getEncoded()); + + // CMSVerifierCertificateNotValidException means that the keystore wasn't valid at signing time + if (!signerInformation.verify(new JcaSimpleSignerInfoVerifierBuilder().build(certificateHolder))) + { + Assert.fail("Signature verification failed"); + } + + TimeStampToken timeStampToken = SigUtils.extractTimeStampTokenFromSignerInformation(signerInformation); + if (checkTimeStamp) + { + Assert.assertNotNull(timeStampToken); + SigUtils.validateTimestampToken(timeStampToken); + + TimeStampTokenInfo timeStampInfo = timeStampToken.getTimeStampInfo(); + + // compare the hash of the signed content with the hash in the timestamp + byte[] tsMessageImprintDigest = timeStampInfo.getMessageImprintDigest(); + String hashAlgorithm = timeStampInfo.getMessageImprintAlgOID().getId(); + byte[] sigMessageImprintDigest = MessageDigest.getInstance(hashAlgorithm).digest(signerInformation.getSignature()); + Assert.assertArrayEquals("timestamp signature verification failed", sigMessageImprintDigest, 
tsMessageImprintDigest); + + Store tsCertStore = timeStampToken.getCertificates(); + + // get the certificate from the timeStampToken + @SuppressWarnings("unchecked") // TimeStampToken.getSID() is untyped + Collection tsCertStoreMatches = tsCertStore.getMatches(timeStampToken.getSID()); + X509CertificateHolder certHolderFromTimeStamp = tsCertStoreMatches.iterator().next(); + X509Certificate certFromTimeStamp = new JcaX509CertificateConverter().getCertificate(certHolderFromTimeStamp); + + SigUtils.checkTimeStampCertificateUsage(certFromTimeStamp); + SigUtils.verifyCertificateChain(tsCertStore, certFromTimeStamp, timeStampInfo.getGenTime()); + } + else + { + Assert.assertNull(timeStampToken); + } + } + document.close(); + } + + private String calculateDigestString(InputStream inputStream) throws NoSuchAlgorithmException, IOException + { + MessageDigest md = MessageDigest.getInstance("SHA-256"); + return Hex.getString(md.digest(IOUtils.toByteArray(inputStream))); + } + + /** + * PDFBOX-3811: make sure that calling saveIncrementalForExternalSigning() more than once + * brings the same result. 
+ * + * @throws IOException + * @throws NoSuchAlgorithmException + */ + @Test + public void testPDFBox3811() throws IOException, NoSuchAlgorithmException + { + if (!externallySign) + { + return; + } + + // create simple PDF + PDDocument document = new PDDocument(); + PDPage page = new PDPage(); + document.addPage(page); + new PDPageContentStream(document, page).close(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + document.save(baos); + document.close(); + + document = PDDocument.load(baos.toByteArray()); + // for stable digest + document.setDocumentId(12345L); + + PDSignature signature = new PDSignature(); + signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE); + signature.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED); + document.addSignature(signature); + int[] reserveByteRange = signature.getByteRange(); + + String digestString = calculateDigestString(document.saveIncrementalForExternalSigning(new ByteArrayOutputStream()).getContent()); + boolean caught = false; + try + { + document.saveIncrementalForExternalSigning(new ByteArrayOutputStream()); + } + catch (IllegalStateException ex) + { + caught = true; + } + Assert.assertTrue("IllegalStateException should have been thrown", caught); + signature.setByteRange(reserveByteRange); + Assert.assertEquals(digestString, calculateDigestString(document.saveIncrementalForExternalSigning(new ByteArrayOutputStream()).getContent())); + } + + /** + * Create a simple form PDF, sign it, reload it, change a field value, incrementally save it. + * This should not break the signature, and the value and its display must have changed as + * expected. Do this both for the old and new incremental save methods. 
+ * + * @throws Exception + */ + @Test + public void testSaveIncrementalAfterSign() throws Exception + { + BufferedImage oldImage, expectedImage1, actualImage1, expectedImage2, actualImage2; + DataBufferInt expectedData; + DataBufferInt actualData; + PDField field; + FileOutputStream fileOutputStream; + + CreateSimpleForm.main(new String[0]); // creates "target/SimpleForm.pdf" + + // sign PDF + CreateSignature signing = new CreateSignature(keyStore, password.toCharArray()); + signing.setExternalSigning(externallySign); + + final String fileNameSigned = getOutputFileName("SimpleForm_signed{0}.pdf"); + final String fileNameResaved1 = getOutputFileName("SimpleForm_signed{0}_incrementallyresaved1.pdf"); + final String fileNameResaved2 = getOutputFileName("SimpleForm_signed{0}_incrementallyresaved2.pdf"); + signing.signDetached(new File("target/SimpleForm.pdf"), new File(outDir + fileNameSigned)); + + checkSignature(new File("target/SimpleForm.pdf"), new File(outDir, fileNameSigned), false); + + PDDocument doc = PDDocument.load(new File(outDir, fileNameSigned)); + + oldImage = new PDFRenderer(doc).renderImage(0); + + fileOutputStream = new FileOutputStream(new File(outDir, fileNameResaved1)); + field = doc.getDocumentCatalog().getAcroForm().getField("SampleField"); + field.setValue("New Value 1"); + + // Test of PDFBOX-4509: only "Helv" font should be there + Collection fonts = (Collection) field.getWidgets().get(0).getAppearance(). 
+ getNormalAppearance().getAppearanceStream().getResources().getFontNames(); + Assert.assertTrue(fonts.contains(COSName.HELV)); + Assert.assertEquals(1, fonts.size()); + + expectedImage1 = new PDFRenderer(doc).renderImage(0); + + // compare images, image must has changed + Assert.assertEquals(oldImage.getWidth(), expectedImage1.getWidth()); + Assert.assertEquals(oldImage.getHeight(), expectedImage1.getHeight()); + Assert.assertEquals(oldImage.getType(), expectedImage1.getType()); + expectedData = (DataBufferInt) oldImage.getRaster().getDataBuffer(); + actualData = (DataBufferInt) expectedImage1.getRaster().getDataBuffer(); + Assert.assertEquals(expectedData.getData().length, actualData.getData().length); + Assert.assertFalse(Arrays.equals(expectedData.getData(), actualData.getData())); + + // old style incremental save: create a "path" from the root to the objects that need an update + doc.getDocumentCatalog().getCOSObject().setNeedToBeUpdated(true); + doc.getDocumentCatalog().getAcroForm().getCOSObject().setNeedToBeUpdated(true); + field.getCOSObject().setNeedToBeUpdated(true); + field.getWidgets().get(0).getAppearance().getCOSObject().setNeedToBeUpdated(true); + ((COSDictionary) field.getWidgets().get(0).getAppearance().getNormalAppearance().getCOSObject()).setNeedToBeUpdated(true); + doc.saveIncremental(fileOutputStream); + doc.close(); + checkSignature(new File("target/SimpleForm.pdf"), new File(outDir, fileNameResaved1), false); + + doc = PDDocument.load(new File(outDir, fileNameResaved1)); + + field = doc.getDocumentCatalog().getAcroForm().getField("SampleField"); + Assert.assertEquals("New Value 1", field.getValueAsString()); + actualImage1 = new PDFRenderer(doc).renderImage(0); + // compare images, equality proves that the appearance has been updated too + Assert.assertEquals(expectedImage1.getWidth(), actualImage1.getWidth()); + Assert.assertEquals(expectedImage1.getHeight(), actualImage1.getHeight()); + Assert.assertEquals(expectedImage1.getType(), 
actualImage1.getType()); + expectedData = (DataBufferInt) expectedImage1.getRaster().getDataBuffer(); + actualData = (DataBufferInt) actualImage1.getRaster().getDataBuffer(); + Assert.assertArrayEquals(expectedData.getData(), actualData.getData()); + doc.close(); + + doc = PDDocument.load(new File(outDir, fileNameSigned)); + + fileOutputStream = new FileOutputStream(new File(outDir, fileNameResaved2)); + field = doc.getDocumentCatalog().getAcroForm().getField("SampleField"); + field.setValue("New Value 2"); + expectedImage2 = new PDFRenderer(doc).renderImage(0); + + // compare images, image must has changed + Assert.assertEquals(oldImage.getWidth(), expectedImage2.getWidth()); + Assert.assertEquals(oldImage.getHeight(), expectedImage2.getHeight()); + Assert.assertEquals(oldImage.getType(), expectedImage2.getType()); + expectedData = (DataBufferInt) oldImage.getRaster().getDataBuffer(); + actualData = (DataBufferInt) expectedImage2.getRaster().getDataBuffer(); + Assert.assertEquals(expectedData.getData().length, actualData.getData().length); + Assert.assertFalse(Arrays.equals(expectedData.getData(), actualData.getData())); + + // new style incremental save: add only the objects that have changed + Set objectsToWrite = new HashSet(); + objectsToWrite.add(field.getCOSObject()); + objectsToWrite.add(field.getWidgets().get(0).getAppearance().getCOSObject()); + objectsToWrite.add((COSDictionary) field.getWidgets().get(0).getAppearance().getNormalAppearance().getCOSObject()); + doc.saveIncremental(fileOutputStream, objectsToWrite); + doc.close(); + + checkSignature(new File("target/SimpleForm.pdf"), new File(outDir, fileNameResaved2), false); + doc = PDDocument.load(new File(outDir, fileNameResaved2)); + + field = doc.getDocumentCatalog().getAcroForm().getField("SampleField"); + Assert.assertEquals("New Value 2", field.getValueAsString()); + actualImage2 = new PDFRenderer(doc).renderImage(0); + // compare images, equality proves that the appearance has been updated too + 
Assert.assertEquals(expectedImage2.getWidth(), actualImage2.getWidth()); + Assert.assertEquals(expectedImage2.getHeight(), actualImage2.getHeight()); + Assert.assertEquals(expectedImage2.getType(), actualImage2.getType()); + expectedData = (DataBufferInt) expectedImage2.getRaster().getDataBuffer(); + actualData = (DataBufferInt) actualImage2.getRaster().getDataBuffer(); + Assert.assertArrayEquals(expectedData.getData(), actualData.getData()); + doc.close(); + } + + @Test + public void testPDFBox4784() throws Exception + { + if (!externallySign) + { + return; + } + Date signingTime = new Date(); + + byte[] defaultSignedOne = signEncrypted(null, signingTime); + byte[] defaultSignedTwo = signEncrypted(null, signingTime); + Assert.assertFalse(Arrays.equals(defaultSignedOne, defaultSignedTwo)); + + // a dummy value for FixedSecureRandom is used (for real use-cases a secure value should be provided) + byte[] fixedRandomSignedOne = signEncrypted(new FixedSecureRandom(new byte[128]), + signingTime); + byte[] fixedRandomSignedTwo = signEncrypted(new FixedSecureRandom(new byte[128]), + signingTime); + Assert.assertArrayEquals(fixedRandomSignedOne, fixedRandomSignedTwo); + } + + /** + * Test getting CRLs when OCSP (adobe-ocsp.geotrust.com) is unavailable. + * This validates the certificates of the signature from the file 083698.pdf, which is + * 109TH CONGRESS 2D SESSION H. R. 5500, from MAY 25, 2006. 
+ * + * @throws IOException + * @throws CMSException + * @throws CertificateException + * @throws TSPException + * @throws OperatorCreationException + * @throws CertificateVerificationException + * @throws NoSuchAlgorithmException + */ + @Test + public void testCRL() throws IOException, CMSException, CertificateException, TSPException, + OperatorCreationException, CertificateVerificationException, NoSuchAlgorithmException + { + if (externallySign) + { + return; // runs only once, independent of externallySign + } + String hexSignature; + BufferedReader bfr + = new BufferedReader(new InputStreamReader(new FileInputStream(inDir + "hexsignature.txt"))); + hexSignature = bfr.readLine(); + bfr.close(); + + CMSSignedData signedData = new CMSSignedData(Hex.decodeHex(hexSignature)); + Collection signers = signedData.getSignerInfos().getSigners(); + SignerInformation signerInformation = signers.iterator().next(); + Store certificatesStore = signedData.getCertificates(); + @SuppressWarnings("unchecked") // SignerInformation.getSID() is untyped + Collection matches = certificatesStore.getMatches(signerInformation.getSID()); + X509CertificateHolder certificateHolder = matches.iterator().next(); + X509Certificate certFromSignedData = new JcaX509CertificateConverter().getCertificate(certificateHolder); + SigUtils.checkCertificateUsage(certFromSignedData); + + TimeStampToken timeStampToken = SigUtils.extractTimeStampTokenFromSignerInformation(signerInformation); + SigUtils.validateTimestampToken(timeStampToken); + @SuppressWarnings("unchecked") // TimeStampToken.getSID() is untyped + Collection tstMatches = + timeStampToken.getCertificates().getMatches((Selector) timeStampToken.getSID()); + X509CertificateHolder tstCertHolder = tstMatches.iterator().next(); + X509Certificate certFromTimeStamp = new JcaX509CertificateConverter().getCertificate(tstCertHolder); + // merge both stores using a set to remove duplicates + HashSet certificateHolderSet = new HashSet(); + 
certificateHolderSet.addAll(certificatesStore.getMatches(null)); + certificateHolderSet.addAll(timeStampToken.getCertificates().getMatches(null)); + SigUtils.verifyCertificateChain(new CollectionStore(certificateHolderSet), + certFromTimeStamp, + timeStampToken.getTimeStampInfo().getGenTime()); + SigUtils.checkTimeStampCertificateUsage(certFromTimeStamp); + + // compare the hash of the signature with the hash in the timestamp + byte[] tsMessageImprintDigest = timeStampToken.getTimeStampInfo().getMessageImprintDigest(); + String hashAlgorithm = timeStampToken.getTimeStampInfo().getMessageImprintAlgOID().getId(); + byte[] sigMessageImprintDigest = MessageDigest.getInstance(hashAlgorithm).digest(signerInformation.getSignature()); + Assert.assertArrayEquals(tsMessageImprintDigest, sigMessageImprintDigest); + + certFromSignedData.checkValidity(timeStampToken.getTimeStampInfo().getGenTime()); + SigUtils.verifyCertificateChain(certificatesStore, certFromSignedData, timeStampToken.getTimeStampInfo().getGenTime()); + } + + /** + * Test adding LTV information. This tests the status quo. If we use a new file (or if the file + * gets updated) then the test may have to be adjusted. The test is not really perfect, but it + * tries to check a minimum of things that should match. If the test fails and you didn't change + * anything in signing, then find out whether some external servers involved are unresponsive. + * At the time of writing this, the OCSP server http://ocsp.quovadisglobal.com responds with 502 + * "UNAUTHORIZED". That is not a problem as long as the CRL URL works. 
+ * + * @throws java.io.IOException + * @throws java.security.GeneralSecurityException + * @throws org.bouncycastle.cert.ocsp.OCSPException + * @throws org.bouncycastle.operator.OperatorCreationException + * @throws org.bouncycastle.cms.CMSException + */ + @Test + public void testAddValidationInformation() + throws IOException, GeneralSecurityException, OCSPException, OperatorCreationException, CMSException + { + if (externallySign) + { + return; // runs only once, independent of externallySign + } + File inFile = new File("target/pdfs", "notCertified_368835_Sig_en_201026090509.pdf"); + String name = inFile.getName(); + String substring = name.substring(0, name.lastIndexOf('.')); + + File outFile = new File(outDir, substring + "_LTV.pdf"); + AddValidationInformation addValidationInformation = new AddValidationInformation(); + addValidationInformation.validateSignature(inFile, outFile); + + checkLTV(outFile); + } + + @Test + public void testDoubleVisibleSignatureOnEncryptedFile() + throws IOException, CMSException, OperatorCreationException, GeneralSecurityException, TSPException, CertificateVerificationException + { + // sign PDF + String inPath = "target/pdfs/PDFBOX-2469-1-AcroForm-AES128.pdf"; + FileInputStream fis = new FileInputStream(jpegPath); + CreateVisibleSignature signing = new CreateVisibleSignature(keyStore, password.toCharArray()); + signing.setVisibleSignDesigner(inPath, 0, 0, -50, fis, 1); + signing.setVisibleSignatureProperties("name", "location", "Security", 0, 1, true); + signing.setExternalSigning(externallySign); + File destFile = new File(outDir, getOutputFileName("2signed{0}_visible.pdf")); + signing.signPDF(new File(inPath), destFile, null); + fis.close(); + + checkSignature(new File(inPath), destFile, false); + + inPath = destFile.getAbsolutePath(); + fis = new FileInputStream(jpegPath); + signing = new CreateVisibleSignature(keyStore, password.toCharArray()); + signing.setVisibleSignDesigner(inPath, 0, 0, -50, fis, 2); + 
signing.setVisibleSignatureProperties("name", "location", "Security", 0, 2, true); + signing.setExternalSigning(externallySign); + destFile = new File(outDir, getOutputFileName("2signed{0}_visible_signed{0}_visible.pdf")); + signing.signPDF(new File(inPath), destFile, null); + fis.close(); + + checkSignature(new File(inPath), destFile, false); + } + + private void checkLTV(File outFile) + throws IOException, GeneralSecurityException, OCSPException, OperatorCreationException, + CMSException + { + PDDocument doc = PDDocument.load(outFile); + + PDSignature signature = doc.getLastSignatureDictionary(); + byte[] contents = signature.getContents(); + + PDDocumentCatalog docCatalog = doc.getDocumentCatalog(); + COSDictionary dssDict = docCatalog.getCOSObject().getCOSDictionary(COSName.getPDFName("DSS")); + COSArray dssCertArray = dssDict.getCOSArray(COSName.getPDFName("Certs")); + COSDictionary vriDict = dssDict.getCOSDictionary(COSName.getPDFName("VRI")); + + // Check that all known signature certificates are in the VRI/signaturehash/Cert array + byte[] signatureHash = MessageDigest.getInstance("SHA-1").digest(contents); + String hexSignatureHash = Hex.getString(signatureHash); + System.out.println("hexSignatureHash: " + hexSignatureHash); + CMSSignedData signedData = new CMSSignedData(contents); + Store certificatesStore = signedData.getCertificates(); + HashSet certificateHolderSet = + new HashSet(certificatesStore.getMatches(null)); + COSDictionary sigDict = vriDict.getCOSDictionary(COSName.getPDFName(hexSignatureHash)); + COSArray sigCertArray = sigDict.getCOSArray(COSName.getPDFName("Cert")); + Set sigCertHolderSetFromVRIArray = new HashSet(); + for (int i = 0; i < sigCertArray.size(); ++i) + { + COSStream certStream = (COSStream) sigCertArray.getObject(i); + InputStream is = certStream.createInputStream(); + sigCertHolderSetFromVRIArray.add(new X509CertificateHolder(IOUtils.toByteArray(is))); + is.close(); + } + for (X509CertificateHolder holder : 
certificateHolderSet) + { + if (holder.getSubject().toString().contains("QuoVadis OCSP Authority Signature")) + { + continue; // not relevant here + } + // disabled until PDFBOX-5203 is fixed +// Assert.assertTrue("File '" + outFile + "' Root/DSS/VRI/" + hexSignatureHash + +// "/Cert array doesn't contain a certificate with subject '" + +// holder.getSubject() + "' and serial " + holder.getSerialNumber(), +// sigCertHolderSetFromVRIArray.contains(holder)); + } + + // Get all certificates. Each one should either be issued (= signed) by a certificate of the set + Set certSet = new HashSet(); + for (int i = 0; i < dssCertArray.size(); ++i) + { + COSStream certStream = (COSStream) dssCertArray.getObject(i); + InputStream is = certStream.createInputStream(); + X509Certificate cert = (X509Certificate) certificateFactory.generateCertificate(is); + is.close(); + certSet.add(cert); + } + for (X509Certificate cert : certSet) + { + boolean verified = false; + for (X509Certificate cert2 : certSet) + { + try + { + cert.verify(cert2.getPublicKey(), SecurityProvider.getProvider().getName()); + verified = true; + } + catch (GeneralSecurityException ex) + { + // not the issuer + } + } + // disabled until PDFBOX-5203 is fixed +// Assert.assertTrue("Certificate " + cert.getSubjectX500Principal() + +// " not issued by any certificate in the Certs array", verified); + } + + // Each CRL should be signed by one of the certificates in Certs + Set crlSet = new HashSet(); + COSArray crlArray = dssDict.getCOSArray(COSName.getPDFName("CRLs")); + for (int i = 0; i < crlArray.size(); ++i) + { + COSStream crlStream = (COSStream) crlArray.getObject(i); + InputStream is = crlStream.createInputStream(); + X509CRL cert = (X509CRL) certificateFactory.generateCRL(is); + is.close(); + crlSet.add(cert); + } + for (X509CRL crl : crlSet) + { + boolean crlVerified = false; + X509Certificate crlIssuerCert = null; + for (X509Certificate cert : certSet) + { + try + { + crl.verify(cert.getPublicKey(), 
SecurityProvider.getProvider().getName()); + crlVerified = true; + crlIssuerCert = cert; + } + catch (GeneralSecurityException ex) + { + // not the issuer + } + } + Assert.assertTrue("issuer of CRL not found in Certs array", crlVerified); + + byte[] crlSignatureHash = MessageDigest.getInstance("SHA-1").digest(crl.getSignature()); + String hexCrlSignatureHash = Hex.getString(crlSignatureHash); + System.out.println("hexCrlSignatureHash: " + hexCrlSignatureHash); + + // Check that the issueing certificate is in the VRI array + COSDictionary crlSigDict = vriDict.getCOSDictionary(COSName.getPDFName(hexCrlSignatureHash)); + COSArray certArray2 = crlSigDict.getCOSArray(COSName.getPDFName("Cert")); + COSStream certStream = (COSStream) certArray2.getObject(0); + InputStream is2 = certStream.createInputStream(); + X509CertificateHolder certHolder2 = new X509CertificateHolder(IOUtils.toByteArray(is2)); + is2.close(); + + Assert.assertEquals("CRL issuer certificate missing in VRI " + hexCrlSignatureHash, + certHolder2, new X509CertificateHolder(crlIssuerCert.getEncoded())); + } + + Set oscpSet = new HashSet(); + COSArray ocspArray = dssDict.getCOSArray(COSName.getPDFName("OCSPs")); + for (int i = 0; i < ocspArray.size(); ++i) + { + COSStream ocspStream = (COSStream) ocspArray.getObject(i); + InputStream is = ocspStream.createInputStream(); + OCSPResp ocspResp = new OCSPResp(is); + is.close(); + oscpSet.add(ocspResp); + } + for (OCSPResp ocspResp : oscpSet) + { + BasicOCSPResp basicResponse = (BasicOCSPResp) ocspResp.getResponseObject(); + Assert.assertEquals(OCSPResponseStatus.SUCCESSFUL, ocspResp.getStatus()); + Assert.assertTrue("OCSP should have at least 1 certificate", basicResponse.getCerts().length >= 1); + byte[] ocspSignatureHash = MessageDigest.getInstance("SHA-1").digest(basicResponse.getSignature()); + String hexOcspSignatureHash = Hex.getString(ocspSignatureHash); + System.out.println("ocspSignatureHash: " + hexOcspSignatureHash); + long secondsOld = 
(System.currentTimeMillis() - basicResponse.getProducedAt().getTime()) / 1000; + Assert.assertTrue("OCSP answer is too old, is from " + secondsOld + " seconds ago", + secondsOld < 10); + + X509CertificateHolder ocspCertHolder = basicResponse.getCerts()[0]; + ContentVerifierProvider verifier = new JcaContentVerifierProviderBuilder().setProvider(SecurityProvider.getProvider()).build(ocspCertHolder); + Assert.assertTrue(basicResponse.isSignatureValid(verifier)); + + COSDictionary ocspSigDict = vriDict.getCOSDictionary(COSName.getPDFName(hexOcspSignatureHash)); + + // Check that the Cert is in the VRI array + COSArray certArray2 = ocspSigDict.getCOSArray(COSName.getPDFName("Cert")); + COSStream certStream = (COSStream) certArray2.getObject(0); + InputStream is2 = certStream.createInputStream(); + X509CertificateHolder certHolder2 = new X509CertificateHolder(IOUtils.toByteArray(is2)); + is2.close(); + + Assert.assertEquals("OCSP certificate is not in the VRI array", certHolder2, ocspCertHolder); + } + + doc.close(); + } + + private byte[] signEncrypted(SecureRandom secureRandom, Date signingTime) throws Exception + { + CreateSignature signing = new CreateSignature(keyStore, password.toCharArray()); + signing.setExternalSigning(true); + + File inFile = new File(inDir + "sign_me_protected.pdf"); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + PDDocument doc = null; + try + { + doc = PDDocument.load(inFile, " "); + + if (secureRandom != null) + { + doc.getEncryption().getSecurityHandler().setCustomSecureRandom(secureRandom); + } + + PDSignature signature = new PDSignature(); + signature.setName("Example User"); + Calendar cal = Calendar.getInstance(); + cal.setTime(signingTime); + signature.setSignDate(cal); + + doc.addSignature(signature); + doc.setDocumentId(12345l); + ExternalSigningSupport externalSigning = doc.saveIncrementalForExternalSigning(baos); + // invoke external signature service + return IOUtils.toByteArray(externalSigning.getContent()); + } 
+ finally + { + IOUtils.closeQuietly(doc); + IOUtils.closeQuietly(baos); + } + } +} diff --git a/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestEmbeddedFiles.java b/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestEmbeddedFiles.java new file mode 100644 index 00000000000..f9f589fbe0a --- /dev/null +++ b/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestEmbeddedFiles.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.examples.pdmodel; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.pdfbox.io.IOUtils; + +import org.junit.Assert; +import org.junit.Test; + + +/** + * + * @author Tilman Hausherr + */ +public class TestEmbeddedFiles +{ + /** + * Very basic test of embedding and extracting an attachment. 
+ * + * @throws IOException + */ + @Test + public void testEmbeddedFiles() throws IOException + { + String outputFile = "target/test-output/EmbeddedFile.pdf"; + String embeddedFile = "target/test-output/Test.txt"; + + new File("target/test-output").mkdirs(); + new File(outputFile).delete(); + new File(embeddedFile).delete(); + String[] args = new String[] { outputFile }; + EmbeddedFiles.main(args); + ExtractEmbeddedFiles.main(args); + InputStream is = new FileInputStream(embeddedFile); + byte[] bytes = IOUtils.toByteArray(is); + is.close(); + String content = new String(bytes); + Assert.assertEquals("This is the contents of the embedded file", content); + new File(embeddedFile).delete(); + new File(outputFile).delete(); + } +} \ No newline at end of file diff --git a/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestHelloWorld.java b/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestHelloWorld.java new file mode 100644 index 00000000000..994002efe98 --- /dev/null +++ b/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestHelloWorld.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.examples.pdmodel; + +import java.io.File; +import java.io.IOException; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.text.PDFTextStripper; +import static org.junit.Assert.assertEquals; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * + * @author Tilman Hausherr + */ +public class TestHelloWorld +{ + private static final String OUTPUT_DIR = "target/test-output"; + + @BeforeClass + public static void init() throws Exception + { + new File(OUTPUT_DIR).mkdirs(); + } + + @Test + public void testHelloWorldTTF() throws IOException + { + String outputFile = OUTPUT_DIR + "/HelloWorldTTF.pdf"; + String message = "HelloWorldTTF.pdf"; + String fontFile = "../pdfbox/src/main/resources/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"; + + new File(outputFile).delete(); + + String[] args = new String[] { outputFile, message, fontFile }; + HelloWorldTTF.main(args); + + checkOutputFile(outputFile, message); + + new File(outputFile).delete(); + } + + @Test + public void testHelloWorld() throws IOException + { + String outputDir = "target/test-output"; + String outputFile = outputDir + "/HelloWorld.pdf"; + String message = "HelloWorld.pdf"; + + new File(outputFile).delete(); + + String[] args = new String[] { outputFile, message }; + HelloWorld.main(args); + + checkOutputFile(outputFile, message); + + new File(outputFile).delete(); + } + + private void checkOutputFile(String outputFile, String message) throws IOException + { + PDDocument doc = PDDocument.load(new File(outputFile)); + PDFTextStripper stripper = new PDFTextStripper(); + String extractedText = stripper.getText(doc).trim(); + assertEquals(message, extractedText); + doc.close(); + } +} diff --git a/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestRubberStampWithImage.java b/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestRubberStampWithImage.java index 5488f8aff9c..42880662c14 100644 --- 
a/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestRubberStampWithImage.java +++ b/examples/src/test/java/org/apache/pdfbox/examples/pdmodel/TestRubberStampWithImage.java @@ -16,16 +16,31 @@ */ package org.apache.pdfbox.examples.pdmodel; -import junit.framework.TestCase; +import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; +import javax.imageio.ImageIO; + +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationRubberStamp; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceEntry; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.rendering.PDFRenderer; + +import org.junit.Assert; +import org.junit.Test; + /** * Test for RubberStampWithImage */ -public class TestRubberStampWithImage extends TestCase +public class TestRubberStampWithImage { + @Test public void test() throws IOException { String documentFile = "src/test/resources/org/apache/pdfbox/examples/pdmodel/document.pdf"; @@ -34,8 +49,48 @@ public void test() throws IOException new File("target/test-output").mkdirs(); + PDDocument doc1 = PDDocument.load(new File(documentFile)); + BufferedImage bim1 = new PDFRenderer(doc1).renderImage(0); + doc1.close(); + String[] args = new String[] { documentFile, outFile, stampFile }; RubberStampWithImage rubberStamp = new RubberStampWithImage(); rubberStamp.doIt(args); + + PDDocument doc2 = PDDocument.load(new File(outFile)); + BufferedImage bim2 = new PDFRenderer(doc2).renderImage(0); + Assert.assertFalse(compareImages(bim1, bim2)); + PDAnnotationRubberStamp rubberStampAnnotation = (PDAnnotationRubberStamp) doc2.getPage(0).getAnnotations().get(0); + PDAppearanceDictionary appearance = 
rubberStampAnnotation.getAppearance(); + PDAppearanceEntry normalAppearance = appearance.getNormalAppearance(); + PDAppearanceStream appearanceStream = normalAppearance.getAppearanceStream(); + PDImageXObject ximage = (PDImageXObject) appearanceStream.getResources().getXObject(COSName.getPDFName("Im1")); + BufferedImage actualStampImage = ximage.getImage(); + BufferedImage expectedStampImage = ImageIO.read(new File(stampFile)); + Assert.assertTrue(compareImages(expectedStampImage, actualStampImage)); + doc2.close(); + } + + private boolean compareImages(BufferedImage bim1, BufferedImage bim2) + { + if (bim1.getWidth() != bim2.getWidth()) + { + return false; + } + if (bim1.getHeight() != bim2.getHeight()) + { + return false; + } + for (int x = 0; x < bim1.getWidth(); ++x) + { + for (int y = 0; y < bim1.getHeight(); ++y) + { + if (bim1.getRGB(x, y) != bim2.getRGB(x, y)) + { + return false; + } + } + } + return true; } -} +} \ No newline at end of file diff --git a/examples/src/test/java/org/apache/wink/client/MockHttpServer.java b/examples/src/test/java/org/apache/wink/client/MockHttpServer.java new file mode 100644 index 00000000000..c3f1d1acc74 --- /dev/null +++ b/examples/src/test/java/org/apache/wink/client/MockHttpServer.java @@ -0,0 +1,550 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + *******************************************************************************/ + +package org.apache.wink.client; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.BindException; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.net.ServerSocketFactory; +import javax.net.ssl.SSLServerSocketFactory; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.Response.Status; + +import org.apache.pdfbox.io.IOUtils; + +/** + * Copied from + * http://svn.apache.org/repos/asf/wink/trunk/wink-component-test-support/src/main/java/org/apache/wink/client/MockHttpServer.java + * on 28.7.2018. 
+ */ +public class MockHttpServer extends Thread { + + public static class MockHttpServerResponse { + + // mock response data + private int mockResponseCode = 200; + private final Map mockResponseHeaders = new HashMap(); + private byte[] mockResponseContent = "received message".getBytes(); + private String mockResponseContentType = "text/plain;charset=utf-8"; + private boolean mockResponseContentEchoRequest; + + public void setMockResponseHeaders(Map headers) { + mockResponseHeaders.clear(); + mockResponseHeaders.putAll(headers); + } + + public void setMockResponseHeader(String name, String value) { + mockResponseHeaders.put(name, value); + } + + public Map getMockResponseHeaders() { + return mockResponseHeaders; + } + + public void setMockResponseCode(int responseCode) { + this.mockResponseCode = responseCode; + } + + public int getMockResponseCode() { + return mockResponseCode; + } + + public void setMockResponseContent(String content) { + mockResponseContent = content.getBytes(); + } + + public void setMockResponseContent(byte[] content) { + mockResponseContent = content; + } + + public byte[] getMockResponseContent() { + return mockResponseContent; + } + + public void setMockResponseContentType(String type) { + mockResponseContentType = type; + } + + public String getMockResponseContentType() { + return mockResponseContentType; + } + + public void setMockResponseContentEchoRequest(boolean echo) { + mockResponseContentEchoRequest = echo; + } + + public boolean getMockResponseContentEchoRequest() { + return mockResponseContentEchoRequest; + } + } + + private Thread serverThread = null; + private ServerSocket serverSocket = null; + private boolean serverStarted = false; + private ServerSocketFactory serverSocketFactory = null; + private int serverPort; + private int readTimeOut = 5000; // 5 + // seconds + private int delayResponseTime = 0; + private static byte[] NEW_LINE = "\r\n".getBytes(); + // request data + private String requestMethod = null; + private 
String requestUrl = null; + private Map> requestHeaders = + new HashMap>(); + private ByteArrayOutputStream requestContent = new ByteArrayOutputStream(); + private List mockHttpServerResponses = + new ArrayList(); + private int responseCounter = 0; + + public MockHttpServer(int serverPort) { + this(serverPort, false); + } + + public MockHttpServer(int serverPort, boolean ssl) { + mockHttpServerResponses.add(new MockHttpServerResponse()); // set a + // default + // response + this.serverPort = serverPort; + try { + serverSocketFactory = ServerSocketFactory.getDefault(); + if (ssl) { + serverSocketFactory = SSLServerSocketFactory.getDefault(); + } + while (serverSocket == null) { + try { + serverSocket = serverSocketFactory.createServerSocket(++this.serverPort); + } catch (BindException e) { + + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public synchronized void startServer() { + if (serverStarted) + return; + + // start the server thread + start(); + serverStarted = true; + + // wait for the server thread to start + waitForServerToStart(); + } + + private synchronized void waitForServerToStart() { + try { + wait(5000); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + + private synchronized void waitForServerToStop() { + try { + wait(5000); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + + @Override + public void run() { + serverThread = Thread.currentThread(); + executeLoop(); + } + + private void executeLoop() { + serverStarted(); + try { + while (true) { + Socket socket = serverSocket.accept(); + HttpProcessor processor = new HttpProcessor(socket); + processor.run(); + } + } catch (IOException e) { + if (e instanceof SocketException) { + if (!("Socket closed".equalsIgnoreCase(e.getMessage()) || "Socket is closed" + .equalsIgnoreCase(e.getMessage()))) { + e.printStackTrace(); + throw new RuntimeException(e); + } + } else { + e.printStackTrace(); + throw new 
RuntimeException(e); + } + } finally { + // notify that the server was stopped + serverStopped(); + } + } + + private synchronized void serverStarted() { + // notify the waiting thread that the thread started + notifyAll(); + } + + private synchronized void serverStopped() { + // notify the waiting thread that the thread started + notifyAll(); + } + + public synchronized void stopServer() { + if (!serverStarted) + return; + + try { + serverStarted = false; + // the server may be sleeping somewhere... + serverThread.interrupt(); + // close the server socket + serverSocket.close(); + // wait for the server to stop + waitForServerToStop(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + private class HttpProcessor { + + private Socket socket; + + public HttpProcessor(Socket socket) throws SocketException { + // set the read timeout (5 seconds by default) + socket.setSoTimeout(readTimeOut); + socket.setKeepAlive(false); + this.socket = socket; + } + + public void run() { + try { + processRequest(socket); + processResponse(socket); + } catch (IOException e) { + if (e instanceof SocketException) { + if (!("socket closed".equalsIgnoreCase(e.getMessage()))) { + e.printStackTrace(); + throw new RuntimeException(e); + } + } else { + e.printStackTrace(); + throw new RuntimeException(e); + } + } finally { + try { + socket.shutdownOutput(); + socket.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + + private void processRequest(Socket socket) throws IOException { + requestContent.reset(); + BufferedInputStream is = new BufferedInputStream(socket.getInputStream()); + String requestMethodHeader = new String(readLine(is)); + processRequestMethod(requestMethodHeader); + processRequestHeaders(is); + processRequestContent(is); + } + + private void processRequestMethod(String requestMethodHeader) { + String[] parts = requestMethodHeader.split(" "); + if (parts.length < 2) { + throw new RuntimeException("illegal http request"); + } + requestMethod 
= parts[0]; + requestUrl = parts[1]; + } + + private void processRequestHeaders(InputStream is) throws IOException { + requestHeaders.clear(); + byte[] line; + while ((line = readLine(is)) != null) { + String lineStr = new String(line); + // if there are no more headers + if ("".equals(lineStr.trim())) { + break; + } + addRequestHeader(lineStr); + } + } + + private void processRequestContent(InputStream is) throws NumberFormatException, + IOException { + if (!("PUT".equals(requestMethod) || "POST".equals(requestMethod))) { + return; + } + + List transferEncodingValues = requestHeaders.get("Transfer-Encoding"); + String transferEncoding = + (transferEncodingValues == null || transferEncodingValues.isEmpty()) ? null + : transferEncodingValues.get(0); + if ("chunked".equals(transferEncoding)) { + processChunkedContent(is); + } else { + processRegularContent(is); + } + + if (mockHttpServerResponses.get(responseCounter).getMockResponseContentEchoRequest()) { + mockHttpServerResponses.get(responseCounter).setMockResponseContent(requestContent + .toByteArray()); + } + + } + + private void processRegularContent(InputStream is) throws IOException { + List contentLengthValues = requestHeaders.get("Content-Length"); + String contentLength = + (contentLengthValues == null || contentLengthValues.isEmpty()) ? null + : contentLengthValues.get(0); + if (contentLength == null) { + return; + } + int contentLen = Integer.parseInt(contentLength); + byte[] bytes = new byte[contentLen]; + IOUtils.populateBuffer(is, bytes); + requestContent.write(bytes); + } + + private void processChunkedContent(InputStream is) throws IOException { + requestContent.write("".getBytes()); + byte[] chunk; + byte[] line = null; + boolean lastChunk = false; + // we should exit this loop only after we get to the end of stream + while (!lastChunk && (line = readLine(is)) != null) { + + String lineStr = new String(line); + // a chunk is identified as: + // 1) not an empty line + // 2) not 0. 
0 means that there are no more chunks + if ("0".equals(lineStr)) { + lastChunk = true; + } + + if (!lastChunk) { + // get the length of the current chunk (it is in hexadecimal + // form) + int chunkLen = Integer.parseInt(lineStr, 16); + + // get the chunk + chunk = getChunk(is, chunkLen); + + // consume the newline after the chunk that separates + // between + // the chunk content and the next chunk size + readLine(is); + + requestContent.write(chunk); + } + } + + // do one last read to consume the empty line after the last chunk + if (lastChunk) { + readLine(is); + } + } + + private byte[] readLine(InputStream is) throws IOException { + int n; + ByteArrayOutputStream tmpOs = new ByteArrayOutputStream(); + while ((n = is.read()) != -1) { + if (n == '\r') { + n = is.read(); + if (n == '\n') { + return tmpOs.toByteArray(); + } else { + tmpOs.write('\r'); + if (n != -1) { + tmpOs.write(n); + } else { + return tmpOs.toByteArray(); + } + } + } else if (n == '\n') { + return tmpOs.toByteArray(); + } else { + tmpOs.write(n); + } + } + return tmpOs.toByteArray(); + } + + private byte[] getChunk(InputStream is, int len) throws IOException { + ByteArrayOutputStream chunk = new ByteArrayOutputStream(); + int read; + int totalRead = 0; + byte[] bytes = new byte[512]; + // read len bytes as the chunk + while (totalRead < len) { + read = is.read(bytes, 0, Math.min(bytes.length, len - totalRead)); + chunk.write(bytes, 0, read); + totalRead += read; + } + return chunk.toByteArray(); + } + + private void addRequestHeader(String line) { + String[] parts = line.split(": "); + List values = requestHeaders.get(parts[0]); + if (values == null) { + values = new ArrayList(); + requestHeaders.put(parts[0], values); + } + values.add(parts[1]); + } + + private void processResponse(Socket socket) throws IOException { + // if delaying the response failed (because it was interrupted) + // then don't send the response + if (!delayResponse()) + return; + + OutputStream sos = 
socket.getOutputStream(); + BufferedOutputStream os = new BufferedOutputStream(sos); + String reason = ""; + Status statusCode = + Response.Status.fromStatusCode(mockHttpServerResponses.get(responseCounter) + .getMockResponseCode()); + if (statusCode != null) { + reason = statusCode.toString(); + } + os.write(("HTTP/1.1 " + mockHttpServerResponses.get(responseCounter) + .getMockResponseCode() + + " " + reason).getBytes()); + os.write(NEW_LINE); + processResponseHeaders(os); + processResponseContent(os); + os.flush(); + responseCounter++; + } + + // return: + // true - delay was successful + // false - delay was unsuccessful + private boolean delayResponse() { + // delay the response by delayResponseTime milliseconds + if (delayResponseTime > 0) { + try { + Thread.sleep(delayResponseTime); + return true; + } catch (InterruptedException e) { + return false; + } + } + return true; + } + + private void processResponseContent(OutputStream os) throws IOException { + if (mockHttpServerResponses.get(responseCounter).getMockResponseContent() == null) { + return; + } + + os.write(mockHttpServerResponses.get(responseCounter).getMockResponseContent()); + } + + private void processResponseHeaders(OutputStream os) throws IOException { + addServerResponseHeaders(); + for (String header : mockHttpServerResponses.get(responseCounter) + .getMockResponseHeaders().keySet()) { + os.write((header + ": " + mockHttpServerResponses.get(responseCounter) + .getMockResponseHeaders().get(header)).getBytes()); + os.write(NEW_LINE); + } + os.write(NEW_LINE); + } + + private void addServerResponseHeaders() { + Map mockResponseHeaders = + mockHttpServerResponses.get(responseCounter).getMockResponseHeaders(); + mockResponseHeaders.put("Content-Type", mockHttpServerResponses.get(responseCounter) + .getMockResponseContentType()); + mockResponseHeaders.put("Content-Length", mockHttpServerResponses.get(responseCounter) + .getMockResponseContent().length + ""); + mockResponseHeaders.put("Server", "Mock 
HTTP Server v1.0"); + mockResponseHeaders.put("Connection", "closed"); + } + } + + public void setReadTimeout(int milliseconds) { + readTimeOut = milliseconds; + } + + public void setDelayResponse(int milliseconds) { + delayResponseTime = milliseconds; + } + + public String getRequestContentAsString() { + return requestContent.toString(); + } + + public byte[] getRequestContent() { + return requestContent.toByteArray(); + } + + public Map> getRequestHeaders() { + return requestHeaders; + } + + public String getRequestMethod() { + return requestMethod; + } + + public String getRequestUrl() { + return requestUrl; + } + + public void setMockHttpServerResponses(MockHttpServerResponse... responses) { + mockHttpServerResponses.clear(); + mockHttpServerResponses.addAll(Arrays.asList(responses)); + } + + public List getMockHttpServerResponses() { + return mockHttpServerResponses; + } + + public void setServerPort(int serverPort) { + this.serverPort = serverPort; + } + + public int getServerPort() { + return serverPort; + } +} diff --git a/examples/src/test/resources/org/apache/pdfbox/examples/signature/hexsignature.txt b/examples/src/test/resources/org/apache/pdfbox/examples/signature/hexsignature.txt new file mode 100644 index 00000000000..8cb145db97f --- /dev/null +++ b/examples/src/test/resources/org/apache/pdfbox/examples/signature/hexsignature.txt @@ -0,0 +1 @@ diff --git a/examples/src/test/resources/org/apache/pdfbox/examples/signature/keystore.p12 b/examples/src/test/resources/org/apache/pdfbox/examples/signature/keystore.p12 index a4d24bf59d8..ee7d8065dc9 100644 Binary files a/examples/src/test/resources/org/apache/pdfbox/examples/signature/keystore.p12 and b/examples/src/test/resources/org/apache/pdfbox/examples/signature/keystore.p12 differ diff --git a/examples/src/test/resources/org/apache/pdfbox/examples/signature/sign_me_protected.pdf b/examples/src/test/resources/org/apache/pdfbox/examples/signature/sign_me_protected.pdf new file mode 100644 index 
00000000000..cbe614b5a57 Binary files /dev/null and b/examples/src/test/resources/org/apache/pdfbox/examples/signature/sign_me_protected.pdf differ diff --git a/fontbox/pom.xml b/fontbox/pom.xml index 7c78d228b66..f15377c19e0 100644 --- a/fontbox/pom.xml +++ b/fontbox/pom.xml @@ -21,7 +21,7 @@ org.apache.pdfbox pdfbox-parent - 2.0.0-SNAPSHOT + 2.0.25-SNAPSHOT ../parent/pom.xml @@ -61,9 +61,65 @@ src/main/resources/org/apache/fontbox/cmap/* + src/main/resources/org/apache/fontbox/unicode/* - + + + org.apache.maven.plugins + maven-jar-plugin + + + + org.apache.fontbox + + + + + + + + com.googlecode.maven-download-plugin + download-maven-plugin + + + PDFBOX-4038 + generate-test-resources + + wget + + + https://issues.apache.org/jira/secure/attachment/12684264/SourceSansProBold.otf + ${project.build.directory}/pdfs + 28a044a2685fbc8da7810d9ac7b6b93a95542d504d7d8e671f009b8ebb2f5b70c974be7ea78974b188d8e6ab17d65b08f276c054927857315d5aad26f6fe36fc + + + + PDFBOX-3997 + generate-test-resources + + wget + + + https://issues.apache.org/jira/secure/attachment/12896461/NotoEmoji-Regular.ttf + ${project.build.directory}/pdfs + 51b01ab0794be9f92c59679f6d56d4ce09ed959daeb9ec945bb837eb15a82ab302e83b29aab1972ac9cb648f7196a5f5ff4488a4622b36bedbc9cd0cab6dc3de + + + + PDFBOX-3379 + generate-test-resources + + wget + + + https://issues.apache.org/jira/secure/attachment/12809395/DejaVuSansMono.ttf + ${project.build.directory}/pdfs + 1af1ce3e6d34a0b89c93072d8646e92cceb45b276389d2dd0d84457ec1193394d2bcc49bf3ce99c9c6b2658cd1337fc40ee5c61957f74cd45dbc3d51b6aef417 + + + + diff --git a/fontbox/src/main/appended-resources/META-INF/LICENSE b/fontbox/src/main/appended-resources/META-INF/LICENSE index a63eb20648e..4e356abd033 100644 --- a/fontbox/src/main/appended-resources/META-INF/LICENSE +++ b/fontbox/src/main/appended-resources/META-INF/LICENSE @@ -30,3 +30,100 @@ Apache FontBox is based on contributions made to the original FontBox project: LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Lohit-Bengali font (https://pagure.io/lohit): + + Copyright 2011-13 Lohit Fonts Project contributors + + + This Font Software is licensed under the SIL Open Font License, Version 1.1. + This license is copied below, and is also available with a FAQ at: + http://scripts.sil.org/OFL + + + ----------------------------------------------------------- + SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 + ----------------------------------------------------------- + + PREAMBLE + The goals of the Open Font License (OFL) are to stimulate worldwide + development of collaborative font projects, to support the font creation + efforts of academic and linguistic communities, and to provide a free and + open framework in which fonts may be shared and improved in partnership + with others. + + The OFL allows the licensed fonts to be used, studied, modified and + redistributed freely as long as they are not sold by themselves. The + fonts, including any derivative works, can be bundled, embedded, + redistributed and/or sold with any software provided that any reserved + names are not used by derivative works. The fonts and derivatives, + however, cannot be released under any other type of license. The + requirement for fonts to remain under this license does not apply + to any document created using the fonts or their derivatives. + + DEFINITIONS + "Font Software" refers to the set of files released by the Copyright + Holder(s) under this license and clearly marked as such. This may + include source files, build scripts and documentation. + + "Reserved Font Name" refers to any names specified as such after the + copyright statement(s). + + "Original Version" refers to the collection of Font Software components as + distributed by the Copyright Holder(s). 
+ + "Modified Version" refers to any derivative made by adding to, deleting, + or substituting -- in part or in whole -- any of the components of the + Original Version, by changing formats or by porting the Font Software to a + new environment. + + "Author" refers to any designer, engineer, programmer, technical + writer or other person who contributed to the Font Software. + + PERMISSION & CONDITIONS + Permission is hereby granted, free of charge, to any person obtaining + a copy of the Font Software, to use, study, copy, merge, embed, modify, + redistribute, and sell modified and unmodified copies of the Font + Software, subject to the following conditions: + + 1) Neither the Font Software nor any of its individual components, + in Original or Modified Versions, may be sold by itself. + + 2) Original or Modified Versions of the Font Software may be bundled, + redistributed and/or sold with any software, provided that each copy + contains the above copyright notice and this license. These can be + included either as stand-alone text files, human-readable headers or + in the appropriate machine-readable metadata fields within text or + binary files as long as those fields can be easily viewed by the user. + + 3) No Modified Version of the Font Software may use the Reserved Font + Name(s) unless explicit written permission is granted by the corresponding + Copyright Holder. This restriction only applies to the primary font name as + presented to the users. + + 4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font + Software shall not be used to promote, endorse or advertise any + Modified Version, except to acknowledge the contribution(s) of the + Copyright Holder(s) and the Author(s) or with their explicit written + permission. + + 5) The Font Software, modified or unmodified, in part or in whole, + must be distributed entirely under this license, and must not be + distributed under any other license. 
The requirement for fonts to + remain under this license does not apply to any document created + using the Font Software. + + TERMINATION + This license becomes null and void if any of the above conditions are + not met. + + DISCLAIMER + THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE + COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL + DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM + OTHER DEALINGS IN THE FONT SOFTWARE. diff --git a/fontbox/src/main/appended-resources/META-INF/NOTICE b/fontbox/src/main/appended-resources/META-INF/NOTICE index b228e5cc7ce..c1f8cdd1aaf 100644 --- a/fontbox/src/main/appended-resources/META-INF/NOTICE +++ b/fontbox/src/main/appended-resources/META-INF/NOTICE @@ -1,2 +1,5 @@ Based on source code contributed to the original FontBox project. Copyright (c) 2006-2007, www.fontbox.org + +Includes the Script Property (Scripts-10.0.0.txt) +Copyright 2017 Unicode, Inc. diff --git a/fontbox/src/main/java/org/apache/fontbox/afm/AFMParser.java b/fontbox/src/main/java/org/apache/fontbox/afm/AFMParser.java index 84018838491..b31379adbaa 100644 --- a/fontbox/src/main/java/org/apache/fontbox/afm/AFMParser.java +++ b/fontbox/src/main/java/org/apache/fontbox/afm/AFMParser.java @@ -253,11 +253,11 @@ public class AFMParser */ public static final String START_KERN_PAIRS1 = "StartKernPairs1"; /** - * This is the start compisites data section. + * This is the start composites data section. */ public static final String START_COMPOSITES = "StartComposites"; /** - * This is the end compisites data section. 
+ * This is the end composites data section. */ public static final String END_COMPOSITES = "EndComposites"; /** @@ -265,7 +265,7 @@ public class AFMParser */ public static final String CC = "CC"; /** - * This is a composite charater part. + * This is a composite character part. */ public static final String PCC = "PCC"; /** @@ -612,45 +612,31 @@ private KernPair parseKernPair() throws IOException String cmd = readString(); if( KERN_PAIR_KP.equals( cmd ) ) { - String first = readString(); - String second = readString(); - float x = readFloat(); - float y = readFloat(); - kernPair.setFirstKernCharacter( first ); - kernPair.setSecondKernCharacter( second ); - kernPair.setX( x ); - kernPair.setY( y ); + kernPair.setFirstKernCharacter(readString()); + kernPair.setSecondKernCharacter(readString()); + kernPair.setX(readFloat()); + kernPair.setY(readFloat()); } else if( KERN_PAIR_KPH.equals( cmd ) ) { - String first = hexToString( readString() ); - String second = hexToString( readString() ); - float x = readFloat(); - float y = readFloat(); - kernPair.setFirstKernCharacter( first ); - kernPair.setSecondKernCharacter( second ); - kernPair.setX( x ); - kernPair.setY( y ); + kernPair.setFirstKernCharacter(hexToString(readString())); + kernPair.setSecondKernCharacter(hexToString(readString())); + kernPair.setX(readFloat()); + kernPair.setY(readFloat()); } else if( KERN_PAIR_KPX.equals( cmd ) ) { - String first = readString(); - String second = readString(); - float x = readFloat(); - kernPair.setFirstKernCharacter( first ); - kernPair.setSecondKernCharacter( second ); - kernPair.setX( x ); + kernPair.setFirstKernCharacter(readString()); + kernPair.setSecondKernCharacter(readString()); + kernPair.setX(readFloat()); kernPair.setY( 0 ); } else if( KERN_PAIR_KPY.equals( cmd ) ) { - String first = readString(); - String second = readString(); - float y = readFloat(); - kernPair.setFirstKernCharacter( first ); - kernPair.setSecondKernCharacter( second ); + 
kernPair.setFirstKernCharacter(readString()); + kernPair.setSecondKernCharacter(readString()); kernPair.setX( 0 ); - kernPair.setY( y ); + kernPair.setY(readFloat()); } else { @@ -680,10 +666,10 @@ private String hexToString( String hexString ) throws IOException throw new IOException( "String should be enclosed by angle brackets '" + hexString+ "'" ); } hexString = hexString.substring( 1, hexString.length() -1 ); - byte[] data = new byte[ (hexString.length() / 2) ]; + byte[] data = new byte[hexString.length() / 2]; for( int i=0; i - - + + This package holds classes used to parse AFM(Adobe Font Metrics) files. -
+
More information about AFM files can be found at http://partners.adobe.com/asn/developer/type/ diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/CFFDataInput.java b/fontbox/src/main/java/org/apache/fontbox/cff/CFFDataInput.java index fff70596448..68196577a60 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/CFFDataInput.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/CFFDataInput.java @@ -72,13 +72,19 @@ public int readOffset(int offSize) throws IOException } /** - * Read the offsize from the buffer. - * @return the offsize - * @throws IOException if an error occurs during reading + * Read offSize from the buffer. This is a 1 byte value between 1 and 4. + * + * @return the offSize. + * @throws IOException if an error occurs during reading or if the value is illegal. */ public int readOffSize() throws IOException { - return readUnsignedByte(); + int offSize = readUnsignedByte(); + if (offSize < 1 || offSize > 4) + { + throw new IOException("Illegal (< 1 or > 4) offSize value " + offSize + " in CFF font at position " + (getPosition() - 1)); + } + return offSize; } /** diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/CFFEncoding.java b/fontbox/src/main/java/org/apache/fontbox/cff/CFFEncoding.java index c8a2edd5759..7cfa0147831 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/CFFEncoding.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/CFFEncoding.java @@ -18,9 +18,6 @@ import org.apache.fontbox.encoding.Encoding; -import java.util.HashMap; -import java.util.Map; - /** * A CFF Type 1-equivalent Encoding. An encoding is an array of codes associated with some or all * glyphs in a font @@ -29,8 +26,6 @@ */ public abstract class CFFEncoding extends Encoding { - private final Map codeToName = new HashMap(250); - /** * Package-private constructor for subclasses. */ @@ -38,23 +33,6 @@ public abstract class CFFEncoding extends Encoding { } - /** - * Returns the name of the glyph for the given character code. 
- * - * @param code character code - * @return PostScript glyph name - */ - @Override - public String getName(int code) - { - String name = codeToName.get(code); - if (name == null) - { - return ".notdef"; - } - return name; - } - /** * Adds a new code/SID combination to the encoding. * @param code the given code @@ -62,7 +40,6 @@ public String getName(int code) */ public void add(int code, int sid, String name) { - codeToName.put(code, name); addCharacterEncoding(code, name); } @@ -71,8 +48,6 @@ public void add(int code, int sid, String name) */ protected void add(int code, int sid) { - String name = CFFStandardString.getName(sid); - codeToName.put(code, name); - addCharacterEncoding(code, name); + addCharacterEncoding(code, CFFStandardString.getName(sid)); } } diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/CFFFont.java b/fontbox/src/main/java/org/apache/fontbox/cff/CFFFont.java index a5bd86f27fb..be58b35c38e 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/CFFFont.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/CFFFont.java @@ -124,7 +124,7 @@ void setCharset(CFFCharset charset) /** * Returns the character strings dictionary. For expert users only. * - * @return the dictionary + * @return the character strings dictionary as a list of byte arrays. */ public final List getCharStringBytes() { @@ -158,7 +158,7 @@ public int getNumCharStrings() /** * Sets the global subroutine index data. * - * @param globalSubrIndexValue an list containing the global subroutines + * @param globalSubrIndexValue a list of the global subroutines. */ void setGlobalSubrIndex(byte[][] globalSubrIndexValue) { @@ -166,9 +166,9 @@ void setGlobalSubrIndex(byte[][] globalSubrIndexValue) } /** - * Returns the list containing the global subroutine . + * Returns the list containing the global subroutines. * - * @return the dictionary + * @return a list of the global subroutines. 
*/ public List getGlobalSubrIndex() { diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java b/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java index e579e8f0ecc..e107ee63765 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java @@ -24,6 +24,8 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.fontbox.util.Charsets; @@ -33,6 +35,11 @@ */ public class CFFParser { + /** + * Log instance. + */ + private static final Log LOG = LogFactory.getLog(CFFParser.class); + private static final String TAG_OTTO = "OTTO"; private static final String TAG_TTCF = "ttcf"; private static final String TAG_TTFONLY = "\u0000\u0001\u0000\u0000"; @@ -83,37 +90,7 @@ public List parse(byte[] bytes) throws IOException // try to determine which kind of font we have if (TAG_OTTO.equals(firstTag)) { - // this is OpenType font containing CFF data - // so find CFF tag - short numTables = input.readShort(); - @SuppressWarnings("unused") - short searchRange = input.readShort(); - @SuppressWarnings("unused") - short entrySelector = input.readShort(); - @SuppressWarnings("unused") - short rangeShift = input.readShort(); - - boolean cffFound = false; - for (int q = 0; q < numTables; q++) - { - String tagName = readTagName(input); - @SuppressWarnings("unused") - long checksum = readLong(input); - long offset = readLong(input); - long length = readLong(input); - if (tagName.equals("CFF ")) - { - cffFound = true; - byte[] bytes2 = new byte[(int) length]; - System.arraycopy(bytes, (int) offset, bytes2, 0, bytes2.length); - input = new CFFDataInput(bytes2); - break; - } - } - if (!cffFound) - { - throw new IOException("CFF tag not found in this OpenType font."); - } + input = createTaggedCFFDataInput(input, bytes); } else if (TAG_TTCF.equals(firstTag)) { @@ -131,11 
+108,15 @@ else if (TAG_TTFONLY.equals(firstTag)) @SuppressWarnings("unused") Header header = readHeader(input); String[] nameIndex = readStringIndexData(input); + if (nameIndex == null) + { + throw new IOException("Name index missing in CFF font"); + } byte[][] topDictIndex = readIndexData(input); stringIndex = readStringIndexData(input); byte[][] globalSubrIndex = readIndexData(input); - List fonts = new ArrayList(); + List fonts = new ArrayList(nameIndex.length); for (int i = 0; i < nameIndex.length; i++) { CFFFont font = parseFont(input, nameIndex[i], topDictIndex[i]); @@ -146,6 +127,33 @@ else if (TAG_TTFONLY.equals(firstTag)) return fonts; } + private CFFDataInput createTaggedCFFDataInput(CFFDataInput input, byte[] bytes) throws IOException + { + // this is OpenType font containing CFF data + // so find CFF tag + short numTables = input.readShort(); + @SuppressWarnings("unused") + short searchRange = input.readShort(); + @SuppressWarnings("unused") + short entrySelector = input.readShort(); + @SuppressWarnings("unused") + short rangeShift = input.readShort(); + for (int q = 0; q < numTables; q++) + { + String tagName = readTagName(input); + @SuppressWarnings("unused") + long checksum = readLong(input); + long offset = readLong(input); + long length = readLong(input); + if ("CFF ".equals(tagName)) + { + byte[] bytes2 = Arrays.copyOfRange(bytes, (int) offset, (int) (offset + length)); + return new CFFDataInput(bytes2); + } + } + throw new IOException("CFF tag not found in this OpenType font."); + } + private static String readTagName(CFFDataInput input) throws IOException { byte[] b = input.readBytes(4); @@ -217,6 +225,12 @@ private static String[] readStringIndexData(CFFDataInput input) throws IOExcepti for (int i = 0; i < count; i++) { int length = offsets[i + 1] - offsets[i]; + if (length < 0) + { + throw new IOException("Negative index data length + " + length + " at " + + i + ": offsets[" + (i + 1) + "]=" + offsets[i + 1] + + ", offsets[" + i + "]=" + 
offsets[i]); + } indexDataValues[i] = new String(input.readBytes(length), Charsets.ISO_8859_1); } return indexDataValues; @@ -261,7 +275,7 @@ else if (b0 == 28 || b0 == 29) } else if (b0 == 30) { - entry.operands.add(readRealNumber(input, b0)); + entry.operands.add(readRealNumber(input)); } else if (b0 >= 32 && b0 <= 254) { @@ -269,7 +283,7 @@ else if (b0 >= 32 && b0 <= 254) } else { - throw new IllegalArgumentException(); + throw new IOException("invalid DICT data b0 byte: " + b0); } } return entry; @@ -321,18 +335,18 @@ else if (b0 >= 251 && b0 <= 254) } } - /** - * @param b0 - */ - private static Double readRealNumber(CFFDataInput input, int b0) throws IOException + private static Double readRealNumber(CFFDataInput input) throws IOException { StringBuilder sb = new StringBuilder(); boolean done = false; boolean exponentMissing = false; + boolean hasExponent = false; + int[] nibbles = new int[2]; while (!done) { int b = input.readUnsignedByte(); - int[] nibbles = { b / 16, b % 16 }; + nibbles[0] = b / 16; + nibbles[1] = b % 16; for (int nibble : nibbles) { switch (nibble) @@ -354,12 +368,24 @@ private static Double readRealNumber(CFFDataInput input, int b0) throws IOExcept sb.append("."); break; case 0xb: + if (hasExponent) + { + LOG.warn("duplicate 'E' ignored after " + sb); + break; + } sb.append("E"); exponentMissing = true; + hasExponent = true; break; case 0xc: + if (hasExponent) + { + LOG.warn("duplicate 'E-' ignored after " + sb); + break; + } sb.append("E-"); exponentMissing = true; + hasExponent = true; break; case 0xd: break; @@ -370,7 +396,8 @@ private static Double readRealNumber(CFFDataInput input, int b0) throws IOExcept done = true; break; default: - throw new IllegalArgumentException(); + // can only be a programming error because a nibble is between 0 and F + throw new IllegalArgumentException("illegal nibble " + nibble); } } } @@ -381,7 +408,18 @@ private static Double readRealNumber(CFFDataInput input, int b0) throws IOExcept // see PDFBOX-1522 
sb.append("0"); } - return Double.valueOf(sb.toString()); + if (sb.length() == 0) + { + return 0d; + } + try + { + return Double.valueOf(sb.toString()); + } + catch (NumberFormatException ex) + { + throw new IOException(ex); + } } private CFFFont parseFont(CFFDataInput input, String name, byte[] topDictIndex) throws IOException @@ -390,7 +428,7 @@ private CFFFont parseFont(CFFDataInput input, String name, byte[] topDictIndex) CFFDataInput topDictInput = new CFFDataInput(topDictIndex); DictData topDict = readDictData(topDictInput); - // we dont't support synthetic fonts + // we don't support synthetic fonts DictData.Entry syntheticBaseEntry = topDict.getEntry("SyntheticBase"); if (syntheticBaseEntry != null) { @@ -402,11 +440,13 @@ private CFFFont parseFont(CFFDataInput input, String name, byte[] topDictIndex) boolean isCIDFont = topDict.getEntry("ROS") != null; if (isCIDFont) { - font = new CFFCIDFont(); + CFFCIDFont cffCIDFont = new CFFCIDFont(); DictData.Entry rosEntry = topDict.getEntry("ROS"); - ((CFFCIDFont) font).setRegistry(readString(rosEntry.getNumber(0).intValue())); - ((CFFCIDFont) font).setOrdering(readString(rosEntry.getNumber(1).intValue())); - ((CFFCIDFont) font).setSupplement(rosEntry.getNumber(2).intValue()); + cffCIDFont.setRegistry(readString(rosEntry.getNumber(0).intValue())); + cffCIDFont.setOrdering(readString(rosEntry.getNumber(1).intValue())); + cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue()); + + font = cffCIDFont; } else { @@ -491,14 +531,19 @@ else if (!isCIDFont && charsetId == 2) { parseCIDFontDicts(input, topDict, (CFFCIDFont) font, charStringsIndex.length); + List privMatrix = null; + List> fontDicts = ((CFFCIDFont) font).getFontDicts(); + if (!fontDicts.isEmpty() && fontDicts.get(0).containsKey("FontMatrix")) + { + privMatrix = (List) fontDicts.get(0).get("FontMatrix"); + } // some malformed fonts have FontMatrix in their Font DICT, see PDFBOX-2495 - if (topDict.getEntry("FontMatrix") == null) + List matrix = 
topDict.getArray("FontMatrix", null); + if (matrix == null) { - List> fontDicts = ((CFFCIDFont) font).getFontDicts(); - if (fontDicts.size() > 0 && fontDicts.get(0).containsKey("FontMatrix")) + if (privMatrix != null) { - List matrix = (List)fontDicts.get(0).get("FontMatrix"); - font.addValueToTopDict("FontMatrix", matrix); + font.addValueToTopDict("FontMatrix", privMatrix); } else { @@ -508,6 +553,14 @@ else if (!isCIDFont && charsetId == 2) (double) 0, (double) 0))); } } + else if (privMatrix != null) + { + // we have to multiply the font matrix from the top directory with the font matrix + // from the private directory. This should be done for synthetic fonts only but in + // case of PDFBOX-3579 it's needed as well to get the right scaling + concatenateMatrix(matrix, privMatrix); + } + } else { @@ -517,6 +570,34 @@ else if (!isCIDFont && charsetId == 2) return font; } + private void concatenateMatrix(List matrixDest, List matrixConcat) + { + // concatenate matrices + // (a b 0) + // (c d 0) + // (x y 1) + double a1 = matrixDest.get(0).doubleValue(); + double b1 = matrixDest.get(1).doubleValue(); + double c1 = matrixDest.get(2).doubleValue(); + double d1 = matrixDest.get(3).doubleValue(); + double x1 = matrixDest.get(4).doubleValue(); + double y1 = matrixDest.get(5).doubleValue(); + + double a2 = matrixConcat.get(0).doubleValue(); + double b2 = matrixConcat.get(1).doubleValue(); + double c2 = matrixConcat.get(2).doubleValue(); + double d2 = matrixConcat.get(3).doubleValue(); + double x2 = matrixConcat.get(4).doubleValue(); + double y2 = matrixConcat.get(5).doubleValue(); + + matrixDest.set(0, a1 * a2 + b1 * c2); + matrixDest.set(1, a1 * b2 + b1 * d1); + matrixDest.set(2, c1 * a2 + d1 * c2); + matrixDest.set(3, c1 * b2 + d1 * d2); + matrixDest.set(4, x1 * a2 + y1 * c2 + x2); + matrixDest.set(5, x1 * b2 + y1 * d2 + y2); + } + /** * Parse dictionaries specific to a CIDFont. 
*/ @@ -535,13 +616,16 @@ private void parseCIDFontDicts(CFFDataInput input, DictData topDict, CFFCIDFont int fontDictOffset = fdArrayEntry.getNumber(0).intValue(); input.setPosition(fontDictOffset); byte[][] fdIndex = readIndexData(input); + if (fdIndex == null) + { + throw new IOException("Font dict index is missing for a CIDKeyed Font"); + } List> privateDictionaries = new LinkedList>(); List> fontDictionaries = new LinkedList>(); - for (int i = 0; i < fdIndex.length; ++i) + for (byte[] bytes : fdIndex) { - byte[] bytes = fdIndex[i]; CFFDataInput fontDictInput = new CFFDataInput(bytes); DictData fontDict = readDictData(fontDictInput); @@ -597,17 +681,17 @@ private void parseCIDFontDicts(CFFDataInput input, DictData topDict, CFFCIDFont private Map readPrivateDict(DictData privateDict) { Map privDict = new LinkedHashMap(17); - privDict.put("BlueValues", privateDict.getArray("BlueValues", null)); - privDict.put("OtherBlues", privateDict.getArray("OtherBlues", null)); - privDict.put("FamilyBlues", privateDict.getArray("FamilyBlues", null)); - privDict.put("FamilyOtherBlues", privateDict.getArray("FamilyOtherBlues", null)); + privDict.put("BlueValues", privateDict.getDelta("BlueValues", null)); + privDict.put("OtherBlues", privateDict.getDelta("OtherBlues", null)); + privDict.put("FamilyBlues", privateDict.getDelta("FamilyBlues", null)); + privDict.put("FamilyOtherBlues", privateDict.getDelta("FamilyOtherBlues", null)); privDict.put("BlueScale", privateDict.getNumber("BlueScale", 0.039625)); privDict.put("BlueShift", privateDict.getNumber("BlueShift", 7)); privDict.put("BlueFuzz", privateDict.getNumber("BlueFuzz", 1)); privDict.put("StdHW", privateDict.getNumber("StdHW", null)); privDict.put("StdVW", privateDict.getNumber("StdVW", null)); - privDict.put("StemSnapH", privateDict.getArray("StemSnapH", null)); - privDict.put("StemSnapV", privateDict.getArray("StemSnapV", null)); + privDict.put("StemSnapH", privateDict.getDelta("StemSnapH", null)); + 
privDict.put("StemSnapV", privateDict.getDelta("StemSnapV", null)); privDict.put("ForceBold", privateDict.getBoolean("ForceBold", false)); privDict.put("LanguageGroup", privateDict.getNumber("LanguageGroup", 0)); privDict.put("ExpansionFactor", privateDict.getNumber("ExpansionFactor", 0.06)); @@ -627,18 +711,18 @@ private void parseType1Dicts(CFFDataInput input, DictData topDict, CFFType1Font DictData.Entry encodingEntry = topDict.getEntry("Encoding"); CFFEncoding encoding; int encodingId = encodingEntry != null ? encodingEntry.getNumber(0).intValue() : 0; - if (encodingId == 0) + switch (encodingId) { - encoding = CFFStandardEncoding.getInstance(); - } - else if (encodingId == 1) - { - encoding = CFFExpertEncoding.getInstance(); - } - else - { - input.setPosition(encodingId); - encoding = readEncoding(input, charset); + case 0: + encoding = CFFStandardEncoding.getInstance(); + break; + case 1: + encoding = CFFExpertEncoding.getInstance(); + break; + default: + input.setPosition(encodingId); + encoding = readEncoding(input, charset); + break; } font.setEncoding(encoding); @@ -669,7 +753,7 @@ else if (encodingId == 1) } } - private String readString(int index) throws IOException + private String readString(int index) { if (index >= 0 && index <= 390) { @@ -686,7 +770,7 @@ private String readString(int index) throws IOException } } - private String getString(DictData dict, String name) throws IOException + private String getString(DictData dict, String name) { DictData.Entry entry = dict.getEntry(name); return entry != null ? 
readString(entry.getNumber(0).intValue()) : null; @@ -697,17 +781,14 @@ private CFFEncoding readEncoding(CFFDataInput dataInput, CFFCharset charset) thr int format = dataInput.readCard8(); int baseFormat = format & 0x7f; - if (baseFormat == 0) - { - return readFormat0Encoding(dataInput, charset, format); - } - else if (baseFormat == 1) - { - return readFormat1Encoding(dataInput, charset, format); - } - else + switch (baseFormat) { - throw new IllegalArgumentException(); + case 0: + return readFormat0Encoding(dataInput, charset, format); + case 1: + return readFormat1Encoding(dataInput, charset, format); + default: + throw new IllegalArgumentException(); } } @@ -741,9 +822,9 @@ private Format1Encoding readFormat1Encoding(CFFDataInput dataInput, CFFCharset c int gid = 1; for (int i = 0; i < encoding.nRanges; i++) { - int rangeFirst = dataInput.readCard8(); - int rangeLeft = dataInput.readCard8(); - for (int j = 0; j < 1 + rangeLeft; j++) + int rangeFirst = dataInput.readCard8(); // First code in range + int rangeLeft = dataInput.readCard8(); // Codes left in range (excluding first) + for (int j = 0; j <= rangeLeft; j++) { int sid = charset.getSIDForGID(gid); int code = rangeFirst + j; @@ -784,17 +865,14 @@ private void readSupplement(CFFDataInput dataInput, CFFBuiltInEncoding encoding) private static FDSelect readFDSelect(CFFDataInput dataInput, int nGlyphs, CFFCIDFont ros) throws IOException { int format = dataInput.readCard8(); - if (format == 0) - { - return readFormat0FDSelect(dataInput, format, nGlyphs, ros); - } - else if (format == 3) + switch (format) { - return readFormat3FDSelect(dataInput, format, nGlyphs, ros); - } - else - { - throw new IllegalArgumentException(); + case 0: + return readFormat0FDSelect(dataInput, format, nGlyphs, ros); + case 3: + return readFormat3FDSelect(dataInput, format, nGlyphs, ros); + default: + throw new IllegalArgumentException(); } } @@ -953,21 +1031,16 @@ private CFFCharset readCharset(CFFDataInput dataInput, int nGlyphs, 
boolean isCI throws IOException { int format = dataInput.readCard8(); - if (format == 0) - { - return readFormat0Charset(dataInput, format, nGlyphs, isCIDFont); - } - else if (format == 1) - { - return readFormat1Charset(dataInput, format, nGlyphs, isCIDFont); - } - else if (format == 2) - { - return readFormat2Charset(dataInput, format, nGlyphs, isCIDFont); - } - else - { - throw new IllegalArgumentException(); + switch (format) + { + case 0: + return readFormat0Charset(dataInput, format, nGlyphs, isCIDFont); + case 1: + return readFormat1Charset(dataInput, format, nGlyphs, isCIDFont); + case 2: + return readFormat2Charset(dataInput, format, nGlyphs, isCIDFont); + default: + throw new IllegalArgumentException(); } } @@ -1113,19 +1186,25 @@ public Entry getEntry(String name) public Boolean getBoolean(String name, boolean defaultValue) { Entry entry = getEntry(name); - return entry != null ? entry.getBoolean(0) : defaultValue; + return entry != null && !entry.getArray().isEmpty() ? entry.getBoolean(0) : defaultValue; } public List getArray(String name, List defaultValue) { Entry entry = getEntry(name); - return entry != null ? entry.getArray() : defaultValue; + return entry != null && !entry.getArray().isEmpty() ? entry.getArray() : defaultValue; } public Number getNumber(String name, Number defaultValue) { Entry entry = getEntry(name); - return entry != null ? entry.getNumber(0) : defaultValue; + return entry != null && !entry.getArray().isEmpty() ? entry.getNumber(0) : defaultValue; + } + + public List getDelta(String name, List defaultValue) + { + Entry entry = getEntry(name); + return entry != null && !entry.getArray().isEmpty() ? 
entry.getDelta() : defaultValue; } /** @@ -1173,6 +1252,19 @@ public List getArray() return operands; } + public List getDelta() + { + List result = new ArrayList(operands); + for (int i = 1; i < result.size(); i++) + { + Number previous = result.get(i - 1); + Number current = result.get(i); + Integer sum = previous.intValue() + current.intValue(); + result.set(i, sum); + } + return result; + } + @Override public String toString() { @@ -1272,7 +1364,7 @@ private static class EmptyCharset extends EmbeddedCharset protected EmptyCharset(int numCharStrings) { super(true); - addCID(0 ,0); // .notdef + addCID(0, 0); // .notdef // Adobe Reader treats CID as GID, PDFBOX-2571 p11. for (int i = 1; i <= numCharStrings; i++) diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/CFFType1Font.java b/fontbox/src/main/java/org/apache/fontbox/cff/CFFType1Font.java index 9f8591539f5..1869b63b6d6 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/CFFType1Font.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/CFFType1Font.java @@ -205,12 +205,7 @@ private Object getProperty(String name) { return topDictValue; } - Object privateDictValue = privateDict.get(name); - if (privateDictValue != null) - { - return privateDictValue; - } - return null; + return privateDict.get(name); } private int getDefaultWidthX() diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/CIDKeyedType2CharString.java b/fontbox/src/main/java/org/apache/fontbox/cff/CIDKeyedType2CharString.java index 69eb5c58b76..68745a7c5c0 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/CIDKeyedType2CharString.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/CIDKeyedType2CharString.java @@ -19,6 +19,7 @@ import org.apache.fontbox.type1.Type1CharStringReader; import java.util.List; +import java.util.Locale; /** * A CID-Keyed Type 2 CharString. 
@@ -43,7 +44,7 @@ public class CIDKeyedType2CharString extends Type2CharString public CIDKeyedType2CharString(Type1CharStringReader font, String fontName, int cid, int gid, List sequence, int defaultWidthX, int nomWidthX) { // glyph name is for debugging only - super(font, fontName, String.format("%04x", cid), gid, sequence, defaultWidthX, nomWidthX); + super(font, fontName, String.format(Locale.US, "%04x", cid), gid, sequence, defaultWidthX, nomWidthX); this.cid = cid; } diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/CharStringHandler.java b/fontbox/src/main/java/org/apache/fontbox/cff/CharStringHandler.java index 9fd53915983..1c82dd72339 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/CharStringHandler.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/CharStringHandler.java @@ -17,8 +17,8 @@ package org.apache.fontbox.cff; +import java.util.ArrayList; import java.util.List; -import java.util.Stack; /** * A Handler for CharStringCommands. @@ -35,26 +35,26 @@ public abstract class CharStringHandler * @param sequence of CharStringCommands * */ - public List handleSequence(List sequence) + public List handleSequence(List sequence) { - Stack stack = new Stack(); + List numbers = new ArrayList(); for (Object obj : sequence) { if (obj instanceof CharStringCommand) { - List results = handleCommand(stack, (CharStringCommand)obj); - stack.clear(); // this is basically returning the new stack + List results = handleCommand(numbers, (CharStringCommand)obj); + numbers.clear(); if (results != null) { - stack.addAll(results); + numbers.addAll(results); } } else { - stack.push((Integer)obj); + numbers.add((Number) obj); } } - return stack; + return numbers; } /** @@ -63,5 +63,5 @@ public List handleSequence(List sequence) * @param numbers a list of numbers * @param command the CharStringCommand */ - public abstract List handleCommand(List numbers, CharStringCommand command); + public abstract List handleCommand(List numbers, CharStringCommand command); 
} \ No newline at end of file diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/DataInput.java b/fontbox/src/main/java/org/apache/fontbox/cff/DataInput.java index d659811201e..6dcdfaf1c11 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/DataInput.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/DataInput.java @@ -29,7 +29,7 @@ public class DataInput { - private byte[] inputBuffer = null; + private final byte[] inputBuffer; private int bufferPosition = 0; /** @@ -179,6 +179,10 @@ public int readInt() throws IOException */ public byte[] readBytes(int length) throws IOException { + if (length < 0) + { + throw new IOException("length is negative"); + } if (inputBuffer.length - bufferPosition < length) { throw new EOFException(); @@ -207,8 +211,7 @@ private int peek(int offset) { try { - int value = inputBuffer[bufferPosition + offset] & 0xff; - return value; + return inputBuffer[bufferPosition + offset] & 0xff; } catch (RuntimeException re) { diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/Type1CharString.java b/fontbox/src/main/java/org/apache/fontbox/cff/Type1CharString.java index cd72b4f12ed..9194394214c 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/Type1CharString.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/Type1CharString.java @@ -16,7 +16,6 @@ */ package org.apache.fontbox.cff; -import java.awt.Point; import java.awt.geom.AffineTransform; import java.awt.geom.GeneralPath; import java.awt.geom.Point2D; @@ -41,19 +40,23 @@ public class Type1CharString private static final Log LOG = LogFactory.getLog(Type1CharString.class); private Type1CharStringReader font; - private String fontName, glyphName; + private final String fontName; + private final String glyphName; private GeneralPath path = null; private int width = 0; private Point2D.Float leftSideBearing = null; private Point2D.Float current = null; private boolean isFlex = false; - private List flexPoints = new ArrayList(); + private final List flexPoints = 
new ArrayList(); protected List type1Sequence; protected int commandCount; /** * Constructs a new Type1CharString object. - * @param font Parent Type 1 CharString font + * + * @param font Parent Type 1 CharString font. + * @param fontName Name of the font. + * @param glyphName Name of the glyph. * @param sequence Type 1 char string sequence */ public Type1CharString(Type1CharStringReader font, String fontName, String glyphName, @@ -65,7 +68,10 @@ public Type1CharString(Type1CharStringReader font, String fontName, String glyph /** * Constructor for use in subclasses. - * @param font Parent Type 1 CharString font + * + * @param font Parent Type 1 CharString font. + * @param fontName Name of the font. + * @param glyphName Name of the glyph. */ protected Type1CharString(Type1CharStringReader font, String fontName, String glyphName) { @@ -87,9 +93,12 @@ public String getName() */ public Rectangle2D getBounds() { - if (path == null) + synchronized(LOG) { - render(); + if (path == null) + { + render(); + } } return path.getBounds2D(); } @@ -100,9 +109,12 @@ public Rectangle2D getBounds() */ public int getWidth() { - if (path == null) + synchronized(LOG) { - render(); + if (path == null) + { + render(); + } } return width; } @@ -113,9 +125,12 @@ public int getWidth() */ public GeneralPath getPath() { - if (path == null) + synchronized(LOG) { - render(); + if (path == null) + { + render(); + } } return path; } @@ -139,7 +154,7 @@ private void render() width = 0; CharStringHandler handler = new CharStringHandler() { @Override - public List handleCommand(List numbers, CharStringCommand command) + public List handleCommand(List numbers, CharStringCommand command) { return Type1CharString.this.handleCommand(numbers, command); } @@ -147,7 +162,7 @@ public List handleCommand(List numbers, CharStringCommand comm handler.handleSequence(type1Sequence); } - private List handleCommand(List numbers, CharStringCommand command) + private List handleCommand(List numbers, CharStringCommand 
command) { commandCount++; String name = CharStringCommand.TYPE1_VOCABULARY.get(command.getKey()); @@ -158,7 +173,7 @@ private List handleCommand(List numbers, CharStringCommand com { if (isFlex) { - flexPoints.add(new Point2D.Float(numbers.get(0), numbers.get(1))); + flexPoints.add(new Point2D.Float(numbers.get(0).floatValue(), numbers.get(1).floatValue())); } else { @@ -168,12 +183,12 @@ private List handleCommand(List numbers, CharStringCommand com } else if ("vmoveto".equals(name)) { - if (numbers.size() >= 1) + if (!numbers.isEmpty()) { if (isFlex) { // not in the Type 1 spec, but exists in some fonts - flexPoints.add(new Point2D.Float(0, numbers.get(0))); + flexPoints.add(new Point2D.Float(0f, numbers.get(0).floatValue())); } else { @@ -183,12 +198,12 @@ else if ("vmoveto".equals(name)) } else if ("hmoveto".equals(name)) { - if (numbers.size() >= 1) + if (!numbers.isEmpty()) { if (isFlex) { // not in the Type 1 spec, but exists in some fonts - flexPoints.add(new Point2D.Float(numbers.get(0), 0)); + flexPoints.add(new Point2D.Float(numbers.get(0).floatValue(), 0f)); } else { @@ -205,14 +220,14 @@ else if ("rlineto".equals(name)) } else if ("hlineto".equals(name)) { - if (numbers.size() >= 1) + if (!numbers.isEmpty()) { rlineTo(numbers.get(0), 0); } } else if ("vlineto".equals(name)) { - if (numbers.size() >= 1) + if (!numbers.isEmpty()) { rlineTo(0, numbers.get(0)); } @@ -227,14 +242,14 @@ else if ("rrcurveto".equals(name)) } else if ("closepath".equals(name)) { - closepath(); + closeCharString1Path(); } else if ("sbw".equals(name)) { if (numbers.size() >= 3) { - leftSideBearing = new Point2D.Float(numbers.get(0), numbers.get(1)); - width = numbers.get(2); + leftSideBearing = new Point2D.Float(numbers.get(0).floatValue(), numbers.get(1).floatValue()); + width = numbers.get(2).intValue(); current.setLocation(leftSideBearing); } } @@ -242,8 +257,8 @@ else if ("hsbw".equals(name)) { if (numbers.size() >= 2) { - leftSideBearing = new Point2D.Float(numbers.get(0), 
0); - width = numbers.get(1); + leftSideBearing = new Point2D.Float(numbers.get(0).floatValue(), 0); + width = numbers.get(1).intValue(); current.setLocation(leftSideBearing); } } @@ -279,19 +294,19 @@ else if ("setcurrentpoint".equals(name)) } else if ("callothersubr".equals(name)) { - if (numbers.size() >= 1) + if (!numbers.isEmpty()) { - callothersubr(numbers.get(0)); + callothersubr(numbers.get(0).intValue()); } } else if ("div".equals(name)) { - int b = numbers.get(numbers.size() -1); - int a = numbers.get(numbers.size() -2); + float b = numbers.get(numbers.size() -1).floatValue(); + float a = numbers.get(numbers.size() -2).floatValue(); - int result = a / b; // TODO loss of precision, should be float + float result = a / b; - List list = new ArrayList(numbers); + List list = new ArrayList(numbers); list.remove(list.size() - 1); list.remove(list.size() - 1); list.add(result); @@ -330,9 +345,9 @@ else if (name != null) * Sets the current absolute point without performing a moveto. * Used only with results from callothersubr */ - private void setcurrentpoint(int x, int y) + private void setcurrentpoint(Number x, Number y) { - current.setLocation(x, y); + current.setLocation(x.floatValue(), y.floatValue()); } /** @@ -354,12 +369,12 @@ private void callothersubr(int num) } // reference point is relative to start point - Point.Float reference = flexPoints.get(0); + Point2D.Float reference = flexPoints.get(0); reference.setLocation(current.getX() + reference.getX(), current.getY() + reference.getY()); // first point is relative to reference point - Point.Float first = flexPoints.get(1); + Point2D.Float first = flexPoints.get(1); first.setLocation(reference.getX() + first.getX(), reference.getY() + first.getY()); // make the first point relative to the start point @@ -444,7 +459,7 @@ private void rrcurveTo(Number dx1, Number dy1, Number dx2, Number dy2, /** * Close path. 
*/ - private void closepath() + private void closeCharString1Path() { if (path.getCurrentPoint() == null) { @@ -467,34 +482,28 @@ private void seac(Number asb, Number adx, Number ady, Number bchar, Number achar { // base character String baseName = StandardEncoding.INSTANCE.getName(bchar.intValue()); - if (baseName != null) + try { - try - { - Type1CharString base = font.getType1CharString(baseName); - path.append(base.getPath().getPathIterator(null), false); - } - catch (IOException e) - { - LOG.warn("invalid seac character in glyph " + glyphName + " of font " + fontName); - } + Type1CharString base = font.getType1CharString(baseName); + path.append(base.getPath().getPathIterator(null), false); + } + catch (IOException e) + { + LOG.warn("invalid seac character in glyph " + glyphName + " of font " + fontName); } // accent character String accentName = StandardEncoding.INSTANCE.getName(achar.intValue()); - if (accentName != null) + try { - try - { - Type1CharString accent = font.getType1CharString(accentName); - AffineTransform at = AffineTransform.getTranslateInstance( - leftSideBearing.getX() + adx.floatValue(), + Type1CharString accent = font.getType1CharString(accentName); + AffineTransform at = AffineTransform.getTranslateInstance( + leftSideBearing.getX() + adx.floatValue() - asb.floatValue(), leftSideBearing.getY() + ady.floatValue()); - path.append(accent.getPath().getPathIterator(at), false); - } - catch (IOException e) - { - LOG.warn("invalid seac character in glyph " + glyphName + " of font " + fontName); - } + path.append(accent.getPath().getPathIterator(at), false); + } + catch (IOException e) + { + LOG.warn("invalid seac character in glyph " + glyphName + " of font " + fontName); } } diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/Type1CharStringParser.java b/fontbox/src/main/java/org/apache/fontbox/cff/Type1CharStringParser.java index 3a94f8fcb5d..0433cba9036 100644 --- 
a/fontbox/src/main/java/org/apache/fontbox/cff/Type1CharStringParser.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/Type1CharStringParser.java @@ -17,9 +17,10 @@ package org.apache.fontbox.cff; import java.io.IOException; +import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.Deque; import java.util.List; -import java.util.Stack; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -45,7 +46,8 @@ public class Type1CharStringParser static final int CALLOTHERSUBR = 16; static final int POP = 17; - private final String fontName, glyphName; + private final String fontName; + private final String glyphName; /** * Constructs a new Type1CharStringParser object. @@ -101,6 +103,17 @@ private List parse(byte[] bytes, List subrs, List sequen sequence.remove(sequence.size()-1); // remove "return" command } } + else + { + LOG.warn("CALLSUBR is ignored, operand: " + operand + + ", subrs.size(): " + subrs.size() + " in glyph '" + + glyphName + "' of font " + fontName); + // remove all parameters (there can be more than one) + while (sequence.get(sequence.size() - 1) instanceof Integer) + { + sequence.remove(sequence.size() - 1); + } + } } else if (b0 == TWO_BYTE && input.peekUnsignedByte(0) == CALLOTHERSUBR) { @@ -111,34 +124,33 @@ else if (b0 == TWO_BYTE && input.peekUnsignedByte(0) == CALLOTHERSUBR) Integer numArgs = (Integer)sequence.remove(sequence.size()-1); // othersubrs 0-3 have their own semantics - Stack results = new Stack(); - if (othersubrNum == 0) - { - results.push(removeInteger(sequence)); - results.push(removeInteger(sequence)); - sequence.remove(sequence.size() - 1); - // end flex - sequence.add(0); - sequence.add(new CharStringCommand(TWO_BYTE, CALLOTHERSUBR)); - } - else if (othersubrNum == 1) + Deque results = new ArrayDeque(); + switch (othersubrNum) { - // begin flex - sequence.add(1); - sequence.add(new CharStringCommand(TWO_BYTE, CALLOTHERSUBR)); - } - else if (othersubrNum == 3) - { - // 
allows hint replacement - results.push(removeInteger(sequence)); - } - else - { - // all remaining othersubrs use this fallback mechanism - for (int i = 0; i < numArgs; i++) - { + case 0: results.push(removeInteger(sequence)); - } + results.push(removeInteger(sequence)); + sequence.remove(sequence.size() - 1); + // end flex + sequence.add(0); + sequence.add(new CharStringCommand(TWO_BYTE, CALLOTHERSUBR)); + break; + case 1: + // begin flex + sequence.add(1); + sequence.add(new CharStringCommand(TWO_BYTE, CALLOTHERSUBR)); + break; + case 3: + // allows hint replacement + results.push(removeInteger(sequence)); + break; + default: + // all remaining othersubrs use this fallback mechanism + for (int i = 0; i < numArgs; i++) + { + results.push(removeInteger(sequence)); + } + break; } // pop must follow immediately diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/Type1FontUtil.java b/fontbox/src/main/java/org/apache/fontbox/cff/Type1FontUtil.java index af178c70d8e..0c2bf59a2cd 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/Type1FontUtil.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/Type1FontUtil.java @@ -16,6 +16,8 @@ */ package org.apache.fontbox.cff; +import java.util.Locale; + /** * This class contains some helper methods handling Type1-Fonts. 
* @@ -36,14 +38,14 @@ private Type1FontUtil() public static String hexEncode(byte[] bytes) { StringBuilder sb = new StringBuilder(); - for (int i = 0; i < bytes.length; i++) + for (byte aByte : bytes) { - String string = Integer.toHexString(bytes[i] & 0xff); + String string = Integer.toHexString(aByte & 0xff); if (string.length() == 1) { sb.append("0"); } - sb.append(string.toUpperCase()); + sb.append(string.toUpperCase(Locale.US)); } return sb.toString(); } @@ -92,11 +94,6 @@ private static byte[] encrypt(byte[] plaintextBytes, int r, int n) { byte[] buffer = new byte[plaintextBytes.length + n]; - for (int i = 0; i < n; i++) - { - buffer[i] = 0; - } - System.arraycopy(plaintextBytes, 0, buffer, n, buffer.length - n); int c1 = 52845; diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/Type2CharString.java b/fontbox/src/main/java/org/apache/fontbox/cff/Type2CharString.java index 2733f7a3707..60b26a90f28 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/Type2CharString.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/Type2CharString.java @@ -31,8 +31,8 @@ */ public class Type2CharString extends Type1CharString { - private int defWidthX = 0; - private int nominalWidthX = 0; + private float defWidthX = 0; + private float nominalWidthX = 0; private int pathCount = 0; private final List type2sequence; private final int gid; @@ -84,7 +84,7 @@ private void convertType1ToType2(List sequence) pathCount = 0; CharStringHandler handler = new CharStringHandler() { @Override - public List handleCommand(List numbers, CharStringCommand command) + public List handleCommand(List numbers, CharStringCommand command) { return Type2CharString.this.handleCommand(numbers, command); } @@ -93,7 +93,7 @@ public List handleCommand(List numbers, CharStringCommand comm } @SuppressWarnings(value = { "unchecked" }) - private List handleCommand(List numbers, CharStringCommand command) + private List handleCommand(List numbers, CharStringCommand command) { commandCount++; String 
name = CharStringCommand.TYPE2_VOCABULARY.get(command.getKey()); @@ -133,7 +133,7 @@ else if ("rrcurveto".equals(name)) else if ("endchar".equals(name)) { numbers = clearStack(numbers, numbers.size() == 5 || numbers.size() == 1); - closePath(); + closeCharString2Path(); if (numbers.size() == 4) { // deprecated "seac" operator @@ -167,24 +167,24 @@ else if ("hvcurveto".equals(name)) } else if ("hflex".equals(name)) { - List first = Arrays.asList(numbers.get(0), 0, + List first = Arrays.asList(numbers.get(0), 0, numbers.get(1), numbers.get(2), numbers.get(3), 0); - List second = Arrays.asList(numbers.get(4), 0, - numbers.get(5), -numbers.get(2), + List second = Arrays.asList(numbers.get(4), 0, + numbers.get(5), -(numbers.get(2).floatValue()), numbers.get(6), 0); addCommandList(Arrays.asList(first, second), new CharStringCommand(8)); } else if ("flex".equals(name)) { - List first = numbers.subList(0, 6); - List second = numbers.subList(6, 12); + List first = numbers.subList(0, 6); + List second = numbers.subList(6, 12); addCommandList(Arrays.asList(first, second), new CharStringCommand(8)); } else if ("hflex1".equals(name)) { - List first = Arrays.asList(numbers.get(0), numbers.get(1), + List first = Arrays.asList(numbers.get(0), numbers.get(1), numbers.get(2), numbers.get(3), numbers.get(4), 0); - List second = Arrays.asList(numbers.get(5), 0, + List second = Arrays.asList(numbers.get(5), 0, numbers.get(6), numbers.get(7), numbers.get(8), 0); addCommandList(Arrays.asList(first, second), new CharStringCommand(8)); } @@ -194,11 +194,11 @@ else if ("flex1".equals(name)) int dy = 0; for(int i = 0; i < 5; i++) { - dx += numbers.get(i * 2); - dy += numbers.get(i * 2 + 1); + dx += numbers.get(i * 2).intValue(); + dy += numbers.get(i * 2 + 1).intValue(); } - List first = numbers.subList(0, 6); - List second = Arrays.asList(numbers.get(6), numbers.get(7), numbers.get(8), + List first = numbers.subList(0, 6); + List second = Arrays.asList(numbers.get(6), numbers.get(7), 
numbers.get(8), numbers.get(9), (Math.abs(dx) > Math.abs(dy) ? numbers.get(10) : -dx), (Math.abs(dx) > Math.abs(dy) ? -dy : numbers.get(10))); addCommandList(Arrays.asList(first, second), new CharStringCommand(8)); @@ -211,7 +211,7 @@ else if ("hstemhm".equals(name)) else if ("hintmask".equals(name) || "cntrmask".equals(name)) { numbers = clearStack(numbers, numbers.size() % 2 != 0); - if (numbers.size() > 0) + if (!numbers.isEmpty()) { expandStemHints(numbers, false); } @@ -223,17 +223,23 @@ else if ("vstemhm".equals(name)) } else if ("rcurveline".equals(name)) { - addCommandList(split(numbers.subList(0, numbers.size() - 2), 6), - new CharStringCommand(8)); - addCommand(numbers.subList(numbers.size() - 2, numbers.size()), - new CharStringCommand(5)); + if (numbers.size() >= 2) + { + addCommandList(split(numbers.subList(0, numbers.size() - 2), 6), + new CharStringCommand(8)); + addCommand(numbers.subList(numbers.size() - 2, numbers.size()), + new CharStringCommand(5)); + } } else if ("rlinecurve".equals(name)) { - addCommandList(split(numbers.subList(0, numbers.size() - 6), 2), - new CharStringCommand(5)); - addCommand(numbers.subList(numbers.size() - 6, numbers.size()), - new CharStringCommand(8)); + if (numbers.size() >= 6) + { + addCommandList(split(numbers.subList(0, numbers.size() - 6), 2), + new CharStringCommand(5)); + addCommand(numbers.subList(numbers.size() - 6, numbers.size()), + new CharStringCommand(8)); + } } else if ("vvcurveto".equals(name)) { @@ -250,21 +256,20 @@ else if ("hhcurveto".equals(name)) return null; } - private List clearStack(List numbers, boolean flag) + private List clearStack(List numbers, boolean flag) { if (type1Sequence.isEmpty()) { if (flag) { - addCommand(Arrays.asList(0, numbers.get(0) + nominalWidthX), + addCommand(Arrays.asList((Number) 0f, numbers.get(0).floatValue() + nominalWidthX), new CharStringCommand(13)); - numbers = numbers.subList(1, numbers.size()); - } + } else { - addCommand(Arrays.asList(0, defWidthX), - new 
CharStringCommand(13)); + addCommand(Arrays.asList((Number) 0f, defWidthX), + new CharStringCommand(13)); } } return numbers; @@ -274,7 +279,7 @@ private List clearStack(List numbers, boolean flag) * @param numbers * @param horizontal */ - private void expandStemHints(List numbers, boolean horizontal) + private void expandStemHints(List numbers, boolean horizontal) { // TODO } @@ -283,12 +288,12 @@ private void markPath() { if (pathCount > 0) { - closePath(); + closeCharString2Path(); } pathCount++; } - private void closePath() + private void closeCharString2Path() { CharStringCommand command = pathCount > 0 ? (CharStringCommand) type1Sequence .get(type1Sequence.size() - 1) @@ -297,13 +302,13 @@ private void closePath() CharStringCommand closepathCommand = new CharStringCommand(9); if (command != null && !closepathCommand.equals(command)) { - addCommand(Collections. emptyList(), closepathCommand); + addCommand(Collections. emptyList(), closepathCommand); } } - private void drawAlternatingLine(List numbers, boolean horizontal) + private void drawAlternatingLine(List numbers, boolean horizontal) { - while (numbers.size() > 0) + while (!numbers.isEmpty()) { addCommand(numbers.subList(0, 1), new CharStringCommand( horizontal ? 
6 : 7)); @@ -312,9 +317,9 @@ private void drawAlternatingLine(List numbers, boolean horizontal) } } - private void drawAlternatingCurve(List numbers, boolean horizontal) + private void drawAlternatingCurve(List numbers, boolean horizontal) { - while (numbers.size() > 0) + while (numbers.size() >= 4) { boolean last = numbers.size() == 5; if (horizontal) @@ -336,9 +341,9 @@ private void drawAlternatingCurve(List numbers, boolean horizontal) } } - private void drawCurve(List numbers, boolean horizontal) + private void drawCurve(List numbers, boolean horizontal) { - while (numbers.size() > 0) + while (numbers.size() >= 4) { boolean first = numbers.size() % 4 == 1; @@ -361,15 +366,15 @@ private void drawCurve(List numbers, boolean horizontal) } } - private void addCommandList(List> numbers, CharStringCommand command) + private void addCommandList(List> numbers, CharStringCommand command) { - for (List ns : numbers) + for (List ns : numbers) { addCommand(ns, command); } } - private void addCommand(List numbers, CharStringCommand command) + private void addCommand(List numbers, CharStringCommand command) { type1Sequence.addAll(numbers); type1Sequence.add(command); @@ -377,8 +382,9 @@ private void addCommand(List numbers, CharStringCommand command) private static List> split(List list, int size) { - List> result = new ArrayList>(); - for (int i = 0; i < list.size() / size; i++) + int listSize = list.size() / size; + List> result = new ArrayList>(listSize); + for (int i = 0; i < listSize; i++) { result.add(list.subList(i * size, (i + 1) * size)); } diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/Type2CharStringParser.java b/fontbox/src/main/java/org/apache/fontbox/cff/Type2CharStringParser.java index f2331f2dbec..02c5d170e2e 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/Type2CharStringParser.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/Type2CharStringParser.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.ArrayList; import 
java.util.List; +import java.util.Locale; /** * This class represents a converter for a mapping into a Type2-sequence. @@ -30,7 +31,9 @@ public class Type2CharStringParser private int vstemCount = 0; private List sequence = null; @SuppressWarnings("unused") - private final String fontName, glyphName; + private final String fontName; + @SuppressWarnings("unused") + private final String glyphName; /** * Constructs a new Type1CharStringParser object for a Type 1-equivalent font. @@ -53,7 +56,7 @@ public Type2CharStringParser(String fontName, String glyphName) public Type2CharStringParser(String fontName, int cid) { this.fontName = fontName; - this.glyphName = String.format("%04x", cid); // for debugging only + this.glyphName = String.format(Locale.US, "%04x", cid); // for debugging only } /** @@ -121,7 +124,7 @@ else if (b0 == 29 && globalSubroutineIndexProvided) { // process globalsubr command Integer operand=(Integer)sequence.remove(sequence.size()-1); //get subrbias - int bias = 0; + int bias; int nSubrs = globalSubrIndex.length; if (nSubrs < 1240) @@ -208,7 +211,7 @@ else if (b0 == 19 || b0 == 20) return new CharStringCommand(b0); } - private Integer readNumber(int b0, DataInput input) throws IOException + private Number readNumber(int b0, DataInput input) throws IOException { if (b0 == 28) @@ -230,15 +233,13 @@ else if (b0 >= 251 && b0 <= 254) int b1 = input.readUnsignedByte(); return -(b0 - 251) * 256 - b1 - 108; - } + } else if (b0 == 255) { short value = input.readShort(); - // The lower bytes are representing the digits after - // the decimal point and aren't needed in this context - input.readUnsignedByte(); - input.readUnsignedByte(); - return (int) value; + // The lower bytes are representing the digits after the decimal point + double fraction = input.readUnsignedShort() / 65535d; + return value + fraction; } else { @@ -249,7 +250,7 @@ else if (b0 == 255) private int getMaskLength() { int hintCount = hstemCount + vstemCount; - int length = (int)(hintCount 
/ 8); + int length = hintCount / 8; if (hintCount % 8 > 0) { length++; @@ -264,12 +265,11 @@ private List peekNumbers() { Object object = sequence.get(i); - if (object instanceof Number) + if (!(object instanceof Number)) { - numbers.add(0, (Number) object); - continue; + return numbers; } - return numbers; + numbers.add(0, (Number) object); } return numbers; } diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/package.html b/fontbox/src/main/java/org/apache/fontbox/cff/package.html index 107badad6ad..03fef12f7ed 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/package.html +++ b/fontbox/src/main/java/org/apache/fontbox/cff/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/fontbox/src/main/java/org/apache/fontbox/cmap/CIDRange.java b/fontbox/src/main/java/org/apache/fontbox/cmap/CIDRange.java index 7d0966e6845..dbdf9329ffd 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cmap/CIDRange.java +++ b/fontbox/src/main/java/org/apache/fontbox/cmap/CIDRange.java @@ -25,7 +25,7 @@ class CIDRange private final char from; - private final char to; + private char to; private final int cid; @@ -66,4 +66,23 @@ public int unmap(int code) return -1; } + /** + * Check if the given values represent a consecutive range of the given range. If so, extend the given range instead + * of creating a new one. 
+ * + * @param newFrom start value of the new range + * @param newTo end value of the new range + * @param newCid start CID value of the range + * @return true if the given range was extended + */ + public boolean extend(char newFrom, char newTo, int newCid) + { + if ((newFrom == to + 1) && (newCid == cid + to - from + 1)) + { + to = newTo; + return true; + } + return false; + } + } diff --git a/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java b/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java index f08a4b97ee6..d6b0fd78134 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java +++ b/fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java @@ -22,6 +22,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; /** * This class represents a CMap file. @@ -30,6 +32,8 @@ */ public class CMap { + private static final Log LOG = LogFactory.getLog(CMap.class); + private int wmode = 0; private String cmapName = null; private String cmapVersion = null; @@ -48,6 +52,9 @@ public class CMap // Unicode mappings private final Map charToUnicode = new HashMap(); + // inverted map + private final Map unicodeToByteCodes = new HashMap(); + // CID mappings private final Map codeToCid = new HashMap(); private final List codeToCidRanges = new ArrayList(); @@ -95,7 +102,7 @@ public String toUnicode(int code) /** * Reads a character code from a string in the content stream. - *

>See "CMap Mapping" and "Handling Undefined Characters" in PDF32000 for more details. + *

See "CMap Mapping" and "Handling Undefined Characters" in PDF32000 for more details. * * @param in string stream * @return character code @@ -105,6 +112,7 @@ public int readCode(InputStream in) throws IOException { byte[] bytes = new byte[maxCodeLength]; in.read(bytes,0,minCodeLength); + in.mark(maxCodeLength); for (int i = minCodeLength-1; i < maxCodeLength; i++) { final int byteCount = i+1; @@ -120,19 +128,35 @@ public int readCode(InputStream in) throws IOException bytes[byteCount] = (byte)in.read(); } } - throw new IOException("CMap is invalid"); + String seq = ""; + for (int i = 0; i < maxCodeLength; ++i) + { + seq += String.format("0x%02X (%04o) ", bytes[i], bytes[i]); + } + LOG.warn("Invalid character code sequence " + seq + "in CMap " + cmapName); + // PDFBOX-4811 reposition to where we were after initial read + if (in.markSupported()) + { + in.reset(); + } + else + { + LOG.warn("mark() and reset() not supported, " + (maxCodeLength - 1) + + " bytes have been skipped"); + } + return toInt(bytes, minCodeLength); // Adobe Reader behavior } /** * Returns an int for the given byte array */ - private int toInt(byte[] data, int dataLen) + static int toInt(byte[] data, int dataLen) { int code = 0; for (int i = 0; i < dataLen; ++i) { code <<= 8; - code |= (data[i] + 256) % 256; + code |= (data[i] & 0xFF); } return code; } @@ -187,6 +211,7 @@ private int getCodeFromArray( byte[] data, int offset, int length ) */ void addCharMapping(byte[] codes, String unicode) { + unicodeToByteCodes.put(unicode, codes.clone()); // clone needed, bytes is modified later int code = getCodeFromArray(codes, 0, codes.length); charToUnicode.put(code, unicode); @@ -197,6 +222,17 @@ void addCharMapping(byte[] codes, String unicode) } } + /** + * Get the code bytes for an unicode string. + * + * @param unicode + * @return the code bytes or null if there is none. 
+ */ + public byte[] getCodesFromUnicode(String unicode) + { + return unicodeToByteCodes.get(unicode); + } + /** * This will add a CID mapping. * @@ -211,14 +247,22 @@ void addCIDMapping(int code, int cid) /** * This will add a CID Range. * - * @param from starting charactor of the CID range. + * @param from starting character of the CID range. * @param to ending character of the CID range. * @param cid the cid to be started with. * */ void addCIDRange(char from, char to, int cid) { - codeToCidRanges.add(new CIDRange(from, to, cid)); + CIDRange lastRange = null; + if (!codeToCidRanges.isEmpty()) + { + lastRange = codeToCidRanges.get(codeToCidRanges.size() - 1); + } + if (lastRange == null || !lastRange.extend(from, to, cid)) + { + codeToCidRanges.add(new CIDRange(from, to, cid)); + } } /** @@ -248,6 +292,9 @@ void useCmap( CMap cmap ) charToUnicode.putAll(cmap.charToUnicode); codeToCid.putAll(cmap.codeToCid); codeToCidRanges.addAll(cmap.codeToCidRanges); + + // unicodeToByteCodes should be filled too, but this isn't possible in 2.0.* + // because we don't know the code length } /** diff --git a/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java b/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java index c79455effc2..e804a87d448 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java +++ b/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java @@ -16,12 +16,12 @@ */ package org.apache.fontbox.cmap; +import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.PushbackInputStream; -import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -41,6 +41,8 @@ public class CMapParser private final byte[] tokenParserByteBuffer = new byte[512]; + private boolean strictMode = false; + /** * Creates a new instance of CMapParser. 
*/ @@ -48,6 +50,16 @@ public CMapParser() { } + /** + * Creates a new instance of CMapParser. + * + * @param strictMode activates the strict mode used for inline CMaps + */ + public CMapParser(boolean strictMode) + { + this.strictMode = strictMode; + } + /** * Parse a CMAP file on the file system. * @@ -76,7 +88,7 @@ public CMap parse(File file) throws IOException * Parses a predefined CMap. * * @param name CMap name. - * @return The parsed predefined CMap as a java object. + * @return The parsed predefined CMap as a java object, never null. * @throws IOException If the CMap could not be parsed. */ public CMap parsePredefined(String name) throws IOException @@ -85,6 +97,8 @@ public CMap parsePredefined(String name) throws IOException try { input = getExternalCMap(name); + // deactivate strict mode + strictMode = false; return parse(input); } finally @@ -100,7 +114,7 @@ public CMap parsePredefined(String name) throws IOException * This will parse the stream and create a cmap object. * * @param input The CMAP stream to parse. - * @return The parsed stream as a java object. + * @return The parsed stream as a java object, never null. * @throws IOException If there is an error parsing the stream. 
*/ public CMap parse(InputStream input) throws IOException @@ -114,56 +128,61 @@ public CMap parse(InputStream input) throws IOException if (token instanceof Operator) { Operator op = (Operator) token; - if (op.op.equals("usecmap")) - { - parseUsecmap(previousToken, result); - } - else if (op.op.equals("endcmap")) + if (op.op.equals("endcmap")) { // end of CMap reached, stop reading as there isn't any interesting info anymore break; } - else if (op.op.equals("begincodespacerange")) - { - parseBegincodespacerange(previousToken, cmapStream, result); - } - else if (op.op.equals("beginbfchar")) - { - parseBeginbfchar(previousToken, cmapStream, result); - } - else if (op.op.equals("beginbfrange")) - { - parseBeginbfrange(previousToken, cmapStream, result); - } - else if (op.op.equals("begincidchar")) - { - parseBegincidchar(previousToken, cmapStream, result); - } - else if (op.op.equals("begincidrange")) + + if (previousToken != null) { - parseBegincidrange(previousToken, cmapStream, result); + if (op.op.equals("usecmap") && previousToken instanceof LiteralName) + { + parseUsecmap((LiteralName) previousToken, result); + } + else if (previousToken instanceof Number) + { + if (op.op.equals("begincodespacerange")) + { + parseBegincodespacerange((Number) previousToken, cmapStream, result); + } + else if (op.op.equals("beginbfchar")) + { + parseBeginbfchar((Number) previousToken, cmapStream, result); + } + else if (op.op.equals("beginbfrange")) + { + parseBeginbfrange((Number) previousToken, cmapStream, result); + } + else if (op.op.equals("begincidchar")) + { + parseBegincidchar((Number) previousToken, cmapStream, result); + } + else if (op.op.equals("begincidrange") && previousToken instanceof Integer) + { + parseBegincidrange((Integer) previousToken, cmapStream, result); + } + } } } else if (token instanceof LiteralName) { - parseLiteralName(token, cmapStream, result); + parseLiteralName((LiteralName) token, cmapStream, result); } previousToken = token; } return result; } 
- private void parseUsecmap(Object previousToken, CMap result) throws IOException + private void parseUsecmap(LiteralName useCmapName, CMap result) throws IOException { - LiteralName useCmapName = (LiteralName) previousToken; InputStream useStream = getExternalCMap(useCmapName.name); CMap useCMap = parse(useStream); result.useCmap(useCMap); } - private void parseLiteralName(Object token, PushbackInputStream cmapStream, CMap result) throws IOException + private void parseLiteralName(LiteralName literal, PushbackInputStream cmapStream, CMap result) throws IOException { - LiteralName literal = (LiteralName) token; if ("WMode".equals(literal.name)) { Object next = parseNextToken(cmapStream); @@ -226,43 +245,56 @@ else if ("Supplement".equals(literal.name)) } } - private void parseBegincodespacerange(Object previousToken, PushbackInputStream cmapStream, CMap result) throws IOException + /** + * Throws an IOException if expectedOperatorName not equals operator.op + * + * @param operator Instance of operator + * @param expectedOperatorName Expected name of operator + * @param rangeName The name of the range in which the operator is expected (without a tilde + * character), to be used in the exception message. 
+ * + * @throws IOException if expectedOperatorName not equals operator.op + */ + private void checkExpectedOperator(Operator operator, String expectedOperatorName, String rangeName) throws IOException + { + if (!operator.op.equals(expectedOperatorName)) + { + throw new IOException("Error : ~" + rangeName + " contains an unexpected operator : " + + operator.op); + } + } + + private void parseBegincodespacerange(Number cosCount, PushbackInputStream cmapStream, CMap result) throws IOException { - Number cosCount = (Number) previousToken; for (int j = 0; j < cosCount.intValue(); j++) { Object nextToken = parseNextToken(cmapStream); if (nextToken instanceof Operator) { - if (!((Operator) nextToken).op.equals("endcodespacerange")) - { - throw new IOException("Error : ~codespacerange contains an unexpected operator : " - + ((Operator) nextToken).op); - } + checkExpectedOperator((Operator) nextToken, "endcodespacerange", "codespacerange"); break; } byte[] startRange = (byte[]) nextToken; byte[] endRange = (byte[]) parseNextToken(cmapStream); - CodespaceRange range = new CodespaceRange(); - range.setStart(startRange); - range.setEnd(endRange); - result.addCodespaceRange(range); + try + { + result.addCodespaceRange(new CodespaceRange(startRange, endRange)); + } + catch (IllegalArgumentException ex) + { + throw new IOException(ex); + } } } - private void parseBeginbfchar(Object previousToken, PushbackInputStream cmapStream, CMap result) throws IOException + private void parseBeginbfchar(Number cosCount, PushbackInputStream cmapStream, CMap result) throws IOException { - Number cosCount = (Number) previousToken; for (int j = 0; j < cosCount.intValue(); j++) { Object nextToken = parseNextToken(cmapStream); if (nextToken instanceof Operator) { - if (!((Operator) nextToken).op.equals("endbfchar")) - { - throw new IOException("Error : ~bfchar contains an unexpected operator : " - + ((Operator) nextToken).op); - } + checkExpectedOperator((Operator) nextToken, "endbfchar", 
"bfchar"); break; } byte[] inputCode = (byte[]) nextToken; @@ -285,19 +317,14 @@ else if (nextToken instanceof LiteralName) } } - private void parseBegincidrange(Object previousToken, PushbackInputStream cmapStream, CMap result) throws IOException + private void parseBegincidrange(int numberOfLines, PushbackInputStream cmapStream, CMap result) throws IOException { - int numberOfLines = (Integer) previousToken; for (int n = 0; n < numberOfLines; n++) { Object nextToken = parseNextToken(cmapStream); if (nextToken instanceof Operator) { - if (!((Operator) nextToken).op.equals("endcidrange")) - { - throw new IOException("Error : ~cidrange contains an unexpected operator : " - + ((Operator) nextToken).op); - } + checkExpectedOperator((Operator) nextToken, "endcidrange", "cidrange"); break; } byte[] startCode = (byte[]) nextToken; @@ -307,7 +334,15 @@ private void parseBegincidrange(Object previousToken, PushbackInputStream cmapSt int mappedCode = (Integer) parseNextToken(cmapStream); if (startCode.length <= 2 && endCode.length <= 2) { - result.addCIDRange((char) start, (char) end, mappedCode); + // some CMaps are using CID ranges to map single values + if (end == start) + { + result.addCIDMapping(mappedCode, start); + } + else + { + result.addCIDRange((char) start, (char) end, mappedCode); + } } else { @@ -317,25 +352,20 @@ private void parseBegincidrange(Object previousToken, PushbackInputStream cmapSt { int mappedCID = createIntFromBytes(startCode); result.addCIDMapping(mappedCode++, mappedCID); - increment(startCode); + increment(startCode, startCode.length - 1, false); } } } } - private void parseBegincidchar(Object previousToken, PushbackInputStream cmapStream, CMap result) throws IOException + private void parseBegincidchar(Number cosCount, PushbackInputStream cmapStream, CMap result) throws IOException { - Number cosCount = (Number) previousToken; for (int j = 0; j < cosCount.intValue(); j++) { Object nextToken = parseNextToken(cmapStream); if (nextToken 
instanceof Operator) { - if (!((Operator) nextToken).op.equals("endcidchar")) - { - throw new IOException("Error : ~cidchar contains an unexpected operator : " - + ((Operator) nextToken).op); - } + checkExpectedOperator((Operator) nextToken, "endcidchar", "cidchar"); break; } byte[] inputCode = (byte[]) nextToken; @@ -345,76 +375,109 @@ private void parseBegincidchar(Object previousToken, PushbackInputStream cmapStr } } - private void parseBeginbfrange(Object previousToken, PushbackInputStream cmapStream, CMap result) throws IOException + private void parseBeginbfrange(Number cosCount, PushbackInputStream cmapStream, CMap result) throws IOException { - Number cosCount = (Number) previousToken; - for (int j = 0; j < cosCount.intValue(); j++) { Object nextToken = parseNextToken(cmapStream); if (nextToken instanceof Operator) { - if (!((Operator) nextToken).op.equals("endbfrange")) - { - throw new IOException("Error : ~bfrange contains an unexpected operator : " - + ((Operator) nextToken).op); - } + checkExpectedOperator((Operator) nextToken, "endbfrange", "bfrange"); break; } byte[] startCode = (byte[]) nextToken; byte[] endCode = (byte[]) parseNextToken(cmapStream); - nextToken = parseNextToken(cmapStream); - List array = null; - byte[] tokenBytes; - if (nextToken instanceof List) + int start = CMap.toInt(startCode, startCode.length); + int end = CMap.toInt(endCode, endCode.length); + // end has to be bigger than start or equal + if (end < start) { - array = (List) nextToken; - tokenBytes = array.get(0); - } - else - { - tokenBytes = (byte[]) nextToken; + // PDFBOX-4550: likely corrupt stream + break; } - boolean done = false; - - int arrayIndex = 0; - while (!done) + nextToken = parseNextToken(cmapStream); + if (nextToken instanceof List) { - if (compare(startCode, endCode) >= 0) - { - done = true; - } - String value = createStringFromBytes(tokenBytes); - result.addCharMapping(startCode, value); - increment(startCode); - - if (array == null) + List array = (List) 
nextToken; + // ignore empty and malformed arrays + if (!array.isEmpty() && array.size() >= end - start) { - increment(tokenBytes); + addMappingFrombfrange(result, startCode, array); } - else + } + // PDFBOX-3807: ignore null + else if (nextToken instanceof byte[]) + { + byte[] tokenBytes = (byte[]) nextToken; + // PDFBOX-3450: ignore <> + if (tokenBytes.length > 0) { - arrayIndex++; - if (arrayIndex < array.size()) + // PDFBOX-4720: + // some pdfs use the malformed bfrange <0000> <0000>. Add support by adding a identity + // mapping for the whole range instead of cutting it after 255 entries + // TODO find a more efficient method to represent all values for a identity mapping + if (tokenBytes.length == 2 && start == 0 && end == 0xffff + && tokenBytes[0] == 0 && tokenBytes[1] == 0) + { + for (int i = 0; i < 256; i++) + { + startCode[0] = (byte) i; + startCode[1] = 0; + tokenBytes[0] = (byte) i; + tokenBytes[1] = 0; + addMappingFrombfrange(result, startCode, 256, tokenBytes); + } + } + else { - tokenBytes = array.get(arrayIndex); + addMappingFrombfrange(result, startCode, end - start + 1, tokenBytes); } } } } } + private void addMappingFrombfrange(CMap cmap, byte[] startCode, List tokenBytesList) + { + for (byte[] tokenBytes : tokenBytesList) + { + String value = createStringFromBytes(tokenBytes); + cmap.addCharMapping(startCode, value); + increment(startCode, startCode.length - 1, false); + } + } + + private void addMappingFrombfrange(CMap cmap, byte[] startCode, int values, + byte[] tokenBytes) + { + for (int i = 0; i < values; i++) + { + String value = createStringFromBytes(tokenBytes); + cmap.addCharMapping(startCode, value); + if (!increment(tokenBytes, tokenBytes.length - 1, strictMode)) + { + // overflow detected -> stop adding further mappings + break; + } + increment(startCode, startCode.length - 1, false); + } + } + /** * Returns an input stream containing the given "use" CMap. + * + * @param name Name of the given "use" CMap resource. 
+ * @throws IOException if the CMap resource doesn't exist or if there is an error opening its + * stream. */ protected InputStream getExternalCMap(String name) throws IOException { - URL url = getClass().getResource(name); - if (url == null) + InputStream resourceAsStream = getClass().getResourceAsStream(name); + if (resourceAsStream == null) { throw new IOException("Error: Could not find referenced cmap stream " + name); } - return url.openStream(); + return new BufferedInputStream(resourceAsStream); } private Object parseNextToken(PushbackInputStream is) throws IOException @@ -432,7 +495,7 @@ private Object parseNextToken(PushbackInputStream is) throws IOException { // header operations, for now return the entire line // may need to smarter in the future - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append((char) nextByte); readUntilEndOfLine(is, buffer); retval = buffer.toString(); @@ -440,7 +503,7 @@ private Object parseNextToken(PushbackInputStream is) throws IOException } case '(': { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); int stringByte = is.read(); while (stringByte != -1 && stringByte != ')') @@ -536,6 +599,11 @@ else if (isWhitespaceOrEOF(theNextByte)) if (multiplyer == 16) { bufferIndex++; + if (bufferIndex >= tokenParserByteBuffer.length) + { + throw new IOException("cmap token ist larger than buffer size " + + tokenParserByteBuffer.length); + } tokenParserByteBuffer[bufferIndex] = 0; multiplyer = 1; } @@ -554,7 +622,7 @@ else if (isWhitespaceOrEOF(theNextByte)) } case '/': { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); int stringByte = is.read(); while (!isWhitespaceOrEOF(stringByte) && !isDelimiter(stringByte)) @@ -585,7 +653,7 @@ else if (isWhitespaceOrEOF(theNextByte)) case '8': case '9': { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append((char) 
nextByte); nextByte = is.read(); @@ -598,7 +666,7 @@ else if (isWhitespaceOrEOF(theNextByte)) String value = buffer.toString(); if (value.indexOf('.') >= 0) { - retval = new Double(value); + retval = Double.valueOf(value); } else { @@ -608,7 +676,7 @@ else if (isWhitespaceOrEOF(theNextByte)) } default: { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append((char) nextByte); nextByte = is.read(); @@ -631,7 +699,7 @@ else if (isWhitespaceOrEOF(theNextByte)) return retval; } - private void readUntilEndOfLine(InputStream is, StringBuffer buf) throws IOException + private void readUntilEndOfLine(InputStream is, StringBuilder buf) throws IOException { int nextByte = is.read(); while (nextByte != -1 && nextByte != 0x0D && nextByte != 0x0A) @@ -667,77 +735,46 @@ private boolean isDelimiter(int aByte) } } - private void increment(byte[] data) + private boolean increment(byte[] data, int position, boolean useStrictMode) { - increment(data, data.length - 1); - } - - private void increment(byte[] data, int position) - { - if (position > 0 && (data[position] + 256) % 256 == 255) + if (position > 0 && (data[position] & 0xFF) == 255) { + // PDFBOX-4661: avoid overflow of the last byte, all following values are undefined + // PDFBOX-5090: strict mode has to be used for CMaps within pdfs + if (useStrictMode) + { + return false; + } data[position] = 0; - increment(data, position - 1); + increment(data, position - 1, useStrictMode); } else { data[position] = (byte) (data[position] + 1); } + return true; } private int createIntFromBytes(byte[] bytes) { - int intValue = (bytes[0] + 256) % 256; + int intValue = bytes[0] & 0xFF; if (bytes.length == 2) { intValue <<= 8; - intValue += (bytes[1] + 256) % 256; + intValue += bytes[1] & 0xFF; } return intValue; } - private String createStringFromBytes(byte[] bytes) throws IOException + private String createStringFromBytes(byte[] bytes) { - String retval; - if (bytes.length == 1) - { - retval = 
new String(bytes, Charsets.ISO_8859_1); - } - else - { - retval = new String(bytes, Charsets.UTF_16BE); - } - return retval; - } - - private int compare(byte[] first, byte[] second) - { - int retval = 1; - int firstLength = first.length; - for (int i = 0; i < firstLength; i++) - { - if (first[i] == second[i]) - { - continue; - } - else if (((first[i] + 256) % 256) < ((second[i] + 256) % 256)) - { - retval = -1; - break; - } - else - { - retval = 1; - break; - } - } - return retval; + return new String(bytes, bytes.length == 1 ? Charsets.ISO_8859_1 : Charsets.UTF_16BE); } /** * Internal class. */ - private final class LiteralName + private static final class LiteralName { private String name; @@ -750,7 +787,7 @@ private LiteralName(String theName) /** * Internal class. */ - private final class Operator + private static final class Operator { private String op; diff --git a/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java b/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java index bd183573737..b0929847c5f 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java +++ b/fontbox/src/main/java/org/apache/fontbox/cmap/CodespaceRange.java @@ -16,7 +16,6 @@ */ package org.apache.fontbox.cmap; - /** * This represents a single entry in the codespace range. * @@ -24,14 +23,49 @@ */ public class CodespaceRange { - private byte[] start; - private byte[] end; - private int startInt; - private int endInt; + private byte[] startBytes; + private byte[] endBytes; + private int[] start; + private int[] end; private int codeLength = 0; + /** + * Creates a new instance of CodespaceRange. The length of both arrays has to be the same.
+ * For one byte ranges startBytes and endBytes define a linear range of values. Double byte values define a + * rectangular range not a linear range. Examples:
+ * <00> <20> defines a linear range from 0x00 up to 0x20.
+ * <8140> to <9FFC> defines a rectangular range. The high byte has to be within 0x81 and 0x9F and the + * low byte has to be within 0x40 and 0xFC + * + * @param startBytes + * @param endBytes + */ + public CodespaceRange(byte[] startBytes, byte[] endBytes) + { + byte[] correctedStartBytes = startBytes; + if (startBytes.length != endBytes.length && startBytes.length == 1 && startBytes[0] == 0) + { + correctedStartBytes = new byte[endBytes.length]; + } + else if (startBytes.length != endBytes.length) + { + throw new IllegalArgumentException( + "The start and the end values must not have different lengths."); + } + start = new int[correctedStartBytes.length]; + end = new int[endBytes.length]; + for (int i = 0; i < correctedStartBytes.length; i++) + { + start[i] = correctedStartBytes[i] & 0xFF; + end[i] = endBytes[i] & 0xFF; + } + codeLength = endBytes.length; + } + /** * Creates a new instance of CodespaceRange. + * + * @deprecated to be removed in the next major release. */ public CodespaceRange() { @@ -47,43 +81,63 @@ public int getCodeLength() return codeLength; } - /** Getter for property end. + /** + * Getter for property end. + * * @return Value of property end. * + * @deprecated to be removed in the next major release */ public byte[] getEnd() { - return end; + return endBytes; } - /** Setter for property end. + /** + * Setter for property end. + * * @param endBytes New value of property end. * + * @deprecated to be removed in the next major release */ void setEnd(byte[] endBytes) { - end = endBytes; - endInt = toInt(endBytes, endBytes.length); + this.endBytes = endBytes; + end = new int[endBytes.length]; + for (int i = 0; i < endBytes.length; i++) + { + end[i] = endBytes[i] & 0xFF; + } } - /** Getter for property start. + /** + * Getter for property start. + * * @return Value of property start. * + * @deprecated to be removed in the next major release */ public byte[] getStart() { - return start; + return startBytes; } - /** Setter for property start. 
+ /** + * Setter for property start. + * * @param startBytes New value of property start. * + * @deprecated to be removed in the next major release */ void setStart(byte[] startBytes) { - start = startBytes; - codeLength = start.length; - startInt = toInt(startBytes, startBytes.length); + this.startBytes = startBytes; + start = new int[startBytes.length]; + for (int i = 0; i < startBytes.length; i++) + { + start[i] = startBytes[i] & 0xFF; + } + codeLength = startBytes.length; } /** @@ -94,34 +148,25 @@ public boolean matches(byte[] code) return isFullMatch(code, code.length); } - /** - * Returns an int for the given byte array - */ - private int toInt(byte[] data, int dataLen) - { - int code = 0; - for (int i = 0; i < dataLen; ++i) - { - code <<= 8; - code |= (data[i] + 256) % 256; - } - return code; - } /** * Returns true if the given code bytes match this codespace range. */ public boolean isFullMatch(byte[] code, int codeLen) { // code must be the same length as the bounding codes - if (codeLen == codeLength) + if (codeLength != codeLen) + { + return false; + } + for (int i = 0; i < codeLength; i++) { - int value = toInt(code, codeLen); - if (value >= startInt && value <=endInt) + int codeAsInt = code[i] & 0xFF; + if (codeAsInt < start[i] || codeAsInt > end[i]) { - return true; + return false; } } - return false; + return true; } } diff --git a/fontbox/src/main/java/org/apache/fontbox/cmap/package.html b/fontbox/src/main/java/org/apache/fontbox/cmap/package.html index cb43354d6b3..10421e520ae 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cmap/package.html +++ b/fontbox/src/main/java/org/apache/fontbox/cmap/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/fontbox/src/main/java/org/apache/fontbox/encoding/Encoding.java b/fontbox/src/main/java/org/apache/fontbox/encoding/Encoding.java index 8bf1f726a12..0212b4de2d3 100644 --- a/fontbox/src/main/java/org/apache/fontbox/encoding/Encoding.java +++ b/fontbox/src/main/java/org/apache/fontbox/encoding/Encoding.java @@ -61,10 +61,11 @@ public Integer getCode( String name ) } /** - * This will take a character code and get the name from the code. + * This will take a character code and get the name from the code. This method will never return + * null. * * @param code The character code. - * @return The name of the character. + * @return The name of the character, or ".notdef" if the name doesn't exist. */ public String getName( int code ) { diff --git a/fontbox/src/main/java/org/apache/fontbox/encoding/package.html b/fontbox/src/main/java/org/apache/fontbox/encoding/package.html index 493554b6793..d8a9e3cc08c 100644 --- a/fontbox/src/main/java/org/apache/fontbox/encoding/package.html +++ b/fontbox/src/main/java/org/apache/fontbox/encoding/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java b/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java index 9774e8eb717..06eb1032c04 100644 --- a/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java +++ b/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java @@ -34,7 +34,7 @@ public class PfbParser { /** - * the pdf header length. + * the pfb header length. 
* (start-marker (1 byte), ascii-/binary-marker (1 byte), size (4 byte)) * 3*6 == 18 */ @@ -88,7 +88,20 @@ public class PfbParser */ public PfbParser(final String filename) throws IOException { - this( new BufferedInputStream(new FileInputStream(filename),BUFFER_SIZE) ); + BufferedInputStream in = null; + try + { + in = new BufferedInputStream(new FileInputStream(filename), BUFFER_SIZE); + byte[] pfb = readFully(in); + parsePfb(pfb); + } + finally + { + if (in != null) + { + in.close(); + } + } } /** @@ -98,7 +111,7 @@ public PfbParser(final String filename) throws IOException */ public PfbParser(final InputStream in) throws IOException { - byte[] pfb = readPfbInput(in); + byte[] pfb = readFully(in); parsePfb(pfb); } @@ -140,11 +153,21 @@ private void parsePfb(final byte[] pfb) throws IOException size += in.read() << 8; size += in.read() << 16; size += in.read() << 24; + if (size < 0) + { + throw new IOException("PFB record size is negative: " + size); + } lengths[records] = size; if (pointer >= pfbdata.length) { throw new EOFException("attempted to read past EOF"); } + if (size > pfbdata.length - pointer) + { + throw new IOException("PFB record size (" + size + + ") doesn't fit in buffer, position: " + pointer + + ", total length: " + pfbdata.length); + } int got = in.read(pfbdata, pointer, size); if (got < 0) { @@ -155,17 +178,17 @@ private void parsePfb(final byte[] pfb) throws IOException } /** - * Read the pdf input. + * Read the pfb input. * @param in The input. - * @return Returns the pdf-array. + * @return Returns the pfb-array. * @throws IOException if an IO-error occurs. 
*/ - private byte[] readPfbInput(final InputStream in) throws IOException + private byte[] readFully(final InputStream in) throws IOException { // copy into an array ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] tmpbuf = new byte[BUFFER_SIZE]; - int amountRead = -1; + int amountRead; while ((amountRead = in.read(tmpbuf)) != -1) { out.write(tmpbuf, 0, amountRead); diff --git a/fontbox/src/main/java/org/apache/fontbox/pfb/package.html b/fontbox/src/main/java/org/apache/fontbox/pfb/package.html index 9c3ddea509b..df0306a0b86 100644 --- a/fontbox/src/main/java/org/apache/fontbox/pfb/package.html +++ b/fontbox/src/main/java/org/apache/fontbox/pfb/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/BufferedRandomAccessFile.java b/fontbox/src/main/java/org/apache/fontbox/ttf/BufferedRandomAccessFile.java index 576f91237ab..8938eb13616 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/BufferedRandomAccessFile.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/BufferedRandomAccessFile.java @@ -36,7 +36,7 @@ public class BufferedRandomAccessFile extends RandomAccessFile /** * Uses a byte instead of a char buffer for efficiency reasons. */ - private final byte buffer[]; + private final byte[] buffer; private int bufend = 0; private int bufpos = 0; @@ -44,11 +44,6 @@ public class BufferedRandomAccessFile extends RandomAccessFile * The position inside the actual file. */ private long realpos = 0; - - /** - * Buffer size. - */ - private final int BUFSIZE; /** * Creates a new instance of the BufferedRandomAccessFile. 
@@ -66,8 +61,7 @@ public BufferedRandomAccessFile(String filename, String mode, int bufsize) throws FileNotFoundException { super(filename, mode); - BUFSIZE = bufsize; - buffer = new byte[BUFSIZE]; + buffer = new byte[bufsize]; } /** @@ -86,8 +80,7 @@ public BufferedRandomAccessFile(File file, String mode, int bufsize) throws FileNotFoundException { super(file, mode); - BUFSIZE = bufsize; - buffer = new byte[BUFSIZE]; + buffer = new byte[bufsize]; } /** @@ -110,7 +103,7 @@ public final int read() throws IOException } /** - * Reads the next BUFSIZE bytes into the internal buffer. + * Reads as many bytes as possible into the internal buffer. * * @return The total number of bytes read into the buffer, or -1 if there is no more data * because the end of the file has been reached. @@ -119,7 +112,7 @@ public final int read() throws IOException */ private int fillBuffer() throws IOException { - int n = super.read(buffer, 0, BUFSIZE); + int n = super.read(buffer); if (n >= 0) { @@ -146,26 +139,39 @@ private void invalidate() throws IOException * {@inheritDoc} */ @Override - public int read(byte b[], int off, int len) throws IOException + public int read(byte[] b, int off, int len) throws IOException { - int leftover = bufend - bufpos; - if (len <= leftover) - { - System.arraycopy(buffer, bufpos, b, off, len); - bufpos += len; - return len; - } - System.arraycopy(buffer, bufpos, b, off, leftover); - bufpos += leftover; - if (fillBuffer() > 0) + int curLen = len; // length of what is left to read (shrinks) + int curOff = off; // offset where to put read data (grows) + int totalRead = 0; + + while (true) { - int bytesRead = read(b, off + leftover, len - leftover); - if (bytesRead > 0) + int leftover = bufend - bufpos; + if (curLen <= leftover) + { + System.arraycopy(buffer, bufpos, b, curOff, curLen); + bufpos += curLen; + return totalRead + curLen; + } + // curLen > leftover, we need to read more than what remains in buffer + System.arraycopy(buffer, bufpos, b, curOff, 
leftover); + totalRead += leftover; + bufpos += leftover; + if (fillBuffer() > 0) + { + curOff += leftover; + curLen -= leftover; + } + else { - leftover += bytesRead; + if (totalRead == 0) + { + return -1; + } + return totalRead; } } - return leftover; } /** diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java index e846788f867..136b09fe316 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java @@ -45,12 +45,13 @@ public class CFFTable extends TTFTable * @param data The stream to read the data from. * @throws java.io.IOException If there is an error reading the data. */ - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + @Override + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { byte[] bytes = data.read((int)getLength()); CFFParser parser = new CFFParser(); - cffFont = parser.parse(bytes, new ByteSource(font)).get(0); + cffFont = parser.parse(bytes, new CFFBytesource(font)).get(0); initialized = true; } @@ -66,11 +67,11 @@ public CFFFont getFont() /** * Allows bytes to be re-read later by CFFParser. */ - private static class ByteSource implements CFFParser.ByteSource + private static class CFFBytesource implements CFFParser.ByteSource { private final TrueTypeFont ttf; - ByteSource(TrueTypeFont ttf) + CFFBytesource(TrueTypeFont ttf) { this.ttf = ttf; } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/CmapLookup.java b/fontbox/src/main/java/org/apache/fontbox/ttf/CmapLookup.java new file mode 100644 index 00000000000..953aca7f504 --- /dev/null +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/CmapLookup.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.fontbox.ttf; + +import java.util.List; + +/** + * An interface that abstracts the cid <-> codepoint lookup functionality of cmap. + * + * @author Aaron Madlon-Kay + */ +public interface CmapLookup +{ + + /** + * Returns the GlyphId linked with the given character code. + * + * @param codePointAt the given character code to be mapped + * @return glyphId the corresponding glyph id for the given character code + */ + int getGlyphId(int codePointAt); + + /** + * Returns all possible character codes for the given gid, or null if there is none. 
+ * + * @param gid glyph id + * @return a list with all character codes the given gid maps to + */ + List getCharCodes(int gid); + +} \ No newline at end of file diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java index 3eadfd68a85..ca61232d097 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java @@ -17,11 +17,14 @@ package org.apache.fontbox.ttf; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Map.Entry; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -30,18 +33,19 @@ * * @author Ben Litchfield */ -public class CmapSubtable +public class CmapSubtable implements CmapLookup { private static final Log LOG = LogFactory.getLog(CmapSubtable.class); - private static final long LEAD_OFFSET = 0xD800 - (0x10000 >> 10); - private static final long SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00; + private static final long LEAD_OFFSET = 0xD800l - (0x10000 >> 10); + private static final long SURROGATE_OFFSET = 0x10000l - (0xD800 << 10) - 0xDC00; private int platformId; private int platformEncodingId; private long subTableOffset; private int[] glyphIdToCharacterCode; - private Map characterCodeToGlyphId; + private final Map> glyphIdToCharacterCodeMultiple = new HashMap>(); + private Map characterCodeToGlyphId= new HashMap(); /** * This will read the required data from the stream. @@ -49,7 +53,7 @@ public class CmapSubtable * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. 
*/ - public void initData(TTFDataStream data) throws IOException + void initData(TTFDataStream data) throws IOException { platformId = data.readUnsignedShort(); platformEncodingId = data.readUnsignedShort(); @@ -64,7 +68,7 @@ public void initData(TTFDataStream data) throws IOException * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. */ - public void initSubtable(CmapTable cmap, int numGlyphs, TTFDataStream data) throws IOException + void initSubtable(CmapTable cmap, int numGlyphs, TTFDataStream data) throws IOException { data.seek(cmap.getOffset() + subTableOffset); int subtableFormat = data.readUnsignedShort(); @@ -124,7 +128,7 @@ public void initSubtable(CmapTable cmap, int numGlyphs, TTFDataStream data) thro * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ - protected void processSubtype8(TTFDataStream data, int numGlyphs) throws IOException + void processSubtype8(TTFDataStream data, int numGlyphs) throws IOException { // --- is32 is a 65536 BITS array ( = 8192 BYTES) int[] is32 = data.readUnsignedByteArray(8192); @@ -138,6 +142,11 @@ protected void processSubtype8(TTFDataStream data, int numGlyphs) throws IOExcep glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs); characterCodeToGlyphId = new HashMap(numGlyphs); + if (numGlyphs == 0) + { + LOG.warn("subtable has no glyphs"); + return; + } // -- Read all sub header for (long i = 0; i < nbGroups; ++i) { @@ -156,7 +165,11 @@ protected void processSubtype8(TTFDataStream data, int numGlyphs) throws IOExcep // -- Convert the Character code in decimal if (j > Integer.MAX_VALUE) { - throw new IOException("[Sub Format 8] Invalid Character code"); + throw new IOException("[Sub Format 8] Invalid character code " + j); + } + if ((int) j / 8 >= is32.length) + { + throw new IOException("[Sub Format 8] Invalid character code " + j); } int currentCharCode; @@ -174,7 +187,7 @@ protected 
void processSubtype8(TTFDataStream data, int numGlyphs) throws IOExcep long codepoint = (lead << 10) + trail + SURROGATE_OFFSET; if (codepoint > Integer.MAX_VALUE) { - throw new IOException("[Sub Format 8] Invalid Character code"); + throw new IOException("[Sub Format 8] Invalid character code " + codepoint); } currentCharCode = (int) codepoint; } @@ -198,7 +211,7 @@ protected void processSubtype8(TTFDataStream data, int numGlyphs) throws IOExcep * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ - protected void processSubtype10(TTFDataStream data, int numGlyphs) throws IOException + void processSubtype10(TTFDataStream data, int numGlyphs) throws IOException { long startCode = data.readUnsignedInt(); long numChars = data.readUnsignedInt(); @@ -222,11 +235,16 @@ protected void processSubtype10(TTFDataStream data, int numGlyphs) throws IOExce * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ - protected void processSubtype12(TTFDataStream data, int numGlyphs) throws IOException + void processSubtype12(TTFDataStream data, int numGlyphs) throws IOException { long nbGroups = data.readUnsignedInt(); glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs); characterCodeToGlyphId = new HashMap(numGlyphs); + if (numGlyphs == 0) + { + LOG.warn("subtable has no glyphs"); + return; + } for (long i = 0; i < nbGroups; ++i) { long firstCode = data.readUnsignedInt(); @@ -273,10 +291,16 @@ protected void processSubtype12(TTFDataStream data, int numGlyphs) throws IOExce * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. 
*/ - protected void processSubtype13(TTFDataStream data, int numGlyphs) throws IOException + void processSubtype13(TTFDataStream data, int numGlyphs) throws IOException { long nbGroups = data.readUnsignedInt(); + glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs); characterCodeToGlyphId = new HashMap(numGlyphs); + if (numGlyphs == 0) + { + LOG.warn("subtable has no glyphs"); + return; + } for (long i = 0; i < nbGroups; ++i) { long firstCode = data.readUnsignedInt(); @@ -325,7 +349,7 @@ protected void processSubtype13(TTFDataStream data, int numGlyphs) throws IOExce * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ - protected void processSubtype14(TTFDataStream data, int numGlyphs) throws IOException + void processSubtype14(TTFDataStream data, int numGlyphs) throws IOException { // Unicode Variation Sequences (UVS) // see http://blogs.adobe.com/CCJKType/2013/05/opentype-cmap-table-ramblings.html @@ -339,29 +363,24 @@ protected void processSubtype14(TTFDataStream data, int numGlyphs) throws IOExce * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. 
*/ - protected void processSubtype6(TTFDataStream data, int numGlyphs) throws IOException + void processSubtype6(TTFDataStream data, int numGlyphs) throws IOException { int firstCode = data.readUnsignedShort(); int entryCount = data.readUnsignedShort(); - // skip emtpy tables + // skip empty tables if (entryCount == 0) { return; } - Map tmpGlyphToChar = new HashMap(numGlyphs); characterCodeToGlyphId = new HashMap(numGlyphs); int[] glyphIdArray = data.readUnsignedShortArray(entryCount); + int maxGlyphId = 0; for (int i = 0; i < entryCount; i++) { - tmpGlyphToChar.put(glyphIdArray[i], firstCode + i); - characterCodeToGlyphId.put((firstCode + i), glyphIdArray[i]); - } - glyphIdToCharacterCode = newGlyphIdToCharacterCode(Collections.max(tmpGlyphToChar.keySet()) + 1); - for (Entry entry : tmpGlyphToChar.entrySet()) - { - // link the glyphId with the right character code - glyphIdToCharacterCode[entry.getKey()] = entry.getValue(); + maxGlyphId = Math.max(maxGlyphId, glyphIdArray[i]); + characterCodeToGlyphId.put(firstCode + i, glyphIdArray[i]); } + buildGlyphIdToCharacterCodeLookup(maxGlyphId); } /** @@ -371,7 +390,7 @@ protected void processSubtype6(TTFDataStream data, int numGlyphs) throws IOExcep * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. 
*/ - protected void processSubtype4(TTFDataStream data, int numGlyphs) throws IOException + void processSubtype4(TTFDataStream data, int numGlyphs) throws IOException { int segCountX2 = data.readUnsignedShort(); int segCount = segCountX2 / 2; @@ -382,12 +401,11 @@ protected void processSubtype4(TTFDataStream data, int numGlyphs) throws IOExcep int reservedPad = data.readUnsignedShort(); int[] startCount = data.readUnsignedShortArray(segCount); int[] idDelta = data.readUnsignedShortArray(segCount); + long idRangeOffsetPosition = data.getCurrentPosition(); int[] idRangeOffset = data.readUnsignedShortArray(segCount); - Map tmpGlyphToChar = new HashMap(numGlyphs); characterCodeToGlyphId = new HashMap(numGlyphs); - - long currentPosition = data.getCurrentPosition(); + int maxGlyphId = 0; for (int i = 0; i < segCount; i++) { @@ -395,32 +413,27 @@ protected void processSubtype4(TTFDataStream data, int numGlyphs) throws IOExcep int end = endCount[i]; int delta = idDelta[i]; int rangeOffset = idRangeOffset[i]; + long segmentRangeOffset = idRangeOffsetPosition + (i * 2) + rangeOffset; if (start != 65535 && end != 65535) { for (int j = start; j <= end; j++) { if (rangeOffset == 0) { - int glyphid = (j + delta) % 65536; - tmpGlyphToChar.put(glyphid, j); + int glyphid = (j + delta) & 0xFFFF; + maxGlyphId = Math.max(glyphid, maxGlyphId); characterCodeToGlyphId.put(j, glyphid); } else { - long glyphOffset = currentPosition + ((rangeOffset / 2) + - (j - start) + - (i - segCount)) * 2; + long glyphOffset = segmentRangeOffset + ((j - start) * 2); data.seek(glyphOffset); int glyphIndex = data.readUnsignedShort(); if (glyphIndex != 0) { - glyphIndex += delta; - glyphIndex %= 65536; - if (!tmpGlyphToChar.containsKey(glyphIndex)) - { - tmpGlyphToChar.put(glyphIndex, j); - characterCodeToGlyphId.put(j, glyphIndex); - } + glyphIndex = (glyphIndex + delta) & 0xFFFF; + maxGlyphId = Math.max(glyphIndex, maxGlyphId); + characterCodeToGlyphId.put(j, glyphIndex); } } } @@ -431,16 +444,38 @@ 
protected void processSubtype4(TTFDataStream data, int numGlyphs) throws IOExcep * this is the final result key=glyphId, value is character codes Create an array that contains MAX(GlyphIds) * element, or -1 */ - if (tmpGlyphToChar.isEmpty()) + if (characterCodeToGlyphId.isEmpty()) { LOG.warn("cmap format 4 subtable is empty"); return; } - glyphIdToCharacterCode = newGlyphIdToCharacterCode(Collections.max(tmpGlyphToChar.keySet()) + 1); - for (Entry entry : tmpGlyphToChar.entrySet()) + buildGlyphIdToCharacterCodeLookup(maxGlyphId); + } + + private void buildGlyphIdToCharacterCodeLookup(int maxGlyphId) + { + glyphIdToCharacterCode = newGlyphIdToCharacterCode(maxGlyphId + 1); + for (Entry entry : characterCodeToGlyphId.entrySet()) { - // link the glyphId with the right character code - glyphIdToCharacterCode[entry.getKey()] = entry.getValue(); + if (glyphIdToCharacterCode[entry.getValue()] == -1) + { + // add new value to the array + glyphIdToCharacterCode[entry.getValue()] = entry.getKey(); + } + else + { + // there is already a mapping for the given glyphId + List mappedValues = glyphIdToCharacterCodeMultiple.get(entry.getValue()); + if (mappedValues == null) + { + mappedValues = new ArrayList(); + glyphIdToCharacterCodeMultiple.put(entry.getValue(), mappedValues); + mappedValues.add(glyphIdToCharacterCode[entry.getValue()]); + // mark value as multiple mapping + glyphIdToCharacterCode[entry.getValue()] = Integer.MIN_VALUE; + } + mappedValues.add(entry.getKey()); + } } } @@ -451,7 +486,7 @@ protected void processSubtype4(TTFDataStream data, int numGlyphs) throws IOExcep * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. 
*/ - protected void processSubtype2(TTFDataStream data, int numGlyphs) throws IOException + void processSubtype2(TTFDataStream data, int numGlyphs) throws IOException { int[] subHeaderKeys = new int[256]; // ---- keep the Max Index of the SubHeader array to know its length @@ -475,6 +510,11 @@ protected void processSubtype2(TTFDataStream data, int numGlyphs) throws IOExcep long startGlyphIndexOffset = data.getCurrentPosition(); glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs); characterCodeToGlyphId = new HashMap(numGlyphs); + if (numGlyphs == 0) + { + LOG.warn("subtable has no glyphs"); + return; + } for (int i = 0; i <= maxSubHeaderIndex; ++i) { SubHeader sh = subHeaders[i]; @@ -498,6 +538,10 @@ protected void processSubtype2(TTFDataStream data, int numGlyphs) throws IOExcep if (p > 0) { p = (p + idDelta) % 65536; + if (p < 0) + { + p += 65536; + } } if (p >= numGlyphs) @@ -518,14 +562,14 @@ protected void processSubtype2(TTFDataStream data, int numGlyphs) throws IOExcep * @param data the data stream of the to be parsed ttf font * @throws IOException If there is an error parsing the true type font. 
*/ - protected void processSubtype0(TTFDataStream data) throws IOException + void processSubtype0(TTFDataStream data) throws IOException { byte[] glyphMapping = data.read(256); glyphIdToCharacterCode = newGlyphIdToCharacterCode(256); characterCodeToGlyphId = new HashMap(glyphMapping.length); for (int i = 0; i < glyphMapping.length; i++) { - int glyphIndex = (glyphMapping[i] + 256) % 256; + int glyphIndex = glyphMapping[i] & 0xFF; glyphIdToCharacterCode[glyphIndex] = i; characterCodeToGlyphId.put(i, glyphIndex); } @@ -580,6 +624,7 @@ public void setPlatformId(int platformIdValue) * @param characterCode the given character code to be mapped * @return glyphId the corresponding glyph id for the given character code */ + @Override public int getGlyphId(int characterCode) { Integer glyphId = characterCodeToGlyphId.get(characterCode); @@ -591,22 +636,72 @@ public int getGlyphId(int characterCode) * * @param gid glyph id * @return character code + * + * @deprecated the mapping may be ambiguous, see {@link #getCharCodes(int)}. The first mapped value is returned by + * default. */ + @Deprecated public Integer getCharacterCode(int gid) { - if (gid < 0 || gid >= glyphIdToCharacterCode.length) + int code = getCharCode(gid); + if (code == -1) { return null; } + // ambiguous mapping + if (code == Integer.MIN_VALUE) + { + List mappedValues = glyphIdToCharacterCodeMultiple.get(gid); + if (mappedValues != null) + { + // use the first mapping + return mappedValues.get(0); + } + } + return code; + } + + private int getCharCode(int gid) + { + if (gid < 0 || gid >= glyphIdToCharacterCode.length) + { + return -1; + } + return glyphIdToCharacterCode[gid]; + } - // workaround for the fact that glyphIdToCharacterCode doesn't distinguish between - // missing character codes and code 0. - int code = glyphIdToCharacterCode[gid]; + /** + * Returns all possible character codes for the given gid, or null if there is none. 
+ * + * @param gid glyph id + * @return a list with all character codes the given gid maps to + * + */ + @Override + public List getCharCodes(int gid) + { + int code = getCharCode(gid); if (code == -1) { return null; } - return code; + List codes = null; + if (code == Integer.MIN_VALUE) + { + List mappedValues = glyphIdToCharacterCodeMultiple.get(gid); + if (mappedValues != null) + { + codes = new ArrayList(mappedValues); + // sort the list to provide a reliable order + Collections.sort(codes); + } + } + else + { + codes = new ArrayList(1); + codes.add(code); + } + return codes; } @Override @@ -620,7 +715,7 @@ public String toString() * Class used to manage CMap - Format 2. * */ - private class SubHeader + private static class SubHeader { private final int firstCode; private final int entryCount; diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/CmapTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/CmapTable.java index 043809243c9..afe5aa502ab 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/CmapTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/CmapTable.java @@ -68,8 +68,10 @@ public class CmapTable extends TTFTable * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. 
*/ - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + @Override + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { + @SuppressWarnings({"unused", "squid:S1854", "squid:S1481"}) int version = data.readUnsignedShort(); int numberOfTables = data.readUnsignedShort(); cmaps = new CmapSubtable[numberOfTables]; diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfCompositeComp.java b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfCompositeComp.java index 01a6f2deff5..47cbed27ba1 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfCompositeComp.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfCompositeComp.java @@ -87,7 +87,7 @@ public class GlyfCompositeComp * @param bais the stream to be read * @throws IOException is thrown if something went wrong */ - protected GlyfCompositeComp(TTFDataStream bais) throws IOException + GlyfCompositeComp(TTFDataStream bais) throws IOException { flags = bais.readSignedShort(); glyphIndex = bais.readUnsignedShort();// number of glyph in a font is uint16 @@ -121,25 +121,25 @@ protected GlyfCompositeComp(TTFDataStream bais) throws IOException if ((flags & WE_HAVE_A_SCALE) != 0) { int i = bais.readSignedShort(); - xscale = yscale = (double) i / (double) 0x4000; + xscale = yscale = i / (double) 0x4000; } else if ((flags & WE_HAVE_AN_X_AND_Y_SCALE) != 0) { short i = bais.readSignedShort(); - xscale = (double) i / (double) 0x4000; + xscale = i / (double) 0x4000; i = bais.readSignedShort(); - yscale = (double) i / (double) 0x4000; + yscale = i / (double) 0x4000; } else if ((flags & WE_HAVE_A_TWO_BY_TWO) != 0) { int i = bais.readSignedShort(); - xscale = (double) i / (double) 0x4000; + xscale = i / (double) 0x4000; i = bais.readSignedShort(); - scale01 = (double) i / (double) 0x4000; + scale01 = i / (double) 0x4000; i = bais.readSignedShort(); - scale10 = (double) i / (double) 0x4000; + scale10 = i / (double) 0x4000; i = bais.readSignedShort(); - yscale = (double) i / 
(double) 0x4000; + yscale = i / (double) 0x4000; } } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfCompositeDescript.java b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfCompositeDescript.java index 9245a431dc1..ffd75af0416 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfCompositeDescript.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfCompositeDescript.java @@ -20,8 +20,10 @@ Licensed to the Apache Software Foundation (ASF) under one or more import java.io.IOException; import java.util.ArrayList; -import java.util.Iterator; +import java.util.HashMap; import java.util.List; +import java.util.Map; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -41,9 +43,12 @@ public class GlyfCompositeDescript extends GlyfDescript private static final Log LOG = LogFactory.getLog(GlyfCompositeDescript.class); private final List components = new ArrayList(); + private final Map descriptions = new HashMap(); private GlyphTable glyphTable = null; private boolean beingResolved = false; private boolean resolved = false; + private int pointCount = -1; + private int contourCount = -1; /** * Constructor. 
@@ -52,7 +57,7 @@ public class GlyfCompositeDescript extends GlyfDescript * @param glyphTable the Glyphtable containing all glyphs * @throws IOException is thrown if something went wrong */ - public GlyfCompositeDescript(TTFDataStream bais, GlyphTable glyphTable) throws IOException + GlyfCompositeDescript(TTFDataStream bais, GlyphTable glyphTable) throws IOException { super((short) -1, bais); @@ -72,6 +77,7 @@ public GlyfCompositeDescript(TTFDataStream bais, GlyphTable glyphTable) throws I { readInstructions(bais, (bais.readUnsignedShort())); } + initDescriptions(); } /** @@ -94,15 +100,12 @@ public void resolve() int firstIndex = 0; int firstContour = 0; - Iterator i = components.iterator(); - while (i.hasNext()) + for (GlyfCompositeComp comp : components) { - GlyfCompositeComp comp = i.next(); comp.setFirstIndex(firstIndex); comp.setFirstContour(firstContour); - GlyphDescription desc; - desc = getGlypDescription(comp.getGlyphIndex()); + GlyphDescription desc = descriptions.get(comp.getGlyphIndex()); if (desc != null) { desc.resolve(); @@ -123,7 +126,7 @@ public int getEndPtOfContours(int i) GlyfCompositeComp c = getCompositeCompEndPt(i); if (c != null) { - GlyphDescription gd = getGlypDescription(c.getGlyphIndex()); + GlyphDescription gd = descriptions.get(c.getGlyphIndex()); return gd.getEndPtOfContours(i - c.getFirstContour()) + c.getFirstIndex(); } return 0; @@ -138,7 +141,7 @@ public byte getFlags(int i) GlyfCompositeComp c = getCompositeComp(i); if (c != null) { - GlyphDescription gd = getGlypDescription(c.getGlyphIndex()); + GlyphDescription gd = descriptions.get(c.getGlyphIndex()); return gd.getFlags(i - c.getFirstIndex()); } return 0; @@ -153,7 +156,7 @@ public short getXCoordinate(int i) GlyfCompositeComp c = getCompositeComp(i); if (c != null) { - GlyphDescription gd = getGlypDescription(c.getGlyphIndex()); + GlyphDescription gd = descriptions.get(c.getGlyphIndex()); int n = i - c.getFirstIndex(); int x = gd.getXCoordinate(n); int y = 
gd.getYCoordinate(n); @@ -173,7 +176,7 @@ public short getYCoordinate(int i) GlyfCompositeComp c = getCompositeComp(i); if (c != null) { - GlyphDescription gd = getGlypDescription(c.getGlyphIndex()); + GlyphDescription gd = descriptions.get(c.getGlyphIndex()); int n = i - c.getFirstIndex(); int x = gd.getXCoordinate(n); int y = gd.getYCoordinate(n); @@ -203,14 +206,21 @@ public int getPointCount() { LOG.error("getPointCount called on unresolved GlyfCompositeDescript"); } - GlyfCompositeComp c = components.get(components.size() - 1); - GlyphDescription gd = getGlypDescription(c.getGlyphIndex()); - if (gd == null) + if (pointCount < 0) { - LOG.error("getGlypDescription(" + c.getGlyphIndex() + ") is null, returning 0"); - return 0; - } - return c.getFirstIndex() + gd.getPointCount(); + GlyfCompositeComp c = components.get(components.size() - 1); + GlyphDescription gd = descriptions.get(c.getGlyphIndex()); + if (gd == null) + { + LOG.error("GlyphDescription for index " + c.getGlyphIndex() + " is null, returning 0"); + pointCount = 0; + } + else + { + pointCount = c.getFirstIndex() + gd.getPointCount(); + } + } + return pointCount; } /** @@ -223,8 +233,12 @@ public int getContourCount() { LOG.error("getContourCount called on unresolved GlyfCompositeDescript"); } - GlyfCompositeComp c = components.get(components.size() - 1); - return c.getFirstContour() + getGlypDescription(c.getGlyphIndex()).getContourCount(); + if (contourCount < 0) + { + GlyfCompositeComp c = components.get(components.size() - 1); + contourCount = c.getFirstContour() + descriptions.get(c.getGlyphIndex()).getContourCount(); + } + return contourCount; } /** @@ -241,8 +255,8 @@ private GlyfCompositeComp getCompositeComp(int i) { for (GlyfCompositeComp c : components) { - GlyphDescription gd = getGlypDescription(c.getGlyphIndex()); - if (c.getFirstIndex() <= i && i < (c.getFirstIndex() + gd.getPointCount())) + GlyphDescription gd = descriptions.get(c.getGlyphIndex()); + if (c.getFirstIndex() <= i && gd != 
null && i < (c.getFirstIndex() + gd.getPointCount())) { return c; } @@ -254,8 +268,8 @@ private GlyfCompositeComp getCompositeCompEndPt(int i) { for (GlyfCompositeComp c : components) { - GlyphDescription gd = getGlypDescription(c.getGlyphIndex()); - if (c.getFirstContour() <= i && i < (c.getFirstContour() + gd.getContourCount())) + GlyphDescription gd = descriptions.get(c.getGlyphIndex()); + if (c.getFirstContour() <= i && gd != null && i < (c.getFirstContour() + gd.getContourCount())) { return c; } @@ -263,21 +277,23 @@ private GlyfCompositeComp getCompositeCompEndPt(int i) return null; } - private GlyphDescription getGlypDescription(int index) + private void initDescriptions() { - try + for (GlyfCompositeComp component : components) { - GlyphData glyph = glyphTable.getGlyph(index); - if (glyph != null) + try { - return glyph.getDescription(); + int index = component.getGlyphIndex(); + GlyphData glyph = glyphTable.getGlyph(index); + if (glyph != null) + { + descriptions.put(index, glyph.getDescription()); + } } - return null; - } - catch (IOException e) - { - LOG.error(e); - return null; + catch (IOException e) + { + LOG.error(e); + } } } } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfDescript.java b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfDescript.java index c577516567d..6b0deec8b95 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfDescript.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfDescript.java @@ -75,7 +75,7 @@ public abstract class GlyfDescript implements GlyphDescription * @param bais the stream to be read * @throws IOException is thrown if something went wrong */ - protected GlyfDescript(short numberOfContours, TTFDataStream bais) throws IOException + GlyfDescript(short numberOfContours, TTFDataStream bais) throws IOException { contourCount = numberOfContours; } @@ -112,7 +112,7 @@ public int[] getInstructions() * @param count the number of instructions to be read * @throws IOException is thrown if 
something went wrong */ - protected void readInstructions(TTFDataStream bais, int count) throws IOException + void readInstructions(TTFDataStream bais, int count) throws IOException { instructions = bais.readUnsignedByteArray(count); } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfSimpleDescript.java b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfSimpleDescript.java index b16eee77748..8356ea2fd7c 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfSimpleDescript.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyfSimpleDescript.java @@ -40,6 +40,17 @@ public class GlyfSimpleDescript extends GlyfDescript private short[] yCoordinates; private final int pointCount; + /** + * Constructor for an empty description. + * + * @throws IOException is thrown if something went wrong + */ + GlyfSimpleDescript() throws IOException + { + super((short) 0, null); + pointCount = 0; + } + /** * Constructor. * @@ -48,7 +59,7 @@ public class GlyfSimpleDescript extends GlyfDescript * @param x0 the initial X-position * @throws IOException is thrown if something went wrong */ - public GlyfSimpleDescript(short numberOfContours, TTFDataStream bais, short x0) throws IOException + GlyfSimpleDescript(short numberOfContours, TTFDataStream bais, short x0) throws IOException { super(numberOfContours, bais); @@ -207,6 +218,11 @@ private void readFlags(int flagCount, TTFDataStream bais) throws IOException int repeats = bais.readUnsignedByte(); for (int i = 1; i <= repeats; i++) { + if (index + i >= flags.length) + { + LOG.error("repeat count (" + repeats + ") higher than remaining space"); + return; + } flags[index + i] = flags[index]; } index += repeats; diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphData.java b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphData.java index 7e1c89de1da..94f2eb93111 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphData.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphData.java @@ 
-44,7 +44,7 @@ public class GlyphData * @param leftSideBearing The left side bearing for this glyph. * @throws IOException If there is an error reading the data. */ - public void initData( GlyphTable glyphTable, TTFDataStream data, int leftSideBearing ) throws IOException + void initData( GlyphTable glyphTable, TTFDataStream data, int leftSideBearing ) throws IOException { numberOfContours = data.readSignedShort(); xMin = data.readSignedShort(); @@ -65,7 +65,18 @@ public void initData( GlyphTable glyphTable, TTFDataStream data, int leftSideBea glyphDescription = new GlyfCompositeDescript(data, glyphTable); } } - + + /** + * Initialize an empty glyph record. + * + * @throws IOException + */ + void initEmptyData() throws IOException + { + glyphDescription = new GlyfSimpleDescript(); + boundingBox = new BoundingBox(); + } + /** * @return Returns the boundingBox. */ diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphDescription.java b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphDescription.java index 58574384b58..cb4c00cf0d0 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphDescription.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphDescription.java @@ -36,7 +36,11 @@ public interface GlyphDescription int getEndPtOfContours(int i); /** - * Returns the flags of the given point. + * Returns the flags of the given point. To decode these bit flags, use the static elements of + * {@link GlyfDescript}. See also "Outline flags" in + * The + * 'glyf' table in the TrueType Reference Manual. 
+ * * @param i the given point * @return the flags value for the given point */ diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphRenderer.java b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphRenderer.java index 00e7274ed35..df23aafa00a 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphRenderer.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphRenderer.java @@ -22,6 +22,7 @@ import java.awt.geom.GeneralPath; import java.util.ArrayList; import java.util.List; +import java.util.Locale; /** * This class provides a glyph to GeneralPath conversion for true type fonts. @@ -63,13 +64,19 @@ public GeneralPath getPath() private Point[] describe(GlyphDescription gd) { int endPtIndex = 0; + int endPtOfContourIndex = -1; Point[] points = new Point[gd.getPointCount()]; - for (int i = 0; i < gd.getPointCount(); i++) + for (int i = 0; i < points.length; i++) { - boolean endPt = gd.getEndPtOfContours(endPtIndex) == i; + if (endPtOfContourIndex == -1) + { + endPtOfContourIndex = gd.getEndPtOfContours(endPtIndex); + } + boolean endPt = endPtOfContourIndex == i; if (endPt) { endPtIndex++; + endPtOfContourIndex = -1; } points[i] = new Point(gd.getXCoordinate(i), gd.getYCoordinate(i), (gd.getFlags(i) & GlyfDescript.ON_CURVE) != 0, endPt); @@ -146,7 +153,7 @@ private void moveTo(GeneralPath path, Point point) path.moveTo(point.x, point.y); if (LOG.isDebugEnabled()) { - LOG.trace("moveTo: " + String.format("%d,%d", point.x, point.y)); + LOG.trace("moveTo: " + String.format(Locale.US, "%d,%d", point.x, point.y)); } } @@ -155,7 +162,7 @@ private void lineTo(GeneralPath path, Point point) path.lineTo(point.x, point.y); if (LOG.isDebugEnabled()) { - LOG.trace("lineTo: " + String.format("%d,%d", point.x, point.y)); + LOG.trace("lineTo: " + String.format(Locale.US, "%d,%d", point.x, point.y)); } } @@ -164,7 +171,7 @@ private void quadTo(GeneralPath path, Point ctrlPoint, Point point) path.quadTo(ctrlPoint.x, ctrlPoint.y, point.x, point.y); if 
(LOG.isDebugEnabled()) { - LOG.trace("quadTo: " + String.format("%d,%d %d,%d", ctrlPoint.x, ctrlPoint.y, + LOG.trace("quadTo: " + String.format(Locale.US, "%d,%d %d,%d", ctrlPoint.x, ctrlPoint.y, point.x, point.y)); } } @@ -207,7 +214,7 @@ private static class Point @Override public String toString() { - return String.format("Point(%d,%d,%s,%s)", x, y, onCurve ? "onCurve" : "", + return String.format(Locale.US, "Point(%d,%d,%s,%s)", x, y, onCurve ? "onCurve" : "", endOfContour ? "endOfContour" : ""); } } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java new file mode 100644 index 00000000000..607723e3a33 --- /dev/null +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphSubstitutionTable.java @@ -0,0 +1,780 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fontbox.ttf; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * A glyph substitution 'GSUB' table in a TrueType or OpenType font. + * + * @author Aaron Madlon-Kay + */ +public class GlyphSubstitutionTable extends TTFTable +{ + private static final Log LOG = LogFactory.getLog(GlyphSubstitutionTable.class); + + public static final String TAG = "GSUB"; + + private LinkedHashMap scriptList; + // featureList and lookupList are not maps because we need to index into them + private FeatureRecord[] featureList; + private LookupTable[] lookupList; + + private final Map lookupCache = new HashMap(); + private final Map reverseLookup = new HashMap(); + + private String lastUsedSupportedScript; + + GlyphSubstitutionTable(TrueTypeFont font) + { + super(font); + } + + @Override + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + { + long start = data.getCurrentPosition(); + @SuppressWarnings("unused") + int majorVersion = data.readUnsignedShort(); + int minorVersion = data.readUnsignedShort(); + int scriptListOffset = data.readUnsignedShort(); + int featureListOffset = data.readUnsignedShort(); + int lookupListOffset = data.readUnsignedShort(); + @SuppressWarnings("unused") + long featureVariationsOffset = -1L; + if (minorVersion == 1L) + { + featureVariationsOffset = data.readUnsignedInt(); + } + + scriptList = readScriptList(data, start + scriptListOffset); + featureList = readFeatureList(data, start + featureListOffset); + lookupList = readLookupList(data, start + lookupListOffset); + } + + LinkedHashMap readScriptList(TTFDataStream data, long offset) throws IOException + { + 
data.seek(offset); + int scriptCount = data.readUnsignedShort(); + ScriptRecord[] scriptRecords = new ScriptRecord[scriptCount]; + int[] scriptOffsets = new int[scriptCount]; + for (int i = 0; i < scriptCount; i++) + { + ScriptRecord scriptRecord = new ScriptRecord(); + scriptRecord.scriptTag = data.readString(4); + scriptOffsets[i] = data.readUnsignedShort(); + scriptRecords[i] = scriptRecord; + } + for (int i = 0; i < scriptCount; i++) + { + scriptRecords[i].scriptTable = readScriptTable(data, offset + scriptOffsets[i]); + } + LinkedHashMap resultScriptList = new LinkedHashMap(scriptCount); + for (ScriptRecord scriptRecord : scriptRecords) + { + resultScriptList.put(scriptRecord.scriptTag, scriptRecord.scriptTable); + } + return resultScriptList; + } + + ScriptTable readScriptTable(TTFDataStream data, long offset) throws IOException + { + data.seek(offset); + ScriptTable scriptTable = new ScriptTable(); + int defaultLangSys = data.readUnsignedShort(); + int langSysCount = data.readUnsignedShort(); + LangSysRecord[] langSysRecords = new LangSysRecord[langSysCount]; + int[] langSysOffsets = new int[langSysCount]; + String prevLangSysTag = ""; + for (int i = 0; i < langSysCount; i++) + { + LangSysRecord langSysRecord = new LangSysRecord(); + langSysRecord.langSysTag = data.readString(4); + if (i > 0 && langSysRecord.langSysTag.compareTo(prevLangSysTag) <= 0) + { + // PDFBOX-4489: catch corrupt file + // https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#slTbl_sRec + throw new IOException("LangSysRecords not alphabetically sorted by LangSys tag: " + + langSysRecord.langSysTag + " <= " + prevLangSysTag); + } + langSysOffsets[i] = data.readUnsignedShort(); + langSysRecords[i] = langSysRecord; + prevLangSysTag = langSysRecord.langSysTag; + } + if (defaultLangSys != 0) + { + scriptTable.defaultLangSysTable = readLangSysTable(data, offset + defaultLangSys); + } + for (int i = 0; i < langSysCount; i++) + { + langSysRecords[i].langSysTable = 
readLangSysTable(data, offset + langSysOffsets[i]); + } + scriptTable.langSysTables = new LinkedHashMap(langSysCount); + for (LangSysRecord langSysRecord : langSysRecords) + { + scriptTable.langSysTables.put(langSysRecord.langSysTag, langSysRecord.langSysTable); + } + return scriptTable; + } + + LangSysTable readLangSysTable(TTFDataStream data, long offset) throws IOException + { + data.seek(offset); + LangSysTable langSysTable = new LangSysTable(); + @SuppressWarnings("unused") + int lookupOrder = data.readUnsignedShort(); + langSysTable.requiredFeatureIndex = data.readUnsignedShort(); + int featureIndexCount = data.readUnsignedShort(); + langSysTable.featureIndices = new int[featureIndexCount]; + for (int i = 0; i < featureIndexCount; i++) + { + langSysTable.featureIndices[i] = data.readUnsignedShort(); + } + return langSysTable; + } + + FeatureRecord[] readFeatureList(TTFDataStream data, long offset) throws IOException + { + data.seek(offset); + int featureCount = data.readUnsignedShort(); + FeatureRecord[] featureRecords = new FeatureRecord[featureCount]; + int[] featureOffsets = new int[featureCount]; + String prevFeatureTag = ""; + for (int i = 0; i < featureCount; i++) + { + FeatureRecord featureRecord = new FeatureRecord(); + featureRecord.featureTag = data.readString(4); + if (i > 0 && featureRecord.featureTag.compareTo(prevFeatureTag) < 0) + { + // catch corrupt file + // https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#flTbl + if (featureRecord.featureTag.matches("\\w{4}") && prevFeatureTag.matches("\\w{4}")) + { + // ArialUni.ttf has many warnings but isn't corrupt, so we assume that only + // strings with trash characters indicate real corruption + LOG.debug("FeatureRecord array not alphabetically sorted by FeatureTag: " + + featureRecord.featureTag + " < " + prevFeatureTag); + } + else + { + LOG.warn("FeatureRecord array not alphabetically sorted by FeatureTag: " + + featureRecord.featureTag + " < " + prevFeatureTag); + return new 
FeatureRecord[0]; + } + } + featureOffsets[i] = data.readUnsignedShort(); + featureRecords[i] = featureRecord; + prevFeatureTag = featureRecord.featureTag; + } + for (int i = 0; i < featureCount; i++) + { + featureRecords[i].featureTable = readFeatureTable(data, offset + featureOffsets[i]); + } + return featureRecords; + } + + FeatureTable readFeatureTable(TTFDataStream data, long offset) throws IOException + { + data.seek(offset); + FeatureTable featureTable = new FeatureTable(); + @SuppressWarnings("unused") + int featureParams = data.readUnsignedShort(); + int lookupIndexCount = data.readUnsignedShort(); + featureTable.lookupListIndices = new int[lookupIndexCount]; + for (int i = 0; i < lookupIndexCount; i++) + { + featureTable.lookupListIndices[i] = data.readUnsignedShort(); + } + return featureTable; + } + + LookupTable[] readLookupList(TTFDataStream data, long offset) throws IOException + { + data.seek(offset); + int lookupCount = data.readUnsignedShort(); + int[] lookups = new int[lookupCount]; + for (int i = 0; i < lookupCount; i++) + { + lookups[i] = data.readUnsignedShort(); + } + LookupTable[] lookupTables = new LookupTable[lookupCount]; + for (int i = 0; i < lookupCount; i++) + { + lookupTables[i] = readLookupTable(data, offset + lookups[i]); + } + return lookupTables; + } + + LookupTable readLookupTable(TTFDataStream data, long offset) throws IOException + { + data.seek(offset); + LookupTable lookupTable = new LookupTable(); + lookupTable.lookupType = data.readUnsignedShort(); + lookupTable.lookupFlag = data.readUnsignedShort(); + int subTableCount = data.readUnsignedShort(); + int[] subTableOffets = new int[subTableCount]; + for (int i = 0; i < subTableCount; i++) + { + subTableOffets[i] = data.readUnsignedShort(); + } + if ((lookupTable.lookupFlag & 0x0010) != 0) + { + lookupTable.markFilteringSet = data.readUnsignedShort(); + } + lookupTable.subTables = new LookupSubTable[subTableCount]; + switch (lookupTable.lookupType) + { + case 1: // Single + 
for (int i = 0; i < subTableCount; i++) + { + lookupTable.subTables[i] = readLookupSubTable(data, offset + subTableOffets[i]); + } + break; + default: + // Other lookup types are not supported + LOG.debug("Type " + lookupTable.lookupType + " GSUB lookup table is not supported and will be ignored"); + } + return lookupTable; + } + + LookupSubTable readLookupSubTable(TTFDataStream data, long offset) throws IOException + { + data.seek(offset); + int substFormat = data.readUnsignedShort(); + switch (substFormat) + { + case 1: + { + LookupTypeSingleSubstFormat1 lookupSubTable = new LookupTypeSingleSubstFormat1(); + lookupSubTable.substFormat = substFormat; + int coverageOffset = data.readUnsignedShort(); + lookupSubTable.deltaGlyphID = data.readSignedShort(); + lookupSubTable.coverageTable = readCoverageTable(data, offset + coverageOffset); + return lookupSubTable; + } + case 2: + { + LookupTypeSingleSubstFormat2 lookupSubTable = new LookupTypeSingleSubstFormat2(); + lookupSubTable.substFormat = substFormat; + int coverageOffset = data.readUnsignedShort(); + int glyphCount = data.readUnsignedShort(); + lookupSubTable.substituteGlyphIDs = new int[glyphCount]; + for (int i = 0; i < glyphCount; i++) + { + lookupSubTable.substituteGlyphIDs[i] = data.readUnsignedShort(); + } + lookupSubTable.coverageTable = readCoverageTable(data, offset + coverageOffset); + return lookupSubTable; + } + default: + throw new IOException("Unknown substFormat: " + substFormat); + } + } + + CoverageTable readCoverageTable(TTFDataStream data, long offset) throws IOException + { + data.seek(offset); + int coverageFormat = data.readUnsignedShort(); + switch (coverageFormat) + { + case 1: + { + CoverageTableFormat1 coverageTable = new CoverageTableFormat1(); + coverageTable.coverageFormat = coverageFormat; + int glyphCount = data.readUnsignedShort(); + coverageTable.glyphArray = new int[glyphCount]; + for (int i = 0; i < glyphCount; i++) + { + coverageTable.glyphArray[i] = data.readUnsignedShort(); 
+ } + return coverageTable; + } + case 2: + { + CoverageTableFormat2 coverageTable = new CoverageTableFormat2(); + coverageTable.coverageFormat = coverageFormat; + int rangeCount = data.readUnsignedShort(); + coverageTable.rangeRecords = new RangeRecord[rangeCount]; + for (int i = 0; i < rangeCount; i++) + { + coverageTable.rangeRecords[i] = readRangeRecord(data); + } + return coverageTable; + + } + default: + // Should not happen (the spec indicates only format 1 and format 2) + throw new IOException("Unknown coverage format: " + coverageFormat); + } + } + + + /** + * Choose from one of the supplied OpenType script tags, depending on what the font supports and + * potentially on context. + * + * @param tags + * @return The best OpenType script tag + */ + private String selectScriptTag(String[] tags) + { + if (tags.length == 1) + { + String tag = tags[0]; + if (OpenTypeScript.INHERITED.equals(tag) + || (OpenTypeScript.TAG_DEFAULT.equals(tag) && !scriptList.containsKey(tag))) + { + // We don't know what script this should be. + if (lastUsedSupportedScript == null) + { + // We have no past context and (currently) no way to get future context so we guess. + lastUsedSupportedScript = scriptList.keySet().iterator().next(); + } + // else use past context + + return lastUsedSupportedScript; + } + } + for (String tag : tags) + { + if (scriptList.containsKey(tag)) + { + // Use the first recognized tag. We assume a single font only recognizes one version ("ver. 2") + // of a single script, or if it recognizes more than one that it prefers the latest one. 
+ lastUsedSupportedScript = tag; + return lastUsedSupportedScript; + } + } + return tags[0]; + } + + private Collection getLangSysTables(String scriptTag) + { + Collection result = Collections.emptyList(); + ScriptTable scriptTable = scriptList.get(scriptTag); + if (scriptTable != null) + { + if (scriptTable.defaultLangSysTable == null) + { + result = scriptTable.langSysTables.values(); + } + else + { + result = new ArrayList(scriptTable.langSysTables.values()); + result.add(scriptTable.defaultLangSysTable); + } + } + return result; + } + + /** + * Get a list of {@code FeatureRecord}s from a collection of {@code LangSysTable}s. Optionally + * filter the returned features by supplying a list of allowed feature tags in + * {@code enabledFeatures}. + * + * Note that features listed as required ({@code LangSysTable#requiredFeatureIndex}) will be + * included even if not explicitly enabled. + * + * @param langSysTables The {@code LangSysTable}s indicating {@code FeatureRecord}s to search + * for + * @param enabledFeatures An optional list of feature tags ({@code null} to allow all) + * @return The indicated {@code FeatureRecord}s + */ + private List getFeatureRecords(Collection langSysTables, + final List enabledFeatures) + { + if (langSysTables.isEmpty()) + { + return Collections.emptyList(); + } + List result = new ArrayList(); + for (LangSysTable langSysTable : langSysTables) + { + int required = langSysTable.requiredFeatureIndex; + if (required != 0xffff && required < featureList.length) // if no required features = 0xFFFF + { + result.add(featureList[required]); + } + for (int featureIndex : langSysTable.featureIndices) + { + if (featureIndex < featureList.length && + (enabledFeatures == null || + enabledFeatures.contains(featureList[featureIndex].featureTag))) + { + result.add(featureList[featureIndex]); + } + } + } + + // 'vrt2' supersedes 'vert' and they should not be used together + // https://www.microsoft.com/typography/otspec/features_uz.htm + if 
(containsFeature(result, "vrt2")) + { + removeFeature(result, "vert"); + } + + if (enabledFeatures != null && result.size() > 1) + { + Collections.sort(result, new Comparator() + { + @Override + public int compare(FeatureRecord o1, FeatureRecord o2) + { + int i1 = enabledFeatures.indexOf(o1.featureTag); + int i2 = enabledFeatures.indexOf(o2.featureTag); + return i1 < i2 ? -1 : i1 == i2 ? 0 : 1; + } + }); + } + + return result; + } + + private boolean containsFeature(List featureRecords, String featureTag) + { + for (FeatureRecord featureRecord : featureRecords) + { + if (featureRecord.featureTag.equals(featureTag)) + { + return true; + } + } + return false; + } + + private void removeFeature(List featureRecords, String featureTag) + { + Iterator iter = featureRecords.iterator(); + while (iter.hasNext()) + { + if (iter.next().featureTag.equals(featureTag)) + { + iter.remove(); + } + } + } + + private int applyFeature(FeatureRecord featureRecord, int gid) + { + for (int lookupListIndex : featureRecord.featureTable.lookupListIndices) + { + LookupTable lookupTable = lookupList[lookupListIndex]; + if (lookupTable.lookupType != 1) + { + LOG.debug("Skipping GSUB feature '" + featureRecord.featureTag + + "' because it requires unsupported lookup table type " + lookupTable.lookupType); + continue; + } + gid = doLookup(lookupTable, gid); + } + return gid; + } + + private int doLookup(LookupTable lookupTable, int gid) + { + for (LookupSubTable lookupSubtable : lookupTable.subTables) + { + int coverageIndex = lookupSubtable.coverageTable.getCoverageIndex(gid); + if (coverageIndex >= 0) + { + return lookupSubtable.doSubstitution(gid, coverageIndex); + } + } + return gid; + } + + /** + * Apply glyph substitutions to the supplied gid. The applicable substitutions are determined by + * the {@code scriptTags} which indicate the language of the gid, and by the list of + * {@code enabledFeatures}. 
+ * + * To ensure that a single gid isn't mapped to multiple substitutions, subsequent invocations + * with the same gid will return the same result as the first, regardless of script or enabled + * features. + * + * @param gid GID + * @param scriptTags Script tags applicable to the gid (see {@link OpenTypeScript}) + * @param enabledFeatures list of features to apply + */ + public int getSubstitution(int gid, String[] scriptTags, List enabledFeatures) + { + if (gid == -1) + { + return -1; + } + Integer cached = lookupCache.get(gid); + if (cached != null) + { + // Because script detection for indeterminate scripts (COMMON, INHERIT, etc.) depends on context, + // it is possible to return a different substitution for the same input. However we don't want that, + // as we need a one-to-one mapping. + return cached; + } + String scriptTag = selectScriptTag(scriptTags); + Collection langSysTables = getLangSysTables(scriptTag); + List featureRecords = getFeatureRecords(langSysTables, enabledFeatures); + int sgid = gid; + for (FeatureRecord featureRecord : featureRecords) + { + sgid = applyFeature(featureRecord, sgid); + } + lookupCache.put(gid, sgid); + reverseLookup.put(sgid, gid); + return sgid; + } + + /** + * For a substitute-gid (obtained from {@link #getSubstitution(int, String[], List)}), retrieve + * the original gid. + * + * Only gids previously substituted by this instance can be un-substituted. If you are trying to + * unsubstitute before you substitute, something is wrong. 
+ * + * @param sgid Substitute GID + */ + public int getUnsubstitution(int sgid) + { + Integer gid = reverseLookup.get(sgid); + if (gid == null) + { + LOG.warn("Trying to un-substitute a never-before-seen gid: " + sgid); + return sgid; + } + return gid; + } + + RangeRecord readRangeRecord(TTFDataStream data) throws IOException + { + RangeRecord rangeRecord = new RangeRecord(); + rangeRecord.startGlyphID = data.readUnsignedShort(); + rangeRecord.endGlyphID = data.readUnsignedShort(); + rangeRecord.startCoverageIndex = data.readUnsignedShort(); + return rangeRecord; + } + + static class ScriptRecord + { + // https://www.microsoft.com/typography/otspec/scripttags.htm + String scriptTag; + ScriptTable scriptTable; + + @Override + public String toString() + { + return String.format("ScriptRecord[scriptTag=%s]", scriptTag); + } + } + + static class ScriptTable + { + LangSysTable defaultLangSysTable; + LinkedHashMap langSysTables; + + @Override + public String toString() + { + return String.format("ScriptTable[hasDefault=%s,langSysRecordsCount=%d]", + defaultLangSysTable != null, langSysTables.size()); + } + } + + static class LangSysRecord + { + // https://www.microsoft.com/typography/otspec/languagetags.htm + String langSysTag; + LangSysTable langSysTable; + + @Override + public String toString() + { + return String.format("LangSysRecord[langSysTag=%s]", langSysTag); + } + } + + static class LangSysTable + { + int requiredFeatureIndex; + int[] featureIndices; + + @Override + public String toString() + { + return String.format("LangSysTable[requiredFeatureIndex=%d]", requiredFeatureIndex); + } + } + + static class FeatureRecord + { + String featureTag; + FeatureTable featureTable; + + @Override + public String toString() + { + return String.format("FeatureRecord[featureTag=%s]", featureTag); + } + } + + static class FeatureTable + { + int[] lookupListIndices; + + @Override + public String toString() + { + return String.format("FeatureTable[lookupListIndiciesCount=%d]", + 
lookupListIndices.length); + } + } + + static class LookupTable + { + int lookupType; + int lookupFlag; + int markFilteringSet; + LookupSubTable[] subTables; + + @Override + public String toString() + { + return String.format("LookupTable[lookupType=%d,lookupFlag=%d,markFilteringSet=%d]", + lookupType, lookupFlag, markFilteringSet); + } + } + + static abstract class LookupSubTable + { + int substFormat; + CoverageTable coverageTable; + + abstract int doSubstitution(int gid, int coverageIndex); + } + + static class LookupTypeSingleSubstFormat1 extends LookupSubTable + { + short deltaGlyphID; + + @Override + int doSubstitution(int gid, int coverageIndex) + { + return coverageIndex < 0 ? gid : gid + deltaGlyphID; + } + + @Override + public String toString() + { + return String.format("LookupTypeSingleSubstFormat1[substFormat=%d,deltaGlyphID=%d]", + substFormat, deltaGlyphID); + } + } + + static class LookupTypeSingleSubstFormat2 extends LookupSubTable + { + int[] substituteGlyphIDs; + + @Override + int doSubstitution(int gid, int coverageIndex) + { + return coverageIndex < 0 ? 
gid : substituteGlyphIDs[coverageIndex]; + } + + @Override + public String toString() + { + return String.format( + "LookupTypeSingleSubstFormat2[substFormat=%d,substituteGlyphIDs=%s]", + substFormat, Arrays.toString(substituteGlyphIDs)); + } + } + + static abstract class CoverageTable + { + int coverageFormat; + + abstract int getCoverageIndex(int gid); + } + + static class CoverageTableFormat1 extends CoverageTable + { + int[] glyphArray; + + @Override + int getCoverageIndex(int gid) + { + return Arrays.binarySearch(glyphArray, gid); + } + + @Override + public String toString() + { + return String.format("CoverageTableFormat1[coverageFormat=%d,glyphArray=%s]", + coverageFormat, Arrays.toString(glyphArray)); + } + } + + static class CoverageTableFormat2 extends CoverageTable + { + RangeRecord[] rangeRecords; + + @Override + int getCoverageIndex(int gid) + { + for (RangeRecord rangeRecord : rangeRecords) + { + if (rangeRecord.startGlyphID <= gid && gid <= rangeRecord.endGlyphID) + { + return rangeRecord.startCoverageIndex + gid - rangeRecord.startGlyphID; + } + } + return -1; + } + + @Override + public String toString() + { + return String.format("CoverageTableFormat2[coverageFormat=%d]", coverageFormat); + } + } + + static class RangeRecord + { + int startGlyphID; + int endGlyphID; + int startCoverageIndex; + + @Override + public String toString() + { + return String.format("RangeRecord[startGlyphID=%d,endGlyphID=%d,startCoverageIndex=%d]", + startGlyphID, endGlyphID, startCoverageIndex); + } + } +} diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphTable.java index 83e264fd0cb..7a7fbb7d6e3 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphTable.java @@ -62,7 +62,7 @@ public class GlyphTable extends TTFTable * @throws IOException If there is an error reading the data. 
*/ @Override - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { loca = ttf.getIndexToLocation(); numGlyphs = ttf.getNumberOfGlyphs(); @@ -80,10 +80,17 @@ public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException /** * Returns all glyphs. This method can be very slow. + * + * @throws IOException If there is an error reading the data. + * @deprecated use {@link #getGlyph(int)} instead. This will be removed in 3.0. If you need this + * method, please create an issue in JIRA. */ + @Deprecated public GlyphData[] getGlyphs() throws IOException { - synchronized (font) + // PDFBOX-4219: synchronize on data because it is accessed by several threads + // when PDFBox is accessing a standard 14 font for the first time + synchronized (data) { // the glyph offsets long[] offsets = loca.getOffsets(); @@ -157,7 +164,11 @@ public GlyphData getGlyph(int gid) throws IOException return glyphs[gid]; } - synchronized (font) + GlyphData glyph; + + // PDFBOX-4219: synchronize on data because it is accessed by several threads + // when PDFBox is accessing a standard 14 font for the first time + synchronized (data) { // read a single glyph long[] offsets = loca.getOffsets(); @@ -165,18 +176,23 @@ public GlyphData getGlyph(int gid) throws IOException if (offsets[gid] == offsets[gid + 1]) { // no outline - return null; + // PDFBOX-5135: can't return null, must return an empty glyph because + // sometimes this is used in a composite glyph. 
+ glyph = new GlyphData(); + glyph.initEmptyData(); } - - // save - long currentPosition = data.getCurrentPosition(); + else + { + // save + long currentPosition = data.getCurrentPosition(); - data.seek(getOffset() + offsets[gid]); + data.seek(getOffset() + offsets[gid]); - GlyphData glyph = getGlyphData(gid); + glyph = getGlyphData(gid); - // restore - data.seek(currentPosition); + // restore + data.seek(currentPosition); + } if (glyphs != null && glyphs[gid] == null && cached < MAX_CACHED_GLYPHS) { diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java index 42c8b277d14..17bc917f8fb 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java @@ -71,7 +71,7 @@ public class HeaderTable extends TTFTable * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. */ - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { version = data.read32Fixed(); fontRevision = data.read32Fixed(); diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/HorizontalHeaderTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/HorizontalHeaderTable.java index 7aac4ed5302..d9d3e1805b4 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/HorizontalHeaderTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/HorizontalHeaderTable.java @@ -60,7 +60,8 @@ public class HorizontalHeaderTable extends TTFTable * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. 
*/ - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + @Override + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { version = data.read32Fixed(); ascender = data.readSignedShort(); diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/HorizontalMetricsTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/HorizontalMetricsTable.java index fabf70632ab..5026aab1ed4 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/HorizontalMetricsTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/HorizontalMetricsTable.java @@ -47,9 +47,13 @@ public class HorizontalMetricsTable extends TTFTable * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. */ - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { HorizontalHeaderTable hHeader = ttf.getHorizontalHeader(); + if (hHeader == null) + { + throw new IOException("Could not get hmtx table"); + } numHMetrics = hHeader.getNumberOfHMetrics(); int numGlyphs = ttf.getNumberOfGlyphs(); @@ -63,17 +67,20 @@ public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException bytesRead += 4; } - if (bytesRead < getLength()) + int numberNonHorizontal = numGlyphs - numHMetrics; + + // handle bad fonts with too many hmetrics + if (numberNonHorizontal < 0) { - int numberNonHorizontal = numGlyphs - numHMetrics; + numberNonHorizontal = numGlyphs; + } - // handle bad fonts with too many hmetrics - if (numberNonHorizontal < 0) - { - numberNonHorizontal = numGlyphs; - } + // make sure that table is never null and correct size, even with bad fonts that have no + // "leftSideBearing" table although they should + nonHorizontalLeftSideBearing = new short[numberNonHorizontal]; - nonHorizontalLeftSideBearing = new short[ numberNonHorizontal ]; + if (bytesRead < getLength()) + { for( int i=0; i> shift; } - private abstract static class 
PairData + private interface PairData { - public abstract void read(TTFDataStream data) throws IOException; + void read(TTFDataStream data) throws IOException; - public abstract int getKerning(int l, int r); + int getKerning(int l, int r); } - private static class PairData0Format0 extends PairData implements Comparator + private static class PairData0Format0 implements Comparator, PairData { private int searchRange; private int[][] pairs; @@ -283,17 +280,11 @@ public void read(TTFDataStream data) throws IOException public int getKerning(int l, int r) { int[] key = new int[] { l, r, 0 }; - int index; - index = Arrays.binarySearch(pairs, 0, searchRange, key, this); + int index = Arrays.binarySearch(pairs, key, this); if (index >= 0) { return pairs[index][2]; } - index = Arrays.binarySearch(pairs, searchRange, pairs.length, key, this); - if (index >= 0) - { - return pairs[searchRange + index][2]; - } return 0; } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/KerningTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/KerningTable.java index 5664e2ad7ec..5070fff3e73 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/KerningTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/KerningTable.java @@ -51,7 +51,7 @@ public class KerningTable extends TTFTable * @throws IOException If there is an error reading the data. */ @Override - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { int version = data.readUnsignedShort(); if (version != 0) @@ -95,7 +95,7 @@ public KerningSubtable getHorizontalKerningSubtable() } /** - * Obtain first subtable that supports horizontal kerning with specificed cross stream. + * Obtain first subtable that supports horizontal kerning with specified cross stream. 
* * @param cross true if requesting cross stream horizontal kerning * @return first matching subtable or null if none found diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/MaximumProfileTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/MaximumProfileTable.java index 7a8e976ee16..abd902edc77 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/MaximumProfileTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/MaximumProfileTable.java @@ -269,7 +269,8 @@ public void setVersion(float versionValue) * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. */ - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + @Override + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { version = data.read32Fixed(); numGlyphs = data.readUnsignedShort(); diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/MemoryTTFDataStream.java b/fontbox/src/main/java/org/apache/fontbox/ttf/MemoryTTFDataStream.java index 45ffdebf569..6dacbf201c2 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/MemoryTTFDataStream.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/MemoryTTFDataStream.java @@ -44,7 +44,7 @@ class MemoryTTFDataStream extends TTFDataStream { ByteArrayOutputStream output = new ByteArrayOutputStream( is.available() ); byte[] buffer = new byte[1024]; - int amountRead = 0; + int amountRead; while( (amountRead = is.read( buffer ) ) != -1 ) { output.write( buffer, 0, amountRead ); @@ -53,10 +53,7 @@ class MemoryTTFDataStream extends TTFDataStream } finally { - if( is != null ) - { - is.close(); - } + is.close(); } } @@ -65,6 +62,7 @@ class MemoryTTFDataStream extends TTFDataStream * @return An unsigned byte. * @throws IOException If there is an error reading the data. 
*/ + @Override public long readLong() throws IOException { return ((long)(readSignedInt()) << 32) + (readSignedInt() & 0xFFFFFFFFL); @@ -86,7 +84,7 @@ public int readSignedInt() throws IOException { throw new EOFException(); } - return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0)); + return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + ch4); } /** @@ -94,6 +92,7 @@ public int readSignedInt() throws IOException * @return An unsigned byte. * @throws IOException If there is an error reading the data. */ + @Override public int read() throws IOException { if (currentPosition >= data.length) @@ -111,6 +110,7 @@ public int read() throws IOException * @return An unsigned short. * @throws IOException If there is an error reading the data. */ + @Override public int readUnsignedShort() throws IOException { int ch1 = this.read(); @@ -119,7 +119,7 @@ public int readUnsignedShort() throws IOException { throw new EOFException(); } - return (ch1 << 8) + (ch2 << 0); + return (ch1 << 8) + ch2; } /** @@ -128,6 +128,7 @@ public int readUnsignedShort() throws IOException * @return An signed short. * @throws IOException If there is an error reading the data. */ + @Override public short readSignedShort() throws IOException { int ch1 = this.read(); @@ -136,7 +137,7 @@ public short readSignedShort() throws IOException { throw new EOFException(); } - return (short)((ch1 << 8) + (ch2 << 0)); + return (short)((ch1 << 8) + ch2); } /** @@ -144,20 +145,25 @@ public short readSignedShort() throws IOException * * @throws IOException If there is an error closing the resources. */ + @Override public void close() throws IOException { - data = null; } /** * Seek into the datasource. - * + * * @param pos The position to seek to. - * @throws IOException If there is an error seeking to that position. + * @throws IOException If the seek position is negative or larger than MAXINT. 
*/ + @Override public void seek(long pos) throws IOException { - currentPosition = (int)pos; + if (pos < 0 || pos > Integer.MAX_VALUE) + { + throw new IOException("Illegal seek position: " + pos); + } + currentPosition = (int) pos; } /** @@ -171,6 +177,7 @@ public void seek(long pos) throws IOException * * @throws IOException If there is an error reading from the stream. */ + @Override public int read(byte[] b, int off, int len) @@ -194,6 +201,7 @@ public int read(byte[] b, * @return The current position in the stream. * @throws IOException If an error occurs while reading the stream. */ + @Override public long getCurrentPosition() throws IOException { return currentPosition; @@ -202,8 +210,18 @@ public long getCurrentPosition() throws IOException /** * {@inheritDoc} */ + @Override public InputStream getOriginalData() throws IOException { return new ByteArrayInputStream( data ); } + + /** + * {@inheritDoc} + */ + @Override + public long getOriginalDataSize() + { + return data.length; + } } \ No newline at end of file diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/NameRecord.java b/fontbox/src/main/java/org/apache/fontbox/ttf/NameRecord.java index 56ae0d5a871..a421aea3ea1 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/NameRecord.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/NameRecord.java @@ -38,7 +38,12 @@ public class NameRecord public static final int ENCODING_UNICODE_2_0_FULL = 4; // Unicode encoding ids + /** + * @deprecated use {@link #LANGUAGE_UNICODE} instead. + */ + @Deprecated public static final int LANGUGAE_UNICODE = 0; + public static final int LANGUAGE_UNICODE = 0; // Windows encoding ids public static final int ENCODING_WINDOWS_SYMBOL = 0; @@ -46,13 +51,23 @@ public class NameRecord public static final int ENCODING_WINDOWS_UNICODE_UCS4 = 10; // Windows language ids + /** + * @deprecated use {@link #LANGUAGE_WINDOWS_EN_US} instead. 
+ */ + @Deprecated public static final int LANGUGAE_WINDOWS_EN_US = 0x0409; + public static final int LANGUAGE_WINDOWS_EN_US = 0x0409; // Macintosh encoding ids public static final int ENCODING_MACINTOSH_ROMAN = 0; // Macintosh language ids + /** + * @deprecated use {@link #LANGUAGE_MACINTOSH_ENGLISH} instead. + */ + @Deprecated public static final int LANGUGAE_MACINTOSH_ENGLISH = 0; + public static final int LANGUAGE_MACINTOSH_ENGLISH = 0; // name ids public static final int NAME_COPYRIGHT = 0; @@ -165,7 +180,7 @@ public void setPlatformId(int platformIdValue) * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. */ - public void initData( TrueTypeFont ttf, TTFDataStream data ) throws IOException + void initData( TrueTypeFont ttf, TTFDataStream data ) throws IOException { platformId = data.readUnsignedShort(); platformEncodingId = data.readUnsignedShort(); diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java index 265583c4771..557e38b0746 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java @@ -58,7 +58,7 @@ public class NamingTable extends TTFTable * @throws IOException If there is an error reading the data. */ @Override - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { int formatSelector = data.readUnsignedShort(); int numberOfNameRecords = data.readUnsignedShort(); @@ -94,18 +94,20 @@ else if (platform == NameRecord.PLATFORM_UNICODE) } else if (platform == NameRecord.PLATFORM_ISO) { - if (encoding == 0) + switch (encoding) { - charset = Charsets.US_ASCII; - } - else if (encoding == 1) - { - //not sure is this is correct?? 
- charset = Charsets.ISO_10646; - } - else if (encoding == 2) - { - charset = Charsets.ISO_8859_1; + case 0: + charset = Charsets.US_ASCII; + break; + case 1: + //not sure is this is correct?? + charset = Charsets.ISO_10646; + break; + case 2: + charset = Charsets.ISO_8859_1; + break; + default: + break; } } String string = data.readString(nr.getStringLength(), charset); @@ -149,13 +151,13 @@ else if (encoding == 2) psName = getName(NameRecord.NAME_POSTSCRIPT_NAME, NameRecord.PLATFORM_MACINTOSH, NameRecord.ENCODING_MACINTOSH_ROMAN, - NameRecord.LANGUGAE_MACINTOSH_ENGLISH); + NameRecord.LANGUAGE_MACINTOSH_ENGLISH); if (psName == null) { psName = getName(NameRecord.NAME_POSTSCRIPT_NAME, NameRecord.PLATFORM_WINDOWS, NameRecord.ENCODING_WINDOWS_UNICODE_BMP, - NameRecord.LANGUGAE_WINDOWS_EN_US); + NameRecord.LANGUAGE_WINDOWS_EN_US); } if (psName != null) { @@ -177,7 +179,7 @@ private String getEnglishName(int nameId) getName(nameId, NameRecord.PLATFORM_UNICODE, i, - NameRecord.LANGUGAE_UNICODE); + NameRecord.LANGUAGE_UNICODE); if (nameUni != null) { return nameUni; @@ -189,7 +191,7 @@ private String getEnglishName(int nameId) getName(nameId, NameRecord.PLATFORM_WINDOWS, NameRecord.ENCODING_WINDOWS_UNICODE_BMP, - NameRecord.LANGUGAE_WINDOWS_EN_US); + NameRecord.LANGUAGE_WINDOWS_EN_US); if (nameWin != null) { return nameWin; @@ -200,7 +202,7 @@ private String getEnglishName(int nameId) getName(nameId, NameRecord.PLATFORM_MACINTOSH, NameRecord.ENCODING_MACINTOSH_ROMAN, - NameRecord.LANGUGAE_MACINTOSH_ENGLISH); + NameRecord.LANGUAGE_MACINTOSH_ENGLISH); if (nameMac != null) { return nameMac; diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/OS2WindowsMetricsTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/OS2WindowsMetricsTable.java index 70899c5962b..9caf869a74f 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/OS2WindowsMetricsTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/OS2WindowsMetricsTable.java @@ -16,17 +16,27 @@ */ package 
org.apache.fontbox.ttf; +import java.io.EOFException; import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + /** - * A table in a true type font. - * + * The OS/2 and Windows Metrics Table in a TrueType font, see + * here. + * * @author Ben Litchfield - * + * */ public class OS2WindowsMetricsTable extends TTFTable { + /** + * Log instance. + */ + private static final Log LOG = LogFactory.getLog(OS2WindowsMetricsTable.class); + /** * Weight class constant. */ @@ -152,19 +162,19 @@ public class OS2WindowsMetricsTable extends TTFTable *

For Restricted License embedding to take effect, it must be the only level of embedding * selected. */ - public static final short FSTYPE_RESTRICTED = 0x0001; + public static final short FSTYPE_RESTRICTED = 0x0002; /** - * Preview & Print embedding: the font may be embedded, and temporarily loaded on the + * Preview and Print embedding: the font may be embedded, and temporarily loaded on the * remote system. No edits can be applied to the document. */ public static final short FSTYPE_PREVIEW_AND_PRINT = 0x0004; /** * Editable embedding: the font may be embedded but must only be installed temporarily on other - * systems. Documents may be editied and changes saved. + * systems. Documents may be edited and changes saved. */ - public static final short FSTYPE_EDITIBLE = 0x0004; + public static final short FSTYPE_EDITIBLE = 0x0008; /** * No subsetting: the font must not be subsetted prior to embedding. @@ -177,6 +187,44 @@ public class OS2WindowsMetricsTable extends TTFTable */ public static final short FSTYPE_BITMAP_ONLY = 0x0200; + private int version; + private short averageCharWidth; + private int weightClass; + private int widthClass; + private short fsType; + private short subscriptXSize; + private short subscriptYSize; + private short subscriptXOffset; + private short subscriptYOffset; + private short superscriptXSize; + private short superscriptYSize; + private short superscriptXOffset; + private short superscriptYOffset; + private short strikeoutSize; + private short strikeoutPosition; + private int familyClass; + private byte[] panose = new byte[10]; + private long unicodeRange1; + private long unicodeRange2; + private long unicodeRange3; + private long unicodeRange4; + private String achVendId = "XXXX"; + private int fsSelection; + private int firstCharIndex; + private int lastCharIndex; + private int typoAscender; + private int typoDescender; + private int typoLineGap; + private int winAscent; + private int winDescent; + private long codePageRange1 = 0; + 
private long codePageRange2 = 0; + private int sxHeight; + private int sCapHeight; + private int usDefaultChar; + private int usBreakChar; + private int usMaxContext; + OS2WindowsMetricsTable(TrueTypeFont font) { super(font); @@ -734,44 +782,6 @@ public int getMaxContext() return usMaxContext; } - private int version; - private short averageCharWidth; - private int weightClass; - private int widthClass; - private short fsType; - private short subscriptXSize; - private short subscriptYSize; - private short subscriptXOffset; - private short subscriptYOffset; - private short superscriptXSize; - private short superscriptYSize; - private short superscriptXOffset; - private short superscriptYOffset; - private short strikeoutSize; - private short strikeoutPosition; - private int familyClass; - private byte[] panose = new byte[10]; - private long unicodeRange1; - private long unicodeRange2; - private long unicodeRange3; - private long unicodeRange4; - private String achVendId = "XXXX"; - private int fsSelection; - private int firstCharIndex; - private int lastCharIndex; - private int typoAscender; - private int typoDescender; - private int typoLineGap; - private int winAscent; - private int winDescent; - private long codePageRange1 = -1; - private long codePageRange2 = -1; - private int sxHeight; - private int sCapHeight; - private int usDefaultChar; - private int usBreakChar; - private int usMaxContext; - /** * A tag that identifies this table type. */ @@ -784,7 +794,8 @@ public int getMaxContext() * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. 
*/ - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + @Override + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { version = data.readUnsignedShort(); averageCharWidth = data.readSignedShort(); @@ -811,23 +822,52 @@ public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException fsSelection = data.readUnsignedShort(); firstCharIndex = data.readUnsignedShort(); lastCharIndex = data.readUnsignedShort(); - typoAscender = data.readSignedShort(); - typoDescender = data.readSignedShort(); - typoLineGap = data.readSignedShort(); - winAscent = data.readUnsignedShort(); - winDescent = data.readUnsignedShort(); + try + { + typoAscender = data.readSignedShort(); + typoDescender = data.readSignedShort(); + typoLineGap = data.readSignedShort(); + winAscent = data.readUnsignedShort(); + winDescent = data.readUnsignedShort(); + } + catch (EOFException ex) + { + LOG.debug("EOF, probably some legacy TrueType font"); + initialized = true; + return; + } if (version >= 1) { - codePageRange1 = data.readUnsignedInt(); - codePageRange2 = data.readUnsignedInt(); + try + { + codePageRange1 = data.readUnsignedInt(); + codePageRange2 = data.readUnsignedInt(); + } + catch (EOFException ex) + { + version = 0; + LOG.warn("Could not read all expected parts of version >= 1, setting version to 0", ex); + initialized = true; + return; + } } - if (version >= 1.2) + if (version >= 2) { - sxHeight = data.readSignedShort(); - sCapHeight = data.readSignedShort(); - usDefaultChar = data.readUnsignedShort(); - usBreakChar = data.readUnsignedShort(); - usMaxContext = data.readUnsignedShort(); + try + { + sxHeight = data.readSignedShort(); + sCapHeight = data.readSignedShort(); + usDefaultChar = data.readUnsignedShort(); + usBreakChar = data.readUnsignedShort(); + usMaxContext = data.readUnsignedShort(); + } + catch (EOFException ex) + { + version = 1; + LOG.warn("Could not read all expected parts of version >= 2, setting version to 1", ex); + 
initialized = true; + return; + } } initialized = true; } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/OTFParser.java b/fontbox/src/main/java/org/apache/fontbox/ttf/OTFParser.java index 093f4856641..53005e51102 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/OTFParser.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/OTFParser.java @@ -104,4 +104,10 @@ else if (tag.equals("CFF ")) return super.readTable(font, tag); } } + + @Override + protected boolean allowCFF() + { + return true; + } } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/OpenTypeFont.java b/fontbox/src/main/java/org/apache/fontbox/ttf/OpenTypeFont.java index bfb3e4820b1..39c6ee82797 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/OpenTypeFont.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/OpenTypeFont.java @@ -40,7 +40,7 @@ public class OpenTypeFont extends TrueTypeFont @Override void setVersion(float versionValue) { - isPostScript = versionValue != 1.0; + isPostScript = Float.floatToIntBits(versionValue) == 0x469EA8A9; // OTTO super.setVersion(versionValue); } @@ -49,22 +49,17 @@ void setVersion(float versionValue) * * @return The "CFF" table. 
*/ - public synchronized CFFTable getCFF() throws IOException + public CFFTable getCFF() throws IOException { if (!isPostScript) { throw new UnsupportedOperationException("TTF fonts do not have a CFF table"); } - CFFTable cff = (CFFTable)tables.get(CFFTable.TAG); - if (cff != null && !cff.getInitialized()) - { - readTable(cff); - } - return cff; + return (CFFTable) getTable(CFFTable.TAG); } @Override - public synchronized GlyphTable getGlyph() throws IOException + public GlyphTable getGlyph() throws IOException { if (isPostScript) { diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/OpenTypeScript.java b/fontbox/src/main/java/org/apache/fontbox/ttf/OpenTypeScript.java new file mode 100644 index 00000000000..8eb781ad908 --- /dev/null +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/OpenTypeScript.java @@ -0,0 +1,378 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.fontbox.ttf; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.LineNumberReader; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.StringTokenizer; +import java.util.TreeMap; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * A class for mapping Unicode codepoints to OpenType script tags + * + * @author Aaron Madlon-Kay + * + * @see Microsoft Typography: + * Script Tags + * @see Unicode Script Property + */ +public final class OpenTypeScript +{ + private static final Log LOG = LogFactory.getLog(OpenTypeScript.class); + + public static final String INHERITED = "Inherited"; + public static final String UNKNOWN = "Unknown"; + public static final String TAG_DEFAULT = "DFLT"; + + /** + * A map associating Unicode scripts with one or more OpenType script tags. Script tags are not necessarily the same + * as Unicode scripts. A single Unicode script may correspond to multiple tags, especially when there has been a + * revision to the latter (e.g. Bengali -> [bng2, beng]). When there are multiple tags, they are ordered from newest + * to oldest. 
+ * + * @see Microsoft Typography: Script Tags + */ + private static final Map UNICODE_SCRIPT_TO_OPENTYPE_TAG_MAP; + + static + { + Object[][] table = + { + {"Adlam", new String[] { "adlm" }}, + {"Ahom", new String[] { "ahom" }}, + {"Anatolian_Hieroglyphs", new String[] { "hluw" }}, + {"Arabic", new String[] { "arab" }}, + {"Armenian", new String[] { "armn" }}, + {"Avestan", new String[] { "avst" }}, + {"Balinese", new String[] { "bali" }}, + {"Bamum", new String[] { "bamu" }}, + {"Bassa_Vah", new String[] { "bass" }}, + {"Batak", new String[] { "batk" }}, + {"Bengali", new String[] { "bng2", "beng" }}, + {"Bhaiksuki", new String[] { "bhks" }}, + {"Bopomofo", new String[] { "bopo" }}, + {"Brahmi", new String[] { "brah" }}, + {"Braille", new String[] { "brai" }}, + {"Buginese", new String[] { "bugi" }}, + {"Buhid", new String[] { "buhd" }}, + // Byzantine Music: byzm + {"Canadian_Aboriginal", new String[] { "cans" }}, + {"Carian", new String[] { "cari" }}, + {"Caucasian_Albanian", new String[] { "aghb" }}, + {"Chakma", new String[] { "cakm" }}, + {"Cham", new String[] { "cham" }}, + {"Cherokee", new String[] { "cher" }}, + {"Common", new String[] { TAG_DEFAULT }}, // "Default" in OpenType + {"Coptic", new String[] { "copt" }}, + {"Cuneiform", new String[] { "xsux" }}, // "Sumero-Akkadian Cuneiform" in OpenType + {"Cypriot", new String[] { "cprt" }}, + {"Cyrillic", new String[] { "cyrl" }}, + {"Deseret", new String[] { "dsrt" }}, + {"Devanagari", new String[] { "dev2", "deva" }}, + {"Duployan", new String[] { "dupl" }}, + {"Egyptian_Hieroglyphs", new String[] { "egyp" }}, + {"Elbasan", new String[] { "elba" }}, + {"Ethiopic", new String[] { "ethi" }}, + {"Georgian", new String[] { "geor" }}, + {"Glagolitic", new String[] { "glag" }}, + {"Gothic", new String[] { "goth" }}, + {"Grantha", new String[] { "gran" }}, + {"Greek", new String[] { "grek" }}, + {"Gujarati", new String[] { "gjr2", "gujr" }}, + {"Gurmukhi", new String[] { "gur2", "guru" }}, + {"Han", new String[] 
{ "hani" }}, // "CJK Ideographic" in OpenType + {"Hangul", new String[] { "hang" }}, + // Hangul Jamo: jamo + {"Hanunoo", new String[] { "hano" }}, + {"Hatran", new String[] { "hatr" }}, + {"Hebrew", new String[] { "hebr" }}, + {"Hiragana", new String[] { "kana" }}, + {"Imperial_Aramaic", new String[] { "armi" }}, + {INHERITED, new String[] { INHERITED }}, + {"Inscriptional_Pahlavi", new String[] { "phli" }}, + {"Inscriptional_Parthian", new String[] { "prti" }}, + {"Javanese", new String[] { "java" }}, + {"Kaithi", new String[] { "kthi" }}, + {"Kannada", new String[] { "knd2", "knda" }}, + {"Katakana", new String[] { "kana" }}, + {"Kayah_Li", new String[] { "kali" }}, + {"Kharoshthi", new String[] { "khar" }}, + {"Khmer", new String[] { "khmr" }}, + {"Khojki", new String[] { "khoj" }}, + {"Khudawadi", new String[] { "sind" }}, + {"Lao", new String[] { "lao " }}, + {"Latin", new String[] { "latn" }}, + {"Lepcha", new String[] { "lepc" }}, + {"Limbu", new String[] { "limb" }}, + {"Linear_A", new String[] { "lina" }}, + {"Linear_B", new String[] { "linb" }}, + {"Lisu", new String[] { "lisu" }}, + {"Lycian", new String[] { "lyci" }}, + {"Lydian", new String[] { "lydi" }}, + {"Mahajani", new String[] { "mahj" }}, + {"Malayalam", new String[] { "mlm2", "mlym" }}, + {"Mandaic", new String[] { "mand" }}, + {"Manichaean", new String[] { "mani" }}, + {"Marchen", new String[] { "marc" }}, + // Mathematical Alphanumeric Symbols: math + {"Meetei_Mayek", new String[] { "mtei" }}, + {"Mende_Kikakui", new String[] { "mend" }}, + {"Meroitic_Cursive", new String[] { "merc" }}, + {"Meroitic_Hieroglyphs", new String[] { "mero" }}, + {"Miao", new String[] { "plrd" }}, + {"Modi", new String[] { "modi" }}, + {"Mongolian", new String[] { "mong" }}, + {"Mro", new String[] { "mroo" }}, + {"Multani", new String[] { "mult" }}, + // Musical Symbols: musc + {"Myanmar", new String[] { "mym2", "mymr" }}, + {"Nabataean", new String[] { "nbat" }}, + {"Newa", new String[] { "newa" }}, + 
{"New_Tai_Lue", new String[] { "talu" }}, + {"Nko", new String[] { "nko " }}, + {"Ogham", new String[] { "ogam" }}, + {"Ol_Chiki", new String[] { "olck" }}, + {"Old_Italic", new String[] { "ital" }}, + {"Old_Hungarian", new String[] { "hung" }}, + {"Old_North_Arabian", new String[] { "narb" }}, + {"Old_Permic", new String[] { "perm" }}, + {"Old_Persian", new String[] { "xpeo" }}, + {"Old_South_Arabian", new String[] { "sarb" }}, + {"Old_Turkic", new String[] { "orkh" }}, + {"Oriya", new String[] { "ory2", "orya" }}, // "Odia (formerly Oriya)" in OpenType + {"Osage", new String[] { "osge" }}, + {"Osmanya", new String[] { "osma" }}, + {"Pahawh_Hmong", new String[] { "hmng" }}, + {"Palmyrene", new String[] { "palm" }}, + {"Pau_Cin_Hau", new String[] { "pauc" }}, + {"Phags_Pa", new String[] { "phag" }}, + {"Phoenician", new String[] { "phnx" }}, + {"Psalter_Pahlavi", new String[] { "phlp" }}, + {"Rejang", new String[] { "rjng" }}, + {"Runic", new String[] { "runr" }}, + {"Samaritan", new String[] { "samr" }}, + {"Saurashtra", new String[] { "saur" }}, + {"Sharada", new String[] { "shrd" }}, + {"Shavian", new String[] { "shaw" }}, + {"Siddham", new String[] { "sidd" }}, + {"SignWriting", new String[] { "sgnw" }}, + {"Sinhala", new String[] { "sinh" }}, + {"Sora_Sompeng", new String[] { "sora" }}, + {"Sundanese", new String[] { "sund" }}, + {"Syloti_Nagri", new String[] { "sylo" }}, + {"Syriac", new String[] { "syrc" }}, + {"Tagalog", new String[] { "tglg" }}, + {"Tagbanwa", new String[] { "tagb" }}, + {"Tai_Le", new String[] { "tale" }}, + {"Tai_Tham", new String[] { "lana" }}, + {"Tai_Viet", new String[] { "tavt" }}, + {"Takri", new String[] { "takr" }}, + {"Tamil", new String[] { "tml2", "taml" }}, + {"Tangut", new String[] { "tang" }}, + {"Telugu", new String[] { "tel2", "telu" }}, + {"Thaana", new String[] { "thaa" }}, + {"Thai", new String[] { "thai" }}, + {"Tibetan", new String[] { "tibt" }}, + {"Tifinagh", new String[] { "tfng" }}, + {"Tirhuta", new String[] { 
"tirh" }}, + {"Ugaritic", new String[] { "ugar" }}, + {UNKNOWN, new String[] { TAG_DEFAULT }}, + {"Vai", new String[] { "vai " }}, + {"Warang_Citi", new String[] { "wara" }}, + {"Yi", new String[] { "yi " }} + }; + UNICODE_SCRIPT_TO_OPENTYPE_TAG_MAP = new HashMap(table.length); + for (Object[] array : table) + { + UNICODE_SCRIPT_TO_OPENTYPE_TAG_MAP.put((String) array[0], (String[]) array[1]); + } + } + + private static int[] unicodeRangeStarts; + private static String[] unicodeRangeScripts; + + static + { + String path = "/org/apache/fontbox/unicode/Scripts.txt"; + InputStream input = null; + try + { + input = new BufferedInputStream(OpenTypeScript.class.getResourceAsStream(path)); + parseScriptsFile(input); + } + catch (IOException e) + { + LOG.warn("Could not parse Scripts.txt, mirroring char map will be empty: " + + e.getMessage()); + } + finally + { + if (input != null) + { + try + { + input.close(); + } + catch (IOException ex) + { + LOG.warn("Could not close Scripts.txt"); + } + } + } + } + + private OpenTypeScript() + { + } + + private static void parseScriptsFile(InputStream inputStream) throws IOException + { + Map unicodeRanges = new TreeMap(new Comparator() + { + @Override + public int compare(int[] o1, int[] o2) + { + return o1[0] < o2[0] ? -1 : o1[0] == o2[0] ? 
0 : 1; + }; + }); + LineNumberReader rd = new LineNumberReader(new InputStreamReader(inputStream)); + int[] lastRange = { Integer.MIN_VALUE, Integer.MIN_VALUE }; + String lastScript = null; + do + { + String s = rd.readLine(); + if (s == null) + { + break; + } + + // ignore comments + int comment = s.indexOf('#'); + if (comment != -1) + { + s = s.substring(0, comment); + } + + if (s.length() < 2) + { + continue; + } + + StringTokenizer st = new StringTokenizer(s, ";"); + int nFields = st.countTokens(); + if (nFields < 2) + { + continue; + } + String characters = st.nextToken().trim(); + String script = st.nextToken().trim(); + int[] range = new int[2]; + int rangeDelim = characters.indexOf(".."); + if (rangeDelim == -1) + { + range[0] = range[1] = Integer.parseInt(characters, 16); + } + else + { + range[0] = Integer.parseInt(characters.substring(0, rangeDelim), 16); + range[1] = Integer.parseInt(characters.substring(rangeDelim + 2), 16); + } + if (range[0] == lastRange[1] + 1 && script.equals(lastScript)) + { + // Combine with previous range + lastRange[1] = range[1]; + } + else + { + unicodeRanges.put(range, script); + lastRange = range; + lastScript = script; + } + } + while (true); + rd.close(); + + unicodeRangeStarts = new int[unicodeRanges.size()]; + unicodeRangeScripts = new String[unicodeRanges.size()]; + int i = 0; + for (Entry e : unicodeRanges.entrySet()) + { + unicodeRangeStarts[i] = e.getKey()[0]; + unicodeRangeScripts[i] = e.getValue(); + i++; + } + } + + /** + * Obtain the Unicode script associated with the given Unicode codepoint. 
+ * + * @param codePoint + * @return A Unicode script string, or {@code #UNKNOWN} if unknown + */ + private static String getUnicodeScript(int codePoint) + { + ensureValidCodePoint(codePoint); + int type = Character.getType(codePoint); + if (type == Character.UNASSIGNED) + { + return UNKNOWN; + } + int scriptIndex = Arrays.binarySearch(unicodeRangeStarts, codePoint); + if (scriptIndex < 0) + { + scriptIndex = -scriptIndex - 2; + } + return unicodeRangeScripts[scriptIndex]; + } + + /** + * Obtain the OpenType script tags associated with the given Unicode codepoint. + * + * The result may contain the special value {@code #INHERITED}, which indicates that the + * codepoint's script can only be determined by its context. + * + * Unknown codepoints are mapped to {@code #TAG_DEFAULT}. + * + * @param codePoint + * @return An array of four-char script tags + */ + public static String[] getScriptTags(int codePoint) + { + ensureValidCodePoint(codePoint); + String unicode = getUnicodeScript(codePoint); + return UNICODE_SCRIPT_TO_OPENTYPE_TAG_MAP.get(unicode); + } + + private static void ensureValidCodePoint(int codePoint) + { + if (codePoint < Character.MIN_CODE_POINT || codePoint > Character.MAX_CODE_POINT) + { + throw new IllegalArgumentException("Invalid codepoint: " + codePoint); + } + } +} diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/PostScriptTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/PostScriptTable.java index d39f4bd558c..7f6752e300d 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/PostScriptTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/PostScriptTable.java @@ -17,6 +17,8 @@ package org.apache.fontbox.ttf; import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; /** * A table in a true type font. 
@@ -25,6 +27,7 @@ */ public class PostScriptTable extends TTFTable { + private static final Log LOG = LogFactory.getLog(PostScriptTable.class); private float formatType; private float italicAngle; private short underlinePosition; @@ -54,7 +57,7 @@ public class PostScriptTable extends TTFTable * @throws IOException If there is an error reading the data. */ @Override - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { formatType = data.read32Fixed(); italicAngle = data.read32Fixed(); @@ -98,13 +101,27 @@ else if (formatType == 2.0f) for (int i = 0; i < maxIndex - WGL4Names.NUMBER_OF_MAC_GLYPHS + 1; i++) { int numberOfChars = data.readUnsignedByte(); - nameArray[i] = data.readString(numberOfChars); + try + { + nameArray[i] = data.readString(numberOfChars); + } + catch (IOException ex) + { + // PDFBOX-4851: EOF + LOG.warn("Error reading names in PostScript table at entry " + i + " of " + + nameArray.length + ", setting remaining entries to .notdef", ex); + for (int j = i; j < nameArray.length; ++j) + { + nameArray[j] = ".notdef"; + } + break; + } } } for (int i = 0; i < numGlyphs; i++) { int index = glyphNameIndex[i]; - if (index < WGL4Names.NUMBER_OF_MAC_GLYPHS) + if (index >= 0 && index < WGL4Names.NUMBER_OF_MAC_GLYPHS) { glyphNames[i] = WGL4Names.MAC_GLYPH_NAMES[index]; } @@ -131,17 +148,26 @@ else if (formatType == 2.5f) glyphNames = new String[glyphNameIndex.length]; for (int i = 0; i < glyphNames.length; i++) { - String name = WGL4Names.MAC_GLYPH_NAMES[glyphNameIndex[i]]; - if (name != null) + int index = glyphNameIndex[i]; + if (index >= 0 && index < WGL4Names.NUMBER_OF_MAC_GLYPHS) { - glyphNames[i] = name; + String name = WGL4Names.MAC_GLYPH_NAMES[index]; + if (name != null) + { + glyphNames[i] = name; + } + } + else + { + LOG.debug("incorrect glyph name index " + index + + ", valid numbers 0.." 
+ WGL4Names.NUMBER_OF_MAC_GLYPHS); } } - } else if (formatType == 3.0f) { // no postscript information is provided. + LOG.debug("No PostScript name information is provided for the font " + font.getName()); } initialized = true; } @@ -311,7 +337,7 @@ public void setGlyphNames(String[] glyphNamesValue) */ public String getName(int gid) { - if (gid < 0 || glyphNames == null || gid > glyphNames.length) + if (gid < 0 || glyphNames == null || gid >= glyphNames.length) { return null; } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/RAFDataStream.java b/fontbox/src/main/java/org/apache/fontbox/ttf/RAFDataStream.java index aa7287ec1e2..0e54827ccdc 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/RAFDataStream.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/RAFDataStream.java @@ -65,10 +65,11 @@ class RAFDataStream extends TTFDataStream } /** - * Read an signed short. + * Read a signed short. * * @return An signed short. * @throws IOException If there is an error reading the data. + * @see RandomAccessFile#readShort() */ @Override public short readSignedShort() throws IOException @@ -95,14 +96,18 @@ public long getCurrentPosition() throws IOException @Override public void close() throws IOException { - raf.close(); - raf = null; + if (raf != null) + { + raf.close(); + raf = null; + } } /** * Read an unsigned byte. * @return An unsigned byte. * @throws IOException If there is an error reading the data. + * @see RandomAccessFile#read() */ @Override public int read() throws IOException @@ -115,6 +120,7 @@ public int read() throws IOException * * @return An unsigned short. * @throws IOException If there is an error reading the data. + * @see RandomAccessFile#readUnsignedShort() */ @Override public int readUnsignedShort() throws IOException @@ -123,9 +129,11 @@ public int readUnsignedShort() throws IOException } /** - * Read an unsigned byte. - * @return An unsigned byte. + * Read a signed 64-bit integer. 
+ * + * @return eight bytes interpreted as a long. * @throws IOException If there is an error reading the data. + * @see RandomAccessFile#readLong() */ @Override public long readLong() throws IOException @@ -170,4 +178,13 @@ public InputStream getOriginalData() throws IOException { return new FileInputStream( ttfFile ); } + + /** + * {@inheritDoc} + */ + @Override + public long getOriginalDataSize() + { + return ttfFile.length(); + } } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/SubstitutingCmapLookup.java b/fontbox/src/main/java/org/apache/fontbox/ttf/SubstitutingCmapLookup.java new file mode 100644 index 00000000000..a053cf10ea4 --- /dev/null +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/SubstitutingCmapLookup.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.fontbox.ttf; + +import java.util.List; + +/** + * A cmap lookup that performs substitution via the 'GSUB' table. 
+ * + * @author Aaron Madlon-Kay + */ +public class SubstitutingCmapLookup implements CmapLookup +{ + private final CmapSubtable cmap; + private final GlyphSubstitutionTable gsub; + private final List enabledFeatures; + + public SubstitutingCmapLookup(CmapSubtable cmap, GlyphSubstitutionTable gsub, + List enabledFeatures) + { + this.cmap = cmap; + this.gsub = gsub; + this.enabledFeatures = enabledFeatures; + } + + @Override + public int getGlyphId(int characterCode) + { + int gid = cmap.getGlyphId(characterCode); + String[] scriptTags = OpenTypeScript.getScriptTags(characterCode); + return gsub.getSubstitution(gid, scriptTags, enabledFeatures); + } + + @Override + public List getCharCodes(int gid) + { + return cmap.getCharCodes(gsub.getUnsubstitution(gid)); + } +} diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java index ad3ce848f8e..96abcfb0b7f 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java @@ -88,4 +88,10 @@ public InputStream getOriginalData() throws IOException { return stream.getOriginalData(); } + + @Override + public long getOriginalDataSize() + { + return stream.getOriginalDataSize(); + } } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java index 8f4934b3d82..e2603fa6824 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java @@ -22,7 +22,6 @@ import java.io.InputStream; import java.nio.charset.Charset; import java.util.Calendar; -import java.util.GregorianCalendar; import java.util.TimeZone; import org.apache.fontbox.util.Charsets; @@ -116,7 +115,7 @@ public String readString(int length, Charset charset) throws IOException public int readSignedByte() throws IOException { int signedByte = 
read(); - return signedByte < 127 ? signedByte : signedByte - 256; + return signedByte <= 127 ? signedByte : signedByte - 256; } /** @@ -138,7 +137,7 @@ public int readUnsignedByte() throws IOException /** * Read an unsigned integer. * - * @return An unsiged integer. + * @return An unsigned integer. * @throws IOException If there is an error reading the data. */ public long readUnsignedInt() throws IOException @@ -151,7 +150,7 @@ public long readUnsignedInt() throws IOException { throw new EOFException(); } - return (byte1 << 24) + (byte2 << 16) + (byte3 << 8) + (byte4 << 0); + return (byte1 << 24) + (byte2 << 16) + (byte3 << 8) + byte4; } /** @@ -213,7 +212,7 @@ public int[] readUnsignedShortArray(int length) throws IOException public Calendar readInternationalDate() throws IOException { long secondsSince1904 = readLong(); - Calendar cal = GregorianCalendar.getInstance(TimeZone.getTimeZone("UTC")); + Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC")); cal.set(1904, 0, 1, 0, 0, 0); cal.set(Calendar.MILLISECOND, 0); long millisFor1904 = cal.getTimeInMillis(); @@ -231,14 +230,6 @@ public String readTag() throws IOException return new String(read(4), Charsets.US_ASCII); } - /** - * Close the underlying resources. - * - * @throws IOException If there is an error closing the resources. - */ - @Override - public abstract void close() throws IOException; - /** * Seek into the datasource. * @@ -304,4 +295,11 @@ public byte[] read(int numberOfBytes) throws IOException */ public abstract InputStream getOriginalData() throws IOException; + /** + * This will get the original data size that was used for this stream. + * + * @return The size of the original data. + * @throws IOException If there is an issue reading the data. 
+ */ + public abstract long getOriginalDataSize(); } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java index 022f2f9792b..af1350f23f3 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java @@ -147,7 +147,7 @@ TrueTypeFont parse(TTFDataStream raf) throws IOException // parse tables if wanted if (!parseOnDemandOnly) { - parseTables(font, raf); + parseTables(font); } return font; @@ -162,10 +162,9 @@ TrueTypeFont newFont(TTFDataStream raf) * Parse all tables and check if all needed tables are present. * * @param font the TrueTypeFont instance holding the parsed data. - * @param raf the data stream of the to be parsed ttf font * @throws IOException If there is an error parsing the TrueType font. */ - private void parseTables(TrueTypeFont font, TTFDataStream raf) throws IOException + private void parseTables(TrueTypeFont font) throws IOException { for (TTFTable table : font.getTables()) { @@ -174,7 +173,9 @@ private void parseTables(TrueTypeFont font, TTFDataStream raf) throws IOExceptio font.readTable(table); } } - + + boolean isPostScript = allowCFF() && font.tables.containsKey(CFFTable.TAG); + HeaderTable head = font.getHeader(); if (head == null) { @@ -200,35 +201,44 @@ private void parseTables(TrueTypeFont font, TTFDataStream raf) throws IOExceptio throw new IOException("post is mandatory"); } - IndexToLocationTable loc = font.getIndexToLocation(); - if (loc == null) - { - throw new IOException("loca is mandatory"); - } - // check other mandatory tables - if (font.getGlyph() == null) + if (!isPostScript) { - throw new IOException("glyf is mandatory"); + IndexToLocationTable loc = font.getIndexToLocation(); + if (loc == null) + { + throw new IOException("loca is mandatory"); + } + + if (font.getGlyph() == null) + { + throw new IOException("glyf is mandatory"); + } } + if (font.getNaming() == null && 
!isEmbedded) { throw new IOException("name is mandatory"); } + if (font.getHorizontalMetrics() == null) { throw new IOException("hmtx is mandatory"); } - - // check others mandatory tables + if (!isEmbedded && font.getCmap() == null) { throw new IOException("cmap is mandatory"); } } + protected boolean allowCFF() + { + return false; + } + private TTFTable readTableDirectory(TrueTypeFont font, TTFDataStream raf) throws IOException { - TTFTable table = null; + TTFTable table; String tag = raf.readString(4); if (tag.equals(CmapTable.TAG)) { @@ -290,6 +300,10 @@ else if (tag.equals(VerticalOriginTable.TAG)) { table = new VerticalOriginTable(font); } + else if (tag.equals(GlyphSubstitutionTable.TAG)) + { + table = new GlyphSubstitutionTable(font); + } else { table = readTable(font, tag); @@ -299,8 +313,8 @@ else if (tag.equals(VerticalOriginTable.TAG)) table.setOffset(raf.readUnsignedInt()); table.setLength(raf.readUnsignedInt()); - // skip tables with zero length - if (table.getLength() == 0) + // skip tables with zero length (except glyf) + if (table.getLength() == 0 && !tag.equals(GlyphTable.TAG)) { return null; } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java index d11844fb2a5..6c5e01bc82e 100755 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFSubsetter.java @@ -24,17 +24,20 @@ import java.io.OutputStream; import java.nio.charset.Charset; import java.util.Calendar; -import java.util.GregorianCalendar; import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; +import java.util.TimeZone; import java.util.TreeMap; import java.util.TreeSet; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; 
/** * Subsetter for TrueType (TTF) fonts. @@ -46,10 +49,12 @@ */ public final class TTFSubsetter { + private static final Log LOG = LogFactory.getLog(TTFSubsetter.class); + private static final byte[] PAD_BUF = new byte[] { 0, 0, 0 }; private final TrueTypeFont ttf; - private final CmapSubtable unicodeCmap; + private final CmapLookup unicodeCmap; private final SortedMap uniToGID; private final List keepTables; @@ -82,7 +87,7 @@ public TTFSubsetter(TrueTypeFont ttf, List tables) throws IOException glyphIds = new TreeSet(); // find the best Unicode cmap - this.unicodeCmap = ttf.getUnicodeCmap(); + this.unicodeCmap = ttf.getUnicodeCmapLookup(); // always copy GID 0 glyphIds.add(0); @@ -125,7 +130,7 @@ public void addAll(Set unicodeSet) } /** - * Returns the map of new -> old GIDs. + * Returns the map of new -> old GIDs. */ public Map getGIDMap() throws IOException { @@ -249,7 +254,15 @@ private byte[] buildHheaTable() throws IOException writeSInt16(out, h.getReserved4()); writeSInt16(out, h.getReserved5()); writeSInt16(out, h.getMetricDataFormat()); - writeUint16(out, glyphIds.subSet(0, h.getNumberOfHMetrics()).size()); + + // is there a GID >= numberOfHMetrics ? 
Then keep the last entry of original hmtx table, + // (add if it isn't in our set of GIDs), see also in buildHmtxTable() + int hmetrics = glyphIds.subSet(0, h.getNumberOfHMetrics()).size(); + if (glyphIds.last() >= h.getNumberOfHMetrics() && !glyphIds.contains(h.getNumberOfHMetrics()-1)) + { + ++hmetrics; + } + writeUint16(out, hmetrics); out.flush(); return bos.toByteArray(); @@ -259,7 +272,7 @@ private boolean shouldCopyNameRecord(NameRecord nr) { return nr.getPlatformId() == NameRecord.PLATFORM_WINDOWS && nr.getPlatformEncodingId() == NameRecord.ENCODING_WINDOWS_UNICODE_BMP - && nr.getLanguageId() == NameRecord.LANGUGAE_WINDOWS_EN_US + && nr.getLanguageId() == NameRecord.LANGUAGE_WINDOWS_EN_US && nr.getNameId() >= 0 && nr.getNameId() < 7; } @@ -388,7 +401,7 @@ private byte[] buildMaxpTable() throws IOException private byte[] buildOS2Table() throws IOException { OS2WindowsMetricsTable os2 = ttf.getOS2Windows(); - if (os2 == null || keepTables != null && !keepTables.contains("OS/2")) + if (os2 == null || uniToGID.isEmpty() || keepTables != null && !keepTables.contains("OS/2")) { return null; } @@ -425,12 +438,8 @@ private byte[] buildOS2Table() throws IOException out.write(os2.getAchVendId().getBytes("US-ASCII")); - Iterator> it = uniToGID.entrySet().iterator(); - it.next(); - Entry first = it.next(); - writeUint16(out, os2.getFsSelection()); - writeUint16(out, first.getKey()); + writeUint16(out, uniToGID.firstKey()); writeUint16(out, uniToGID.lastKey()); writeUint16(out, os2.getTypoAscender()); writeUint16(out, os2.getTypoDescender()); @@ -442,6 +451,7 @@ private byte[] buildOS2Table() throws IOException return bos.toByteArray(); } + // never returns null private byte[] buildLocaTable(long[] newOffsets) throws IOException { ByteArrayOutputStream bos = new ByteArrayOutputStream(); @@ -544,9 +554,11 @@ else if ((flags & 1 << 3) != 0) glyphIds.addAll(glyphIdsToAdd); } hasNested = glyphIdsToAdd != null; - } while (hasNested); + } + while (hasNested); } + // never 
returns null private byte[] buildGlyfTable(long[] newOffsets) throws IOException { ByteArrayOutputStream bos = new ByteArrayOutputStream(); @@ -678,7 +690,7 @@ private int getNewGlyphId(Integer oldGid) private byte[] buildCmapTable() throws IOException { - if (ttf.getCmap() == null || keepTables != null && !keepTables.contains("cmap")) + if (ttf.getCmap() == null || uniToGID.isEmpty() || keepTables != null && !keepTables.contains("cmap")) { return null; } @@ -693,18 +705,18 @@ private byte[] buildCmapTable() throws IOException // encoding record writeUint16(out, CmapTable.PLATFORM_WINDOWS); // platformID writeUint16(out, CmapTable.ENCODING_WIN_UNICODE_BMP); // platformSpecificID - writeUint32(out, 4 * 2 + 4); // offset + writeUint32(out, 12); // offset 4 * 2 + 4 // build Format 4 subtable (Unicode BMP) Iterator> it = uniToGID.entrySet().iterator(); - it.next(); Entry lastChar = it.next(); Entry prevChar = lastChar; int lastGid = getNewGlyphId(lastChar.getValue()); - int[] startCode = new int[uniToGID.size()]; - int[] endCode = new int[uniToGID.size()]; - int[] idDelta = new int[uniToGID.size()]; + // +1 because .notdef is missing in uniToGID + int[] startCode = new int[uniToGID.size()+1]; + int[] endCode = new int[startCode.length]; + int[] idDelta = new int[startCode.length]; int segCount = 0; while(it.hasNext()) { @@ -756,7 +768,7 @@ else if (!lastChar.getKey().equals(prevChar.getKey())) segCount++; // write format 4 subtable - int searchRange = 2 * (int)Math.pow(2, Math.floor(log2(segCount))); + int searchRange = 2 * (int)Math.pow(2, log2(segCount)); writeUint16(out, 4); // format writeUint16(out, 8 * 2 + segCount * 4*2); // length writeUint16(out, 0); // language @@ -821,7 +833,7 @@ private byte[] buildPostTable() throws IOException writeUint16(out, glyphIds.size()); // glyphNameIndex[numGlyphs] - Map names = new TreeMap(); + Map names = new LinkedHashMap(); for (int gid : glyphIds) { String name = post.getName(gid); @@ -862,41 +874,48 @@ private byte[] 
buildHmtxTable() throws IOException HorizontalHeaderTable h = ttf.getHorizontalHeader(); HorizontalMetricsTable hm = ttf.getHorizontalMetrics(); - byte [] buf = new byte[4]; InputStream is = ttf.getOriginalData(); + + // more info: https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6hmtx.html + int lastgid = h.getNumberOfHMetrics() - 1; + // true if lastgid is not in the set: we'll need its width (but not its left side bearing) later + boolean needLastGidWidth = false; + if (glyphIds.last() > lastgid && !glyphIds.contains(lastgid)) + { + needLastGidWidth = true; + } + try { is.skip(hm.getOffset()); - long lastOff = 0; + long lastOffset = 0; for (Integer glyphId : glyphIds) { // offset in original file - long off; - if (glyphId < h.getNumberOfHMetrics()) + long offset; + if (glyphId <= lastgid) { - off = glyphId * 4; + // copy width and lsb + offset = glyphId * 4l; + lastOffset = copyBytes(is, bos, offset, lastOffset, 4); } - else + else { - off = h.getNumberOfHMetrics() * 4 + (glyphId - h.getNumberOfHMetrics()) * 2; - } - // skip over from last original offset - if (off != lastOff) - { - long nskip = off-lastOff; - if (nskip != is.skip(nskip)) + if (needLastGidWidth) { - throw new EOFException("Unexpected EOF exception parsing glyphId of hmtx table."); + // one time only: copy width from lastgid, whose width applies + // to all later glyphs + needLastGidWidth = false; + offset = lastgid * 4l; + lastOffset = copyBytes(is, bos, offset, lastOffset, 2); + + // then go on with lsb from actual glyph (lsb are individual even in monotype fonts) } + + // copy lsb only, as we are beyond numOfHMetrics + offset = h.getNumberOfHMetrics() * 4l + (glyphId - h.getNumberOfHMetrics()) * 2l; + lastOffset = copyBytes(is, bos, offset, lastOffset, 2); } - // read left side bearings only, if we are beyond numOfHMetrics - int n = glyphId < h.getNumberOfHMetrics() ? 
4 : 2; - if (n != is.read(buf, 0, n)) - { - throw new EOFException("Unexpected EOF exception parsing glyphId of hmtx table."); - } - bos.write(buf, 0, n); - lastOff = off + n; } return bos.toByteArray(); @@ -907,10 +926,28 @@ private byte[] buildHmtxTable() throws IOException } } + private long copyBytes(InputStream is, OutputStream os, long newOffset, long lastOffset, int count) + throws IOException + { + // skip over from last original offset + long nskip = newOffset - lastOffset; + if (nskip != is.skip(nskip)) + { + throw new EOFException("Unexpected EOF exception parsing glyphId of hmtx table."); + } + byte[] buf = new byte[count]; + if (count != is.read(buf, 0, count)) + { + throw new EOFException("Unexpected EOF exception parsing glyphId of hmtx table."); + } + os.write(buf, 0, count); + return newOffset + count; + } + /** * Write the subfont to the given output stream. * - * @param os the stream used for writing + * @param os the stream used for writing. It will be closed by this method. * @throws IOException if something went wrong. * @throws IllegalStateException if the subset is empty. 
*/ @@ -918,7 +955,7 @@ public void writeToStream(OutputStream os) throws IOException { if (glyphIds.isEmpty() || uniToGID.isEmpty()) { - throw new IllegalStateException("subset is empty"); + LOG.info("font subset is empty"); } addCompoundReferences(); @@ -950,17 +987,11 @@ public void writeToStream(OutputStream os) throws IOException { tables.put("cmap", cmap); } - if (glyf != null) - { - tables.put("glyf", glyf); - } + tables.put("glyf", glyf); tables.put("head", head); tables.put("hhea", hhea); tables.put("hmtx", hmtx); - if (loca != null) - { - tables.put("loca", loca); - } + tables.put("loca", loca); tables.put("maxp", maxp); if (name != null) { @@ -1040,7 +1071,9 @@ private void writeUint8(DataOutputStream out, int i) throws IOException private void writeLongDateTime(DataOutputStream out, Calendar calendar) throws IOException { // inverse operation of TTFDataStream.readInternationalDate() - GregorianCalendar cal = new GregorianCalendar( 1904, 0, 1 ); + Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + cal.set(1904, 0, 1, 0, 0, 0); + cal.set(Calendar.MILLISECOND, 0); long millisFor1904 = cal.getTimeInMillis(); long secondsSince1904 = (calendar.getTimeInMillis() - millisFor1904) / 1000L; out.writeLong(secondsSince1904); diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java index 6fd4415cc30..9d3ae6dfce0 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java @@ -33,7 +33,7 @@ public class TTFTable /** * Indicates if the table is initialized or not. */ - protected boolean initialized; + protected volatile boolean initialized; /** * The font which contains this table. 
diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java index 01349a6e17f..b04bca38897 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java @@ -20,8 +20,10 @@ import java.io.Closeable; import java.io.IOException; import java.io.InputStream; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -41,7 +43,8 @@ public class TrueTypeFont implements FontBoxFont, Closeable protected Map tables = new HashMap(); private final TTFDataStream data; private Map postScriptNames; - + private final List enabledGsubFeatures = new ArrayList(); + /** * Constructor. Clients should use the TTFParser to create a new TrueTypeFont object. * @@ -58,6 +61,14 @@ public void close() throws IOException data.close(); } + @Override + protected void finalize() throws Throwable + { + super.finalize(); + // PDFBOX-4963: risk of memory leaks due to SoftReference in FontCache + close(); + } + /** * @return Returns the version. */ @@ -106,7 +117,7 @@ public Map getTableMap() } /** - * Returns the war bytes of the given table. + * Returns the raw bytes of the given table. * @param table the table to read. * @throws IOException if there was an error accessing the table. */ @@ -123,237 +134,195 @@ public synchronized byte[] getTableBytes(TTFTable table) throws IOException data.seek(currentPosition); return bytes; } - + /** - * This will get the naming table for the true type font. + * This will get the table for the given tag. * - * @return The naming table. + * @param tag the name of the table to be returned + * @return The table with the given tag. * @throws IOException if there was an error reading the table. 
*/ - public synchronized NamingTable getNaming() throws IOException + protected synchronized TTFTable getTable(String tag) throws IOException { - NamingTable naming = (NamingTable)tables.get( NamingTable.TAG ); - if (naming != null && !naming.getInitialized()) + TTFTable ttfTable = tables.get(tag); + if (ttfTable != null && !ttfTable.getInitialized()) { - readTable(naming); + readTable(ttfTable); } - return naming; + return ttfTable; + } + + /** + * This will get the naming table for the true type font. + * + * @return The naming table or null if it doesn't exist. + * @throws IOException if there was an error reading the table. + */ + public NamingTable getNaming() throws IOException + { + return (NamingTable) getTable(NamingTable.TAG); } /** * Get the postscript table for this TTF. * - * @return The postscript table. + * @return The postscript table or null if it doesn't exist. * @throws IOException if there was an error reading the table. */ - public synchronized PostScriptTable getPostScript() throws IOException + public PostScriptTable getPostScript() throws IOException { - PostScriptTable postscript = (PostScriptTable)tables.get( PostScriptTable.TAG ); - if (postscript != null && !postscript.getInitialized()) - { - readTable(postscript); - } - return postscript; + return (PostScriptTable) getTable(PostScriptTable.TAG); } /** * Get the OS/2 table for this TTF. * - * @return The OS/2 table. + * @return The OS/2 table or null if it doesn't exist. * @throws IOException if there was an error reading the table. 
*/ - public synchronized OS2WindowsMetricsTable getOS2Windows() throws IOException + public OS2WindowsMetricsTable getOS2Windows() throws IOException { - OS2WindowsMetricsTable os2WindowsMetrics = (OS2WindowsMetricsTable)tables.get( OS2WindowsMetricsTable.TAG ); - if (os2WindowsMetrics != null && !os2WindowsMetrics.getInitialized()) - { - readTable(os2WindowsMetrics); - } - return os2WindowsMetrics; + return (OS2WindowsMetricsTable) getTable(OS2WindowsMetricsTable.TAG); } - + /** * Get the maxp table for this TTF. * - * @return The maxp table. + * @return The maxp table or null if it doesn't exist. * @throws IOException if there was an error reading the table. */ - public synchronized MaximumProfileTable getMaximumProfile() throws IOException + public MaximumProfileTable getMaximumProfile() throws IOException { - MaximumProfileTable maximumProfile = (MaximumProfileTable)tables.get( MaximumProfileTable.TAG ); - if (maximumProfile != null && !maximumProfile.getInitialized()) - { - readTable(maximumProfile); - } - return maximumProfile; + return (MaximumProfileTable) getTable(MaximumProfileTable.TAG); } /** * Get the head table for this TTF. * - * @return The head table. + * @return The head table or null if it doesn't exist. * @throws IOException if there was an error reading the table. */ - public synchronized HeaderTable getHeader() throws IOException + public HeaderTable getHeader() throws IOException { - HeaderTable header = (HeaderTable)tables.get( HeaderTable.TAG ); - if (header != null && !header.getInitialized()) - { - readTable(header); - } - return header; + return (HeaderTable) getTable(HeaderTable.TAG); } /** * Get the hhea table for this TTF. * - * @return The hhea table. + * @return The hhea table or null if it doesn't exist. * @throws IOException if there was an error reading the table. 
*/ - public synchronized HorizontalHeaderTable getHorizontalHeader() throws IOException + public HorizontalHeaderTable getHorizontalHeader() throws IOException { - HorizontalHeaderTable horizontalHeader = (HorizontalHeaderTable)tables.get( HorizontalHeaderTable.TAG ); - if (horizontalHeader != null && !horizontalHeader.getInitialized()) - { - readTable(horizontalHeader); - } - return horizontalHeader; + return (HorizontalHeaderTable) getTable(HorizontalHeaderTable.TAG); } /** * Get the hmtx table for this TTF. * - * @return The hmtx table. + * @return The hmtx table or null if it doesn't exist. * @throws IOException if there was an error reading the table. */ - public synchronized HorizontalMetricsTable getHorizontalMetrics() throws IOException + public HorizontalMetricsTable getHorizontalMetrics() throws IOException { - HorizontalMetricsTable horizontalMetrics = (HorizontalMetricsTable)tables.get( HorizontalMetricsTable.TAG ); - if (horizontalMetrics != null && !horizontalMetrics.getInitialized()) - { - readTable(horizontalMetrics); - } - return horizontalMetrics; + return (HorizontalMetricsTable) getTable(HorizontalMetricsTable.TAG); } /** * Get the loca table for this TTF. * - * @return The loca table. + * @return The loca table or null if it doesn't exist. * @throws IOException if there was an error reading the table. */ - public synchronized IndexToLocationTable getIndexToLocation() throws IOException + public IndexToLocationTable getIndexToLocation() throws IOException { - IndexToLocationTable indexToLocation = (IndexToLocationTable)tables.get( IndexToLocationTable.TAG ); - if (indexToLocation != null && !indexToLocation.getInitialized()) - { - readTable(indexToLocation); - } - return indexToLocation; + return (IndexToLocationTable) getTable(IndexToLocationTable.TAG); } /** * Get the glyf table for this TTF. * - * @return The glyf table. + * @return The glyf table or null if it doesn't exist. * @throws IOException if there was an error reading the table. 
*/ - public synchronized GlyphTable getGlyph() throws IOException + public GlyphTable getGlyph() throws IOException { - GlyphTable glyph = (GlyphTable)tables.get( GlyphTable.TAG ); - if (glyph != null && !glyph.getInitialized()) - { - readTable(glyph); - } - return glyph; + return (GlyphTable) getTable(GlyphTable.TAG); } /** * Get the "cmap" table for this TTF. * - * @return The "cmap" table. - * @throws IOException if there was an error reading the table. - */ - public synchronized CmapTable getCmap() throws IOException + * @return The "cmap" table or null if it doesn't exist. + * @throws IOException if there was an error reading the table. + */ + public CmapTable getCmap() throws IOException { - CmapTable cmap = (CmapTable)tables.get( CmapTable.TAG ); - if (cmap != null && !cmap.getInitialized()) - { - readTable(cmap); - } - return cmap; + return (CmapTable) getTable(CmapTable.TAG); } /** * Get the vhea table for this TTF. * - * @return The vhea table. + * @return The vhea table or null if it doesn't exist. * @throws IOException if there was an error reading the table. */ - public synchronized VerticalHeaderTable getVerticalHeader() throws IOException + public VerticalHeaderTable getVerticalHeader() throws IOException { - VerticalHeaderTable verticalHeader = (VerticalHeaderTable)tables.get( VerticalHeaderTable.TAG ); - if (verticalHeader != null && !verticalHeader.getInitialized()) - { - readTable(verticalHeader); - } - return verticalHeader; + return (VerticalHeaderTable) getTable(VerticalHeaderTable.TAG); } /** * Get the vmtx table for this TTF. * - * @return The vmtx table. + * @return The vmtx table or null if it doesn't exist. * @throws IOException if there was an error reading the table. 
*/ - public synchronized VerticalMetricsTable getVerticalMetrics() throws IOException + public VerticalMetricsTable getVerticalMetrics() throws IOException { - VerticalMetricsTable verticalMetrics = (VerticalMetricsTable)tables.get( VerticalMetricsTable.TAG ); - if (verticalMetrics != null && !verticalMetrics.getInitialized()) - { - readTable(verticalMetrics); - } - return verticalMetrics; + return (VerticalMetricsTable) getTable(VerticalMetricsTable.TAG); } /** * Get the VORG table for this TTF. * - * @return The VORG table. + * @return The VORG table or null if it doesn't exist. * @throws IOException if there was an error reading the table. */ - public synchronized VerticalOriginTable getVerticalOrigin() throws IOException + public VerticalOriginTable getVerticalOrigin() throws IOException { - VerticalOriginTable verticalOrigin = (VerticalOriginTable)tables.get( VerticalOriginTable.TAG ); - if (verticalOrigin != null && !verticalOrigin.getInitialized()) - { - readTable(verticalOrigin); - } - return verticalOrigin; + return (VerticalOriginTable) getTable(VerticalOriginTable.TAG); } /** * Get the "kern" table for this TTF. * - * @return The "kern" table. + * @return The "kern" table or null if it doesn't exist. * @throws IOException if there was an error reading the table. */ - public synchronized KerningTable getKerning() throws IOException + public KerningTable getKerning() throws IOException { - KerningTable kerning = (KerningTable)tables.get( KerningTable.TAG ); - if (kerning != null && !kerning.getInitialized()) - { - readTable(kerning); - } - return kerning; + return (KerningTable) getTable(KerningTable.TAG); } - + /** - * This permit to get the data of the True Type Font + * Get the "gsub" table for this TTF. + * + * @return The "gsub" table or null if it doesn't exist. + * @throws IOException if there was an error reading the table. 
+ */ + public GlyphSubstitutionTable getGsub() throws IOException + { + return (GlyphSubstitutionTable) getTable(GlyphSubstitutionTable.TAG); + } + + /** + * Get the data of the TrueType Font * program representing the stream used to build this * object (normally from the TTFParser object). * - * @return COSStream True type font program stream + * @return COSStream TrueType font program stream * * @throws IOException If there is an error getting the font data. */ @@ -361,6 +330,17 @@ public InputStream getOriginalData() throws IOException { return data.getOriginalData(); } + + /** + * Get the data size of the TrueType Font program representing the stream used to build this + * object (normally from the TTFParser object). + * + * @return the size. + */ + public long getOriginalDataSize() + { + return data.getOriginalDataSize(); + } /** * Read the given table if necessary. Package-private, used by TTFParser only. @@ -371,16 +351,21 @@ public InputStream getOriginalData() throws IOException */ void readTable(TTFTable table) throws IOException { - // save current position - long currentPosition = data.getCurrentPosition(); - data.seek(table.getOffset()); - table.read(this, data); - // restore current position - data.seek(currentPosition); + // PDFBOX-4219: synchronize on data because it is accessed by several threads + // when PDFBox is accessing a standard 14 font for the first time + synchronized (data) + { + // save current position + long currentPosition = data.getCurrentPosition(); + data.seek(table.getOffset()); + table.read(this, data); + // restore current position + data.seek(currentPosition); + } } /** - * Returns the number of glyphs (MaximuProfile.numGlyphs). + * Returns the number of glyphs (MaximumProfile.numGlyphs). * * @return the number of glyphs * @throws IOException if there was an error reading the table. 
@@ -507,7 +492,9 @@ private synchronized void readPostScriptNames() throws IOException * by which this is accomplished are implementation-dependent." * * @throws IOException if the font could not be read + * @deprecated Use {@link #getUnicodeCmapLookup()} instead */ + @Deprecated public CmapSubtable getUnicodeCmap() throws IOException { return getUnicodeCmap(true); @@ -519,15 +506,59 @@ public CmapSubtable getUnicodeCmap() throws IOException * * @param isStrict False if we allow falling back to any cmap, even if it's not Unicode. * @throws IOException if the font could not be read, or there is no Unicode cmap + * @deprecated Use {@link #getUnicodeCmapLookup(boolean)} instead */ + @Deprecated public CmapSubtable getUnicodeCmap(boolean isStrict) throws IOException + { + return getUnicodeCmapImpl(isStrict); + } + + /** + * Returns the best Unicode from the font (the most general). The PDF spec says that "The means + * by which this is accomplished are implementation-dependent." + * + * The returned cmap will perform glyph substitution. + * + * @throws IOException if the font could not be read + */ + public CmapLookup getUnicodeCmapLookup() throws IOException + { + return getUnicodeCmapLookup(true); + } + + /** + * Returns the best Unicode from the font (the most general). The PDF spec says that "The means + * by which this is accomplished are implementation-dependent." + * + * The returned cmap will perform glyph substitution. + * + * @param isStrict False if we allow falling back to any cmap, even if it's not Unicode. 
+ * @throws IOException if the font could not be read, or there is no Unicode cmap + */ + public CmapLookup getUnicodeCmapLookup(boolean isStrict) throws IOException + { + CmapSubtable cmap = getUnicodeCmapImpl(isStrict); + if (!enabledGsubFeatures.isEmpty()) + { + GlyphSubstitutionTable table = getGsub(); + if (table != null) + { + return new SubstitutingCmapLookup(cmap, (GlyphSubstitutionTable) table, + Collections.unmodifiableList(enabledGsubFeatures)); + } + } + return cmap; + } + + private CmapSubtable getUnicodeCmapImpl(boolean isStrict) throws IOException { CmapTable cmapTable = getCmap(); if (cmapTable == null) { if (isStrict) { - throw new IOException("The TrueType font does not contain a 'cmap' table"); + throw new IOException("The TrueType font " + getName() + " does not contain a 'cmap' table"); } else { @@ -538,6 +569,11 @@ public CmapSubtable getUnicodeCmap(boolean isStrict) throws IOException CmapSubtable cmap = cmapTable.getSubtable(CmapTable.PLATFORM_UNICODE, CmapTable.ENCODING_UNICODE_2_0_FULL); if (cmap == null) + { + cmap = cmapTable.getSubtable(CmapTable.PLATFORM_WINDOWS, + CmapTable.ENCODING_WIN_UNICODE_FULL); + } + if (cmap == null) { cmap = cmapTable.getSubtable(CmapTable.PLATFORM_UNICODE, CmapTable.ENCODING_UNICODE_2_0_BMP); @@ -560,7 +596,7 @@ public CmapSubtable getUnicodeCmap(boolean isStrict) throws IOException { throw new IOException("The TrueType font does not contain a Unicode cmap"); } - else + else if (cmapTable.getCmaps().length > 0) { // fallback to the first cmap (may not be Unicode, so may produce poor results) cmap = cmapTable.getCmaps()[0]; @@ -577,17 +613,20 @@ public int nameToGID(String name) throws IOException { // look up in 'post' table readPostScriptNames(); - Integer gid = postScriptNames.get(name); - if (gid != null && gid > 0 && gid < getMaximumProfile().getNumGlyphs()) + if (postScriptNames != null) { - return gid; + Integer gid = postScriptNames.get(name); + if (gid != null && gid > 0 && gid < 
getMaximumProfile().getNumGlyphs()) + { + return gid; + } } // look up in 'cmap' int uni = parseUniName(name); if (uni > -1) { - CmapSubtable cmap = getUnicodeCmap(false); + CmapLookup cmap = getUnicodeCmapLookup(false); return cmap.getGlyphId(uni); } @@ -597,7 +636,7 @@ public int nameToGID(String name) throws IOException /** * Parses a Unicode PostScript name in the format uniXXXX. */ - private int parseUniName(String name) throws IOException + private int parseUniName(String name) { if (name.startsWith("uni") && name.length() == 7) { @@ -649,7 +688,7 @@ public GeneralPath getPath(String name) throws IOException @Override public float getWidth(String name) throws IOException { - Integer gid = nameToGID(name); + int gid = nameToGID(name); return getAdvanceWidth(gid); } @@ -662,10 +701,11 @@ public boolean hasGlyph(String name) throws IOException @Override public BoundingBox getFontBBox() throws IOException { - short xMin = getHeader().getXMin(); - short xMax = getHeader().getXMax(); - short yMin = getHeader().getYMin(); - short yMax = getHeader().getYMax(); + HeaderTable headerTable = getHeader(); + short xMin = headerTable.getXMin(); + short xMax = headerTable.getXMax(); + short yMin = headerTable.getYMin(); + short yMax = headerTable.getYMax(); float scale = 1000f / getUnitsPerEm(); return new BoundingBox(xMin * scale, yMin * scale, xMax * scale, yMax * scale); } @@ -677,6 +717,36 @@ public List getFontMatrix() throws IOException return Arrays.asList(0.001f * scale, 0, 0, 0.001f * scale, 0, 0); } + /** + * Enable a particular glyph substitution feature. This feature might not be supported by the + * font, or might not be implemented in PDFBox yet. + * + * @param featureTag The GSUB feature to enable + */ + public void enableGsubFeature(String featureTag) + { + enabledGsubFeatures.add(featureTag); + } + + /** + * Disable a particular glyph substitution feature. 
+ * + * @param featureTag The GSUB feature to disable + */ + public void disableGsubFeature(String featureTag) + { + enabledGsubFeatures.remove(featureTag); + } + + /** + * Enable glyph substitutions for vertical writing. + */ + public void enableVerticalSubstitutions() + { + enableGsubFeature("vrt2"); + enableGsubFeature("vert"); + } + @Override public String toString() { diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/VerticalHeaderTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/VerticalHeaderTable.java index 5d54beb64d7..7849940cbcf 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/VerticalHeaderTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/VerticalHeaderTable.java @@ -71,7 +71,7 @@ public class VerticalHeaderTable extends TTFTable * @throws IOException If there is an error reading the data. */ @Override - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { version = data.read32Fixed(); ascender = data.readSignedShort(); diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/VerticalMetricsTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/VerticalMetricsTable.java index ddd6fe0544e..3cb404d6793 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/VerticalMetricsTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/VerticalMetricsTable.java @@ -38,7 +38,7 @@ public class VerticalMetricsTable extends TTFTable private int[] advanceHeight; private short[] topSideBearing; - private short[] nonVerticalTopSideBearing; + private short[] additionalTopSideBearing; private int numVMetrics; VerticalMetricsTable(TrueTypeFont font) @@ -54,9 +54,13 @@ public class VerticalMetricsTable extends TTFTable * @throws IOException If there is an error reading the data. 
*/ @Override - public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { VerticalHeaderTable vHeader = ttf.getVerticalHeader(); + if (vHeader == null) + { + throw new IOException("Could not get vhea table"); + } numVMetrics = vHeader.getNumberOfVMetrics(); int numGlyphs = ttf.getNumberOfGlyphs(); @@ -80,12 +84,12 @@ public void read(TrueTypeFont ttf, TTFDataStream data) throws IOException numberNonVertical = numGlyphs; } - nonVerticalTopSideBearing = new short[ numberNonVertical ]; + additionalTopSideBearing = new short[numberNonVertical]; for( int i=0; i - - + + diff --git a/fontbox/src/main/java/org/apache/fontbox/type1/Token.java b/fontbox/src/main/java/org/apache/fontbox/type1/Token.java index d37a40e978b..1f3cba2bbc2 100644 --- a/fontbox/src/main/java/org/apache/fontbox/type1/Token.java +++ b/fontbox/src/main/java/org/apache/fontbox/type1/Token.java @@ -30,11 +30,13 @@ class Token * All different types of tokens. 
* */ - static enum Kind + enum Kind { NONE, STRING, NAME, LITERAL, REAL, INTEGER, - START_ARRAY, END_ARRAY, START_PROC, - END_PROC, CHARSTRING + START_ARRAY, END_ARRAY, + START_PROC, END_PROC, + START_DICT, END_DICT, + CHARSTRING } // exposed statically for convenience @@ -48,6 +50,8 @@ static enum Kind static final Kind START_PROC = Kind.START_PROC; static final Kind END_PROC = Kind.END_PROC; static final Kind CHARSTRING = Kind.CHARSTRING; + static final Kind START_DICT = Kind.START_DICT; + static final Kind END_DICT = Kind.END_DICT; private String text; private byte[] data; diff --git a/fontbox/src/main/java/org/apache/fontbox/type1/Type1Font.java b/fontbox/src/main/java/org/apache/fontbox/type1/Type1Font.java index ce2c63cbe54..0acee644185 100644 --- a/fontbox/src/main/java/org/apache/fontbox/type1/Type1Font.java +++ b/fontbox/src/main/java/org/apache/fontbox/type1/Type1Font.java @@ -113,7 +113,8 @@ public static Type1Font createWithSegments(byte[] segment1, byte[] segment2) thr List familyBlues = new ArrayList(); List familyOtherBlues = new ArrayList(); float blueScale; - int blueShift, blueFuzz; + int blueShift; + int blueFuzz; List stdHW = new ArrayList(); List stdVW = new ArrayList(); List stemSnapH = new ArrayList(); @@ -130,7 +131,8 @@ public static Type1Font createWithSegments(byte[] segment1, byte[] segment2) thr new ConcurrentHashMap(); // raw data - private final byte[] segment1, segment2; + private final byte[] segment1; + private final byte[] segment2; /** * Constructs a new Type1Font, called by Type1Parser. diff --git a/fontbox/src/main/java/org/apache/fontbox/type1/Type1Lexer.java b/fontbox/src/main/java/org/apache/fontbox/type1/Type1Lexer.java index 3d4dad3093c..9024c3e13a7 100644 --- a/fontbox/src/main/java/org/apache/fontbox/type1/Type1Lexer.java +++ b/fontbox/src/main/java/org/apache/fontbox/type1/Type1Lexer.java @@ -34,7 +34,7 @@ * context-free, and the execution of the program can modify the * the behaviour of the lexer/parser. 
* - * Nevertheless, this class represents an attempt to artificially seperate + * Nevertheless, this class represents an attempt to artificially separate * the PostScript parsing process into separate lexing and parsing phases * in order to reduce the complexity of the parsing phase. * @@ -142,6 +142,34 @@ else if (c == '/') { return new Token(readRegular(), Token.LITERAL); } + else if (c == '<') + { + char c2 = getChar(); + if (c2 == c) + { + return new Token("<<", Token.START_DICT); + } + else + { + // code may have to be changed in something better, maybe new token type + buffer.position(buffer.position() - 1); + return new Token(c, Token.NAME); + } + } + else if (c == '>') + { + char c2 = getChar(); + if (c2 == c) + { + return new Token(">>", Token.END_DICT); + } + else + { + // code may have to be changed in something better, maybe new token type + buffer.position(buffer.position() - 1); + return new Token(c, Token.NAME); + } + } else if (Character.isWhitespace(c)) { skip = true; @@ -175,7 +203,7 @@ else if (c == 0) if (name.equals("RD") || name.equals("-|")) { // return the next CharString instead - if (prevToken.getKind() == Token.INTEGER) + if (prevToken != null && prevToken.getKind() == Token.INTEGER) { return readCharString(prevToken.intValue()); } @@ -191,7 +219,8 @@ else if (c == 0) } } } - } while (skip); + } + while (skip); return null; } @@ -241,7 +270,7 @@ else if (sb.length() == 0 || !hasDigit) buffer.reset(); return null; } - else + else if (c != 'e' && c != 'E') { // integer buffer.position(buffer.position() -1); @@ -254,7 +283,7 @@ else if (sb.length() == 0 || !hasDigit) sb.append(c); c = getChar(); } - else + else if (c != 'e' && c != 'E') { // failure buffer.reset(); @@ -269,7 +298,7 @@ else if (sb.length() == 0 || !hasDigit) } // optional E - if (c == 'E') + if (c == 'E' || c == 'e') { sb.append(c); c = getChar(); @@ -337,12 +366,11 @@ private String readRegular() sb.append(c); } } - String regular = sb.toString(); - if (regular.length() == 
0) + if (sb.length() == 0) { return null; } - return regular; + return sb.toString(); } /** @@ -369,7 +397,7 @@ private String readComment() /** * Reads a (string). */ - private Token readString() + private Token readString() throws IOException { StringBuilder sb = new StringBuilder(); @@ -378,54 +406,59 @@ private Token readString() char c = getChar(); // string context - if (c == '(') + switch (c) { - openParens++; - sb.append('('); - } - else if (c == ')') - { - if (openParens == 0) - { - // end of string - return new Token(sb.toString(), Token.STRING); - } - else - { + case '(': + openParens++; + sb.append('('); + break; + case ')': + if (openParens == 0) + { + // end of string + return new Token(sb.toString(), Token.STRING); + } sb.append(')'); openParens--; - } - } - else if (c == '\\') - { - // escapes: \n \r \t \b \f \\ \( \) - char c1 = getChar(); - switch (c1) - { - case 'n': - case 'r': sb.append("\n"); break; - case 't': sb.append('\t'); break; - case 'b': sb.append('\b'); break; - case 'f': sb.append('\f'); break; - case '\\': sb.append('\\'); break; - case '(': sb.append('('); break; - case ')': sb.append(')'); break; - } - // octal \ddd - if (Character.isDigit(c1)) - { - String num = String.valueOf(new char[] { c1, getChar(), getChar() }); - Integer code = Integer.parseInt(num, 8); - sb.append((char)(int)code); - } - } - else if (c == '\r' || c == '\n') - { - sb.append("\n"); - } - else - { - sb.append(c); + break; + case '\\': + // escapes: \n \r \t \b \f \\ \( \) + char c1 = getChar(); + switch (c1) + { + case 'n': + case 'r': sb.append("\n"); break; + case 't': sb.append('\t'); break; + case 'b': sb.append('\b'); break; + case 'f': sb.append('\f'); break; + case '\\': sb.append('\\'); break; + case '(': sb.append('('); break; + case ')': sb.append(')'); break; + default: + break; + } + // octal \ddd + if (Character.isDigit(c1)) + { + String num = String.valueOf(new char[] { c1, getChar(), getChar() }); + try + { + int code = Integer.parseInt(num, 
8); + sb.append((char) code); + } + catch (NumberFormatException ex) + { + throw new IOException(ex); + } + } + break; + case '\r': + case '\n': + sb.append("\n"); + break; + default: + sb.append(c); + break; } } return null; diff --git a/fontbox/src/main/java/org/apache/fontbox/type1/Type1Parser.java b/fontbox/src/main/java/org/apache/fontbox/type1/Type1Parser.java index 7e448060ad9..9f5e0b885b1 100644 --- a/fontbox/src/main/java/org/apache/fontbox/type1/Type1Parser.java +++ b/fontbox/src/main/java/org/apache/fontbox/type1/Type1Parser.java @@ -73,14 +73,14 @@ private void parseASCII(byte[] bytes) throws IOException { if (bytes.length == 0) { - throw new IllegalArgumentException("byte[] is empty"); + throw new IOException("ASCII segment of type 1 font is empty"); } // %!FontType1-1.0 // %!PS-AdobeFont-1.0 if (bytes.length < 2 || (bytes[0] != '%' && bytes[1] != '!')) { - throw new IOException("Invalid start of ASCII segment"); + throw new IOException("Invalid start of ASCII segment of type 1 font"); } lexer = new Type1Lexer(bytes); @@ -92,30 +92,37 @@ private void parseASCII(byte[] bytes) throws IOException read(Token.LITERAL); // font name read(Token.NAME, "known"); read(Token.START_PROC); - readProc(); + readProcVoid(); read(Token.START_PROC); - readProc(); + readProcVoid(); read(Token.NAME, "ifelse"); } // font dict int length = read(Token.INTEGER).intValue(); read(Token.NAME, "dict"); - readMaybe(Token.NAME, "dup"); // found in some TeX fonts + // found in some TeX fonts + readMaybe(Token.NAME, "dup"); + // if present, the "currentdict" is not required read(Token.NAME, "begin"); for (int i = 0; i < length; i++) { // premature end - if (lexer.peekToken().getKind() == Token.NAME && - lexer.peekToken().getText().equals("currentdict")) + Token token = lexer.peekToken(); + if (token == null) + { + break; + } + if (token.getKind() == Token.NAME && + (token.getText().equals("currentdict") || token.getText().equals("end"))) { break; } // key/value String key = 
read(Token.LITERAL).getText(); - if (key.equals("FontInfo")) + if (key.equals("FontInfo") || key.equals("Fontinfo")) { readFontInfo(readSimpleDict()); } @@ -133,7 +140,7 @@ else if (key.equals("Encoding")) } } - read(Token.NAME, "currentdict"); + readMaybe(Token.NAME, "currentdict"); read(Token.NAME, "end"); read(Token.NAME, "currentfile"); @@ -246,7 +253,8 @@ else if (token.getKind() == Token.INTEGER) } else { - throw new IOException("Expected INTEGER or REAL but got " + token.getKind()); + throw new IOException("Expected INTEGER or REAL but got " + token + + " at array position " + i); } } return numbers; @@ -316,12 +324,20 @@ private Map> readSimpleDict() throws IOException for (int i = 0; i < length; i++) { + if (lexer.peekToken() == null) + { + break; + } if (lexer.peekToken().getKind() == Token.NAME && !lexer.peekToken().getText().equals("end")) { read(Token.NAME); } // premature end + if (lexer.peekToken() == null) + { + break; + } if (lexer.peekToken().getKind() == Token.NAME && lexer.peekToken().getText().equals("end")) { @@ -360,6 +376,10 @@ private List readValue() throws IOException { List value = new ArrayList(); Token token = lexer.nextToken(); + if (lexer.peekToken() == null) + { + return value; + } value.add(token); if (token.getKind() == Token.START_ARRAY) @@ -367,6 +387,10 @@ private List readValue() throws IOException int openArray = 1; while (true) { + if (lexer.peekToken() == null) + { + return value; + } if (lexer.peekToken().getKind() == Token.START_ARRAY) { openArray++; @@ -389,7 +413,19 @@ else if (token.getKind() == Token.START_PROC) { value.addAll(readProc()); } + else if (token.getKind() == Token.START_DICT) + { + // skip "/GlyphNames2HostCode << >> def" + read(Token.END_DICT); + return value; + } + readPostScriptWrapper(value); + return value; + } + + private void readPostScriptWrapper(List value) throws IOException + { // postscript wrapper (not in the Type 1 spec) if (lexer.peekToken().getText().equals("systemdict")) { @@ -398,10 
+434,10 @@ else if (token.getKind() == Token.START_PROC) read(Token.NAME, "known"); read(Token.START_PROC); - readProc(); + readProcVoid(); read(Token.START_PROC); - readProc(); + readProcVoid(); read(Token.NAME, "ifelse"); @@ -414,7 +450,6 @@ else if (token.getKind() == Token.START_PROC) read(Token.NAME, "if"); } - return value; } /** @@ -453,24 +488,71 @@ private List readProc() throws IOException return value; } + /** + * Reads a procedure but without returning anything. + */ + private void readProcVoid() throws IOException + { + int openProc = 1; + while (true) + { + if (lexer.peekToken().getKind() == Token.START_PROC) + { + openProc++; + } + + Token token = lexer.nextToken(); + + if (token.getKind() == Token.END_PROC) + { + openProc--; + if (openProc == 0) + { + break; + } + } + } + readMaybe(Token.NAME, "executeonly"); + } + /** * Parses the binary portion of a Type 1 font. */ private void parseBinary(byte[] bytes) throws IOException { - byte[] decrypted = decrypt(bytes, EEXEC_KEY, 4); + byte[] decrypted; + // Sometimes, fonts use the hex format, so this needs to be converted before decryption + if (isBinary(bytes)) + { + decrypted = decrypt(bytes, EEXEC_KEY, 4); + } + else + { + decrypted = decrypt(hexToBinary(bytes), EEXEC_KEY, 4); + } lexer = new Type1Lexer(decrypted); // find /Private dict - while (!lexer.peekToken().getText().equals("Private")) + Token peekToken = lexer.peekToken(); + while (peekToken != null && !peekToken.getText().equals("Private")) { + // for a more thorough validation, the presence of "begin" before Private + // determines how code before and following charstrings should look + // it is not currently checked anyway lexer.nextToken(); + peekToken = lexer.peekToken(); + } + if (peekToken == null) + { + throw new IOException("/Private token not found"); } // Private dict read(Token.LITERAL, "Private"); int length = read(Token.INTEGER).intValue(); read(Token.NAME, "dict"); + // actually could also be "/Private 10 dict def Private begin" 
+ // instead of the "dup" readMaybe(Token.NAME, "dup"); read(Token.NAME, "begin"); @@ -479,7 +561,7 @@ private void parseBinary(byte[] bytes) throws IOException for (int i = 0; i < length; i++) { // premature end - if (lexer.peekToken().getKind() != Token.LITERAL) + if (lexer.peekToken() == null || lexer.peekToken().getKind() != Token.LITERAL) { break; } @@ -487,34 +569,44 @@ private void parseBinary(byte[] bytes) throws IOException // key/value String key = read(Token.LITERAL).getText(); - if (key.equals("Subrs")) + if ("Subrs".equals(key)) { readSubrs(lenIV); } - else if (key.equals("OtherSubrs")) + else if ("OtherSubrs".equals(key)) { readOtherSubrs(); } - else if (key.equals("lenIV")) + else if ("lenIV".equals(key)) { lenIV = readDictValue().get(0).intValue(); } - else if (key.equals("ND")) + else if ("ND".equals(key)) { read(Token.START_PROC); - read(Token.NAME, "noaccess"); + // the access restrictions are not mandatory + readMaybe(Token.NAME, "noaccess"); read(Token.NAME, "def"); read(Token.END_PROC); - read(Token.NAME, "executeonly"); + readMaybe(Token.NAME, "executeonly"); read(Token.NAME, "def"); } - else if (key.equals("NP")) + else if ("NP".equals(key)) { read(Token.START_PROC); - read(Token.NAME, "noaccess"); + readMaybe(Token.NAME, "noaccess"); read(Token.NAME); read(Token.END_PROC); - read(Token.NAME, "executeonly"); + readMaybe(Token.NAME, "executeonly"); + read(Token.NAME, "def"); + } + else if ("RD".equals(key)) + { + // /RD {string currentfile exch readstring pop} bind executeonly def + read(Token.START_PROC); + readProcVoid(); + readMaybe(Token.NAME, "bind"); + readMaybe(Token.NAME, "executeonly"); read(Token.NAME, "def"); } else @@ -613,6 +705,10 @@ private void readSubrs(int lenIV) throws IOException for (int i = 0; i < length; i++) { // premature end + if (lexer.peekToken() == null) + { + break; + } if (!(lexer.peekToken().getKind() == Token.NAME && lexer.peekToken().getText().equals("dup"))) { @@ -663,12 +759,18 @@ private void 
readCharStrings(int lenIV) throws IOException { int length = read(Token.INTEGER).intValue(); read(Token.NAME, "dict"); + // could actually be a sequence ending in "CharStrings begin", too + // instead of the "dup begin" read(Token.NAME, "dup"); read(Token.NAME, "begin"); for (int i = 0; i < length; i++) { // premature end + if (lexer.peekToken() == null) + { + break; + } if (lexer.peekToken().getKind() == Token.NAME && lexer.peekToken().getText().equals("end")) { @@ -686,6 +788,9 @@ private void readCharStrings(int lenIV) throws IOException // some fonts have one "end", others two read(Token.NAME, "end"); + // since checking ends here, this does not matter .... + // more thorough checking would see whether there is "begin" before /Private + // and expect a "def" somewhere, otherwise a "put" } /** @@ -743,7 +848,7 @@ else if (token.getText().equals("noaccess")) private Token read(Token.Kind kind) throws IOException { Token token = lexer.nextToken(); - if (token.getKind() != kind) + if (token == null || token.getKind() != kind) { throw new IOException("Found " + token + " but expected " + kind); } @@ -770,7 +875,7 @@ private void read(Token.Kind kind, String name) throws IOException private Token readMaybe(Token.Kind kind, String name) throws IOException { Token token = lexer.peekToken(); - if (token.getKind() == kind && token.getText().equals(name)) + if (token != null && token.getKind() == kind && token.getText().equals(name)) { return lexer.nextToken(); } @@ -813,4 +918,59 @@ private byte[] decrypt(byte[] cipherBytes, int r, int n) } return plainBytes; } + + // Check whether binary or hex encoded. See Adobe Type 1 Font Format specification + // 7.2 eexec encryption + private boolean isBinary(byte[] bytes) + { + if (bytes.length < 4) + { + return true; + } + // "At least one of the first 4 ciphertext bytes must not be one of + // the ASCII hexadecimal character codes (a code for 0-9, A-F, or a-f)." 
+ for (int i = 0; i < 4; ++i) + { + byte by = bytes[i]; + if (by != 0x0a && by != 0x0d && by != 0x20 && by != '\t' && + Character.digit((char) by, 16) == -1) + { + return true; + } + } + return false; + } + + private byte[] hexToBinary(byte[] bytes) + { + // calculate needed length + int len = 0; + for (byte by : bytes) + { + if (Character.digit((char) by, 16) != -1) + { + ++len; + } + } + byte[] res = new byte[len / 2]; + int r = 0; + int prev = -1; + for (byte by : bytes) + { + int digit = Character.digit((char) by, 16); + if (digit != -1) + { + if (prev == -1) + { + prev = digit; + } + else + { + res[r++] = (byte) (prev * 16 + digit); + prev = -1; + } + } + } + return res; + } } diff --git a/fontbox/src/main/java/org/apache/fontbox/type1/package.html b/fontbox/src/main/java/org/apache/fontbox/type1/package.html index 9eadeba996b..1125d66353f 100644 --- a/fontbox/src/main/java/org/apache/fontbox/type1/package.html +++ b/fontbox/src/main/java/org/apache/fontbox/type1/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/fontbox/src/main/java/org/apache/fontbox/util/autodetect/FontFileFinder.java b/fontbox/src/main/java/org/apache/fontbox/util/autodetect/FontFileFinder.java index de92a571683..666851573f8 100644 --- a/fontbox/src/main/java/org/apache/fontbox/util/autodetect/FontFileFinder.java +++ b/fontbox/src/main/java/org/apache/fontbox/util/autodetect/FontFileFinder.java @@ -19,7 +19,11 @@ import java.io.File; import java.net.URI; +import java.util.ArrayList; import java.util.List; +import java.util.Locale; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; /** * Helps to autodetect/locate available operating system fonts. This class is based on a class provided by Apache FOP. 
@@ -27,6 +31,7 @@ */ public class FontFileFinder { + private static final Log LOG = LogFactory.getLog(FontFileFinder.class); private FontDirFinder fontDirFinder = null; @@ -44,16 +49,17 @@ private FontDirFinder determineDirFinder() { return new WindowsFontDirFinder(); } + else if (osName.startsWith("Mac")) + { + return new MacFontDirFinder(); + } + else if (osName.startsWith("OS/400")) + { + return new OS400FontDirFinder(); + } else { - if (osName.startsWith("Mac")) - { - return new MacFontDirFinder(); - } - else - { - return new UnixFontDirFinder(); - } + return new UnixFontDirFinder(); } } @@ -69,7 +75,7 @@ public List find() fontDirFinder = determineDirFinder(); } List fontDirs = fontDirFinder.find(); - List results = new java.util.ArrayList(); + List results = new ArrayList(); for (File dir : fontDirs) { walk(dir, results); @@ -85,7 +91,7 @@ public List find() */ public List find(String dir) { - List results = new java.util.ArrayList(); + List results = new ArrayList(); File directory = new File(dir); if (directory.isDirectory()) { @@ -95,7 +101,7 @@ public List find(String dir) } /** - * walk down the driectory tree and search for font files. + * walk down the directory tree and search for font files. * * @param directory the directory to start at * @param results names of all found font files @@ -103,31 +109,39 @@ public List find(String dir) private void walk(File directory, List results) { // search for font files recursively in the given directory - if (directory.isDirectory()) + if (!directory.isDirectory()) + { + return; + } + File[] filelist = directory.listFiles(); + if (filelist == null) { - File[] filelist = directory.listFiles(); - if (filelist != null) + return; + } + for (File file : filelist) + { + if (file.isDirectory()) { - int numOfFiles = filelist.length; - for (int i=0;i

+ * Try to avoid using this constructor because it creates a new scratch file in memory. Instead, + * use {@link COSDocument#createCOSStream() document.getDocument().createCOSStream()} which will + * use the existing scratch file (in memory or in temp file) of the document. + *

*/ public COSStream() { @@ -63,20 +68,22 @@ public COSStream() */ public COSStream(ScratchFile scratchFile) { - super(); + setInt(COSName.LENGTH, 0); this.scratchFile = scratchFile != null ? scratchFile : ScratchFile.getMainMemoryOnlyInstance(); } - + /** * Throws if the random access backing store has been closed. Helpful for catching cases where * a user tries to use a COSStream which has outlived its COSDocument. */ private void checkClosed() throws IOException { - if ((randomAccess != null) && randomAccess.isClosed()) + if (randomAccess != null && randomAccess.isClosed()) { throw new IOException("COSStream has been closed and cannot be read. " + "Perhaps its enclosing PDDocument has been closed?"); + // Tip for debugging: look at the destination file with an editor, you'll see an + // incomplete stream at the bottom. } } @@ -152,6 +159,11 @@ public InputStream getUnfilteredStream() throws IOException * @throws IOException If the stream could not be read. */ public COSInputStream createInputStream() throws IOException + { + return createInputStream(DecodeOptions.DEFAULT); + } + + public COSInputStream createInputStream(DecodeOptions options) throws IOException { checkClosed(); if (isWriting) @@ -160,7 +172,7 @@ public COSInputStream createInputStream() throws IOException } ensureRandomAccessExists(true); InputStream input = new RandomAccessInputStream(randomAccess); - return COSInputStream.create(getFilterList(), this, input, scratchFile); + return COSInputStream.create(getFilterList(), this, input, scratchFile, options); } /** @@ -206,12 +218,19 @@ public OutputStream createOutputStream(COSBase filters) throws IOException { setItem(COSName.FILTER, filters); } - randomAccess = scratchFile.createBuffer(); // discards old data - TODO: close existing buffer? 
+ IOUtils.closeQuietly(randomAccess); + randomAccess = scratchFile.createBuffer(); OutputStream randomOut = new RandomAccessOutputStream(randomAccess); OutputStream cosOut = new COSOutputStream(getFilterList(), this, randomOut, scratchFile); isWriting = true; return new FilterOutputStream(cosOut) { + @Override + public void write(byte[] b, int off, int len) throws IOException + { + this.out.write(b, off, len); + } + @Override public void close() throws IOException { @@ -250,11 +269,18 @@ public OutputStream createRawOutputStream() throws IOException { throw new IllegalStateException("Cannot have more than one open stream writer."); } - randomAccess = scratchFile.createBuffer(); // discards old data - TODO: close existing buffer? + IOUtils.closeQuietly(randomAccess); + randomAccess = scratchFile.createBuffer(); OutputStream out = new RandomAccessOutputStream(randomAccess); isWriting = true; return new FilterOutputStream(out) { + @Override + public void write(byte[] b, int off, int len) throws IOException + { + this.out.write(b, off, len); + } + @Override public void close() throws IOException { @@ -270,21 +296,32 @@ public void close() throws IOException */ private List getFilterList() throws IOException { - List filterList = new ArrayList(); + List filterList; COSBase filters = getFilters(); if (filters instanceof COSName) { + filterList = new ArrayList(1); filterList.add(FilterFactory.INSTANCE.getFilter((COSName)filters)); } else if (filters instanceof COSArray) { COSArray filterArray = (COSArray)filters; + filterList = new ArrayList(filterArray.size()); for (int i = 0; i < filterArray.size(); i++) { - COSName filterName = (COSName)filterArray.get(i); - filterList.add(FilterFactory.INSTANCE.getFilter(filterName)); + COSBase base = filterArray.get(i); + if (!(base instanceof COSName)) + { + throw new IOException("Forbidden type in filter array: " + + (base == null ? 
"null" : base.getClass().getName())); + } + filterList.add(FilterFactory.INSTANCE.getFilter((COSName) base)); } } + else + { + filterList = new ArrayList(); + } return filterList; } @@ -307,9 +344,11 @@ public long getLength() /** * This will return the filters to apply to the byte stream. * The method will return - * - null if no filters are to be applied - * - a COSName if one filter is to be applied - * - a COSArray containing COSNames if multiple filters are to be applied + *
    + *
  • null if no filters are to be applied + *
  • a COSName if one filter is to be applied + *
  • a COSArray containing COSNames if multiple filters are to be applied + *
* * @return the COSBase object representing the filters */ @@ -333,6 +372,8 @@ public void setFilters(COSBase filters) throws IOException /** * Returns the contents of the stream as a text string. + * + * @return the string representation of this string. * * @deprecated Use {@link #toTextString()} instead. */ @@ -344,25 +385,28 @@ public String getString() /** * Returns the contents of the stream as a PDF "text string". + * + * @return the text string representation of this stream. */ public String toTextString() { - ByteArrayOutputStream out = new ByteArrayOutputStream(); InputStream input = null; + byte[] array; try { input = createInputStream(); - IOUtils.copy(input, out); + array = IOUtils.toByteArray(input); } catch (IOException e) { + LOG.debug("An exception occurred trying to get the content - returning empty string instead", e); return ""; } finally { IOUtils.closeQuietly(input); } - COSString string = new COSString(out.toByteArray()); + COSString string = new COSString(array); return string.getString(); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java b/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java index e51139dd7c8..208f7f95ae6 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java @@ -47,55 +47,13 @@ public final class COSString extends COSBase { private static final Log LOG = LogFactory.getLog(COSString.class); + private byte[] bytes; + private boolean forceHexForm; + // legacy behaviour for old PDFParser public static final boolean FORCE_PARSING = Boolean.getBoolean("org.apache.pdfbox.forceParsing"); - /** - * This will create a COS string from a string of hex characters. - * - * @param hex A hex string. - * @return A cos string with the hex characters converted to their actual bytes. - * @throws IOException If there is an error with the hex string. 
- */ - public static COSString parseHex(String hex) throws IOException - { - ByteArrayOutputStream bytes = new ByteArrayOutputStream(); - StringBuilder hexBuffer = new StringBuilder(hex.trim()); - - // if odd number then the last hex digit is assumed to be 0 - if (hexBuffer.length() % 2 != 0) - { - hexBuffer.append('0'); - } - - int length = hexBuffer.length(); - for (int i = 0; i < length; i += 2) - { - try - { - bytes.write(Integer.parseInt(hexBuffer.substring(i, i + 2), 16)); - } - catch (NumberFormatException e) - { - if (FORCE_PARSING) - { - LOG.warn("Encountered a malformed hex string"); - bytes.write('?'); // todo: what does Acrobat do? Any example PDFs? - } - else - { - throw new IOException("Invalid hex string: " + hex, e); - } - } - } - - return new COSString(bytes.toByteArray()); - } - - private byte[] bytes; - private boolean forceHexForm; - /** * Creates a new PDF string from a byte array. This method can be used to read a string from * an existing PDF file, or to create a new byte string. @@ -134,20 +92,53 @@ public COSString(String text) { // UTF-16BE encoded string with a leading byte order marker byte[] data = text.getBytes(Charsets.UTF_16BE); - ByteArrayOutputStream out = new ByteArrayOutputStream(data.length + 2); - out.write(0xFE); // BOM - out.write(0xFF); // BOM + bytes = new byte[data.length + 2]; + bytes[0] = (byte) 0xFE; + bytes[1] = (byte) 0xFF; + System.arraycopy(data, 0, bytes, 2, data.length); + } + } + + /** + * This will create a COS string from a string of hex characters. + * + * @param hex A hex string. + * @return A cos string with the hex characters converted to their actual bytes. + * @throws IOException If there is an error with the hex string. 
+ */ + public static COSString parseHex(String hex) throws IOException + { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + StringBuilder hexBuffer = new StringBuilder(hex.trim()); + + // if odd number then the last hex digit is assumed to be 0 + if (hexBuffer.length() % 2 != 0) + { + hexBuffer.append('0'); + } + + int length = hexBuffer.length(); + for (int i = 0; i < length; i += 2) + { try { - out.write(data); + bytes.write(Integer.parseInt(hexBuffer.substring(i, i + 2), 16)); } - catch (IOException e) + catch (NumberFormatException e) { - // should never happen - throw new RuntimeException(e); + if (FORCE_PARSING) + { + LOG.warn("Encountered a malformed hex string"); + bytes.write('?'); // todo: what does Acrobat do? Any example PDFs? + } + else + { + throw new IOException("Invalid hex string: " + hex, e); + } } - bytes = out.toByteArray(); } + + return new COSString(bytes.toByteArray()); } /** @@ -173,6 +164,8 @@ public void setForceHexForm(boolean value) /** * Returns true if the string is to be written in hex form. + * + * @return the hex representation of this string. */ public boolean getForceHexForm() { @@ -181,11 +174,13 @@ public boolean getForceHexForm() /** * Returns the content of this string as a PDF text string. + * + * @return the string representation of this string using the given encoding. */ public String getString() { // text string - BOM indicates Unicode - if (bytes.length > 2) + if (bytes.length >= 2) { if ((bytes[0] & 0xff) == 0xFE && (bytes[1] & 0xff) == 0xFF) { @@ -198,13 +193,14 @@ else if ((bytes[0] & 0xff) == 0xFF && (bytes[1] & 0xff) == 0xFE) return new String(bytes, 2, bytes.length - 2, Charsets.UTF_16LE); } } - // otherwise use PDFDocEncoding return PDFDocEncoding.toString(bytes); } /** * Returns the content of this string as a PDF ASCII string. + * + * @return the ASCII representation of this string. */ public String getASCII() { @@ -214,6 +210,8 @@ public String getASCII() /** * Returns the raw bytes of the string. 
Best used with a PDF byte string. + * + * @return the raw bytes of this string. */ public byte[] getBytes() { @@ -227,12 +225,7 @@ public byte[] getBytes() */ public String toHexString() { - StringBuilder sb = new StringBuilder(bytes.length * 2); - for (byte b : bytes) - { - sb.append(Hex.getString(b)); - } - return sb.toString(); + return Hex.getString(bytes); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUpdateInfo.java b/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUpdateInfo.java index 4753d771bcf..1bd54d445ec 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUpdateInfo.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUpdateInfo.java @@ -19,15 +19,17 @@ public interface COSUpdateInfo { /** - * Get the update state for the COSWriter. - * + * Get the update state for the COSWriter. This indicates whether an object is to be written + * when there is an incremental save. + * * @return the update state. */ boolean isNeedToBeUpdated(); /** - * Set the update state of the dictionary for the COSWriter. - * + * Set the update state of the dictionary for the COSWriter. This indicates whether an object is + * to be written when there is an incremental save. + * * @param flag the update state. 
*/ void setNeedToBeUpdated(boolean flag); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java b/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java index 8c8bae51429..a33dbaa4996 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java @@ -40,6 +40,20 @@ final class PDFDocEncoding // initialize with basically ISO-8859-1 for (int i = 0; i < 256; i++) { + // skip entries not in Unicode column + if (i > 0x17 && i < 0x20) + { + continue; + } + if (i > 0x7E && i < 0xA1) + { + continue; + } + if (i == 0xAD) + { + continue; + } + set(i, (char)i); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionary.java b/pdfbox/src/main/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionary.java index c483eb0c864..512c7c0eb52 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionary.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionary.java @@ -28,7 +28,7 @@ final class UnmodifiableCOSDictionary extends COSDictionary { /** - * @inheritDoc + * {@inheritDoc} */ UnmodifiableCOSDictionary(COSDictionary dict) { @@ -37,7 +37,7 @@ final class UnmodifiableCOSDictionary extends COSDictionary } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void clear() @@ -46,7 +46,7 @@ public void clear() } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setItem(COSName key, COSBase value) @@ -55,7 +55,7 @@ public void setItem(COSName key, COSBase value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setItem(COSName key, COSObjectable value) @@ -64,7 +64,7 @@ public void setItem(COSName key, COSObjectable value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setItem(String key, COSObjectable value) @@ -73,7 +73,7 @@ public void setItem(String key, COSObjectable value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setBoolean(String 
key, boolean value) @@ -82,7 +82,7 @@ public void setBoolean(String key, boolean value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setBoolean(COSName key, boolean value) @@ -91,7 +91,7 @@ public void setBoolean(COSName key, boolean value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setItem(String key, COSBase value) @@ -100,7 +100,7 @@ public void setItem(String key, COSBase value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setName(String key, String value) @@ -109,7 +109,7 @@ public void setName(String key, String value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setName(COSName key, String value) @@ -118,7 +118,7 @@ public void setName(COSName key, String value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setDate(String key, Calendar date) @@ -127,7 +127,7 @@ public void setDate(String key, Calendar date) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setDate(COSName key, Calendar date) @@ -136,7 +136,7 @@ public void setDate(COSName key, Calendar date) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setEmbeddedDate(String embedded, String key, Calendar date) @@ -145,7 +145,7 @@ public void setEmbeddedDate(String embedded, String key, Calendar date) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setEmbeddedDate(String embedded, COSName key, Calendar date) @@ -154,7 +154,7 @@ public void setEmbeddedDate(String embedded, COSName key, Calendar date) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setString(String key, String value) @@ -163,7 +163,7 @@ public void setString(String key, String value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setString(COSName key, String value) @@ -172,7 +172,7 @@ public void setString(COSName key, String value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setEmbeddedString(String embedded, String key, String value) 
@@ -181,7 +181,7 @@ public void setEmbeddedString(String embedded, String key, String value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setEmbeddedString(String embedded, COSName key, String value) @@ -190,7 +190,7 @@ public void setEmbeddedString(String embedded, COSName key, String value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setInt(String key, int value) @@ -199,7 +199,7 @@ public void setInt(String key, int value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setInt(COSName key, int value) @@ -208,7 +208,7 @@ public void setInt(COSName key, int value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setLong(String key, long value) @@ -217,7 +217,7 @@ public void setLong(String key, long value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setLong(COSName key, long value) @@ -226,7 +226,7 @@ public void setLong(COSName key, long value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setEmbeddedInt(String embeddedDictionary, String key, int value) @@ -235,7 +235,7 @@ public void setEmbeddedInt(String embeddedDictionary, String key, int value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setEmbeddedInt(String embeddedDictionary, COSName key, int value) @@ -244,7 +244,7 @@ public void setEmbeddedInt(String embeddedDictionary, COSName key, int value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setFloat(String key, float value) @@ -253,7 +253,7 @@ public void setFloat(String key, float value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void setFloat(COSName key, float value) @@ -262,7 +262,7 @@ public void setFloat(COSName key, float value) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void removeItem(COSName key) @@ -271,7 +271,7 @@ public void removeItem(COSName key) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void addAll(COSDictionary dic) @@ -280,11 +280,30 @@ 
public void addAll(COSDictionary dic) } /** - * @inheritDoc + * {@inheritDoc} */ @Override public void mergeInto(COSDictionary dic) { throw new UnsupportedOperationException(); } + + /** + * {@inheritDoc} + */ + @Override + public void setFlag(COSName field, int bitFlag, boolean value) + { + throw new UnsupportedOperationException(); + } + + /** + * {@inheritDoc} + */ + @Override + public void setNeedToBeUpdated(boolean flag) + { + throw new UnsupportedOperationException(); + } + } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/cos/package.html b/pdfbox/src/main/java/org/apache/pdfbox/cos/package.html index 5c4fd8946f4..3415f4088fa 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/cos/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/cos/package.html @@ -15,13 +15,13 @@ ! limitations under the License. !--> - - + + These are the low level objects that make up a PDF document. -

+

See the PDF Reference 1.4. diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCII85Filter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCII85Filter.java index 93c2f888cd6..4f523b2f251 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCII85Filter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCII85Filter.java @@ -37,12 +37,7 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, try { is = new ASCII85InputStream(encoded); - byte[] buffer = new byte[1024]; - int amountRead; - while((amountRead = is.read(buffer, 0, 1024))!= -1) - { - decoded.write(buffer, 0, amountRead); - } + IOUtils.copy(is, decoded); decoded.flush(); } finally @@ -57,12 +52,7 @@ protected void encode(InputStream input, OutputStream encoded, COSDictionary par throws IOException { ASCII85OutputStream os = new ASCII85OutputStream(encoded); - byte[] buffer = new byte[1024]; - int amountRead; - while((amountRead = input.read(buffer, 0, 1024))!= -1) - { - os.write(buffer, 0, amountRead); - } + IOUtils.copy(input, os); os.close(); encoded.flush(); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCII85InputStream.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCII85InputStream.java index 73e55d37299..7d398aca953 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCII85InputStream.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCII85InputStream.java @@ -257,7 +257,7 @@ public int available() * @param readlimit ignored. */ @Override - public void mark(int readlimit) + public synchronized void mark(int readlimit) { } @@ -267,7 +267,7 @@ public void mark(int readlimit) * @throws IOException telling that this is an unsupported action. 
*/ @Override - public void reset() throws IOException + public synchronized void reset() throws IOException { throw new IOException("Reset is not supported"); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java index c9c52f57596..6faedb3556a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java @@ -45,7 +45,22 @@ final class ASCIIHexFilter extends Filter /* 70 */ 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 90 */ -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, - /* 100 */ 13, 14, 15 + /* 100 */ 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, + /* 110 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 120 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 130 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 140 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 150 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 160 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 170 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 180 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 190 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 200 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 210 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 220 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 230 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 240 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /* 250 */ -1, -1, -1, -1, -1, -1 }; @Override @@ -116,7 +131,7 @@ public void encode(InputStream input, OutputStream encoded, COSDictionary parame int byteRead; while ((byteRead = input.read()) != -1) { - encoded.write(Hex.getBytes((byte)byteRead)); + Hex.writeHexByte((byte)byteRead, encoded); } encoded.flush(); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxDecoderStream.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxDecoderStream.java new file mode 
100644 index 00000000000..5b69430d800 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxDecoderStream.java @@ -0,0 +1,841 @@ +/* + * Copyright (c) 2012, Harald Kuhr + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +package org.apache.pdfbox.filter; + + +import java.io.EOFException; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +/** + * CCITT Modified Huffman RLE, Group 3 (T4) and Group 4 (T6) fax compression. + * + * @author Harald Kuhr + * @author Oliver Schmidtmer + * @author last modified by $Author: haraldk$ + * @version $Id: CCITTFaxDecoderStream.java,v 1.0 23.05.12 15:55 haraldk Exp$ + * + * Taken from commit 24c6682236e5a02151359486aa4075ddc5ab1534 of 18.08.2018 from twelvemonkeys/imageio/plugins/tiff/CCITTFaxDecoderStream.java + * + * Initial changes for PDFBox, discussed in PDFBOX-3338: + * - removed Validate() usages + * - catch VALUE_EOL in decode1D() + */ +final class CCITTFaxDecoderStream extends FilterInputStream { + // See TIFF 6.0 Specification, Section 10: "Modified Huffman Compression", page 43. + + private final int columns; + private final byte[] decodedRow; + + private final boolean optionG32D; + // Leading zeros for aligning EOL + private final boolean optionG3Fill; + private final boolean optionUncompressed; + private final boolean optionByteAligned; + + // Need to take fill order into account (?) (use flip table?) + private final int fillOrder; + private final int type; + + private int decodedLength; + private int decodedPos; + + private int[] changesReferenceRow; + private int[] changesCurrentRow; + private int changesReferenceRowCount; + private int changesCurrentRowCount; + + private int lastChangingElement = 0; + + /** + * Creates a CCITTFaxDecoderStream. + * This constructor may be used for CCITT streams embedded in PDF files, + * which use EncodedByteAlign. + * + * @param stream the compressed CCITT stream. + * @param columns the number of columns in the stream. + * @param type the type of stream, must be one of {@code COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE}, + * {@code COMPRESSION_CCITT_T4} or {@code COMPRESSION_CCITT_T6}. 
+ * @param fillOrder fillOrder, must be {@code FILL_LEFT_TO_RIGHT} or + * {@code FILL_RIGHT_TO_LEFT}. + * @param options CCITT T.4 or T.6 options. + * @param byteAligned enable byte alignment used in PDF files (EncodedByteAlign). + */ + public CCITTFaxDecoderStream(final InputStream stream, final int columns, final int type, final int fillOrder, + final long options, final boolean byteAligned) { + super(stream); + + this.columns = columns; + this.type = type; + this.fillOrder = fillOrder; + + // We know this is only used for b/w (1 bit) + decodedRow = new byte[(columns + 7) / 8]; + changesReferenceRow = new int[columns + 2]; + changesCurrentRow = new int[columns + 2]; + + switch (type) { + case TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE: + optionByteAligned = byteAligned; + optionG32D = false; + optionG3Fill = false; + optionUncompressed = false; + break; + case TIFFExtension.COMPRESSION_CCITT_T4: + optionByteAligned = byteAligned; + optionG32D = (options & TIFFExtension.GROUP3OPT_2DENCODING) != 0; + optionG3Fill = (options & TIFFExtension.GROUP3OPT_FILLBITS) != 0; + optionUncompressed = (options & TIFFExtension.GROUP3OPT_UNCOMPRESSED) != 0; + break; + case TIFFExtension.COMPRESSION_CCITT_T6: + optionByteAligned = byteAligned; + optionG32D = false; + optionG3Fill = false; + optionUncompressed = (options & TIFFExtension.GROUP4OPT_UNCOMPRESSED) != 0; + break; + default: + throw new IllegalArgumentException("Illegal parameter: " + type); + } + + } + + /** + * Creates a CCITTFaxDecoderStream. + * + * @param stream the compressed CCITT stream. + * @param columns the number of columns in the stream. + * @param type the type of stream, must be one of {@code COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE}, + * {@code COMPRESSION_CCITT_T4} or {@code COMPRESSION_CCITT_T6}. + * @param fillOrder fillOrder, must be {@code FILL_LEFT_TO_RIGHT} or + * {@code FILL_RIGHT_TO_LEFT}. + * @param options CCITT T.4 or T.6 options. 
+ */ + public CCITTFaxDecoderStream(final InputStream stream, final int columns, final int type, final int fillOrder, + final long options) { + this(stream, columns, type, fillOrder, options, type == TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE); + } + + private void fetch() throws IOException { + if (decodedPos >= decodedLength) { + decodedLength = 0; + + try { + decodeRow(); + } + catch (EOFException e) { + // TODO: Rewrite to avoid throw/catch for normal flow... + if (decodedLength != 0) { + throw e; + } + + // ..otherwise, just let client code try to read past the + // end of stream + decodedLength = -1; + } + + decodedPos = 0; + } + } + + private void decode1D() throws IOException { + int index = 0; + boolean white = true; + changesCurrentRowCount = 0; + + do { + int completeRun; + + if (white) { + completeRun = decodeRun(whiteRunTree); + } + else { + completeRun = decodeRun(blackRunTree); + } + + index += completeRun; + changesCurrentRow[changesCurrentRowCount++] = index; + + // Flip color for next run + white = !white; + } while (index < columns); + } + + private void decode2D() throws IOException { + changesReferenceRowCount = changesCurrentRowCount; + int[] tmp = changesCurrentRow; + changesCurrentRow = changesReferenceRow; + changesReferenceRow = tmp; + + boolean white = true; + int index = 0; + changesCurrentRowCount = 0; + + mode: while (index < columns) { + // read mode + Node n = codeTree.root; + + while (true) { + n = n.walk(readBit()); + + if (n == null) { + continue mode; + } + else if (n.isLeaf) { + switch (n.value) { + case VALUE_HMODE: + int runLength; + runLength = decodeRun(white ? whiteRunTree : blackRunTree); + index += runLength; + changesCurrentRow[changesCurrentRowCount++] = index; + + runLength = decodeRun(white ? 
blackRunTree : whiteRunTree); + index += runLength; + changesCurrentRow[changesCurrentRowCount++] = index; + break; + + case VALUE_PASSMODE: + int pChangingElement = getNextChangingElement(index, white) + 1; + + if (pChangingElement >= changesReferenceRowCount) { + index = columns; + } + else { + index = changesReferenceRow[pChangingElement]; + } + + break; + + default: + // Vertical mode (-3 to 3) + int vChangingElement = getNextChangingElement(index, white); + + if (vChangingElement >= changesReferenceRowCount || vChangingElement == -1) { + index = columns + n.value; + } + else { + index = changesReferenceRow[vChangingElement] + n.value; + } + + changesCurrentRow[changesCurrentRowCount] = index; + changesCurrentRowCount++; + white = !white; + + break; + } + + continue mode; + } + } + } + } + + private int getNextChangingElement(final int a0, final boolean white) { + int start = (lastChangingElement & 0xFFFFFFFE) + (white ? 0 : 1); + if (start > 2) { + start -= 2; + } + + if (a0 == 0) { + return start; + } + + for (int i = start; i < changesReferenceRowCount; i += 2) { + if (a0 < changesReferenceRow[i]) { + lastChangingElement = i; + return i; + } + } + + return -1; + } + + private void decodeRowType2() throws IOException { + if (optionByteAligned) { + resetBuffer(); + } + decode1D(); + } + + private void decodeRowType4() throws IOException { + if (optionByteAligned) { + resetBuffer(); + } + eof: while (true) { + // read till next EOL code + Node n = eolOnlyTree.root; + + while (true) { + n = n.walk(readBit()); + + if (n == null) { + continue eof; + } + + if (n.isLeaf) { + break eof; + } + } + } + + if (!optionG32D || readBit()) { + decode1D(); + } + else { + decode2D(); + } + } + + private void decodeRowType6() throws IOException { + if (optionByteAligned) { + resetBuffer(); + } + decode2D(); + } + + private void decodeRow() throws IOException { + switch (type) { + case TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE: + decodeRowType2(); + break; + case 
TIFFExtension.COMPRESSION_CCITT_T4: + decodeRowType4(); + break; + case TIFFExtension.COMPRESSION_CCITT_T6: + decodeRowType6(); + break; + default: + throw new IllegalArgumentException("Illegal parameter: " + type); + } + + int index = 0; + boolean white = true; + + lastChangingElement = 0; + for (int i = 0; i <= changesCurrentRowCount; i++) { + int nextChange = columns; + + if (i != changesCurrentRowCount) { + nextChange = changesCurrentRow[i]; + } + + if (nextChange > columns) { + nextChange = columns; + } + + int byteIndex = index / 8; + + while (index % 8 != 0 && (nextChange - index) > 0) { + decodedRow[byteIndex] |= (white ? 0 : 1 << (7 - ((index) % 8))); + index++; + } + + if (index % 8 == 0) { + byteIndex = index / 8; + final byte value = (byte) (white ? 0x00 : 0xff); + + while ((nextChange - index) > 7) { + decodedRow[byteIndex] = value; + index += 8; + ++byteIndex; + } + } + + while ((nextChange - index) > 0) { + if (index % 8 == 0) { + decodedRow[byteIndex] = 0; + } + + decodedRow[byteIndex] |= (white ? 
0 : 1 << (7 - ((index) % 8))); + index++; + } + + white = !white; + } + + if (index != columns) { + throw new IOException("Sum of run-lengths does not equal scan line width: " + index + " > " + columns); + } + + decodedLength = (index + 7) / 8; + } + + private int decodeRun(final Tree tree) throws IOException { + int total = 0; + + Node n = tree.root; + + while (true) { + boolean bit = readBit(); + n = n.walk(bit); + + if (n == null) { + throw new IOException("Unknown code in Huffman RLE stream"); + } + + if (n.isLeaf) { + total += n.value; + if (n.value >= 64) { + n = tree.root; + } + else if (n.value >= 0) { + return total; + } + else { + return columns; + } + } + } + } + + private void resetBuffer() { + bufferPos = -1; + } + + int buffer = -1; + int bufferPos = -1; + + private boolean readBit() throws IOException { + if (bufferPos < 0 || bufferPos > 7) { + buffer = in.read(); + + if (buffer == -1) { + throw new EOFException("Unexpected end of Huffman RLE stream"); + } + + bufferPos = 0; + } + + boolean isSet; + + if (fillOrder == TIFFExtension.FILL_LEFT_TO_RIGHT) { + isSet = ((buffer >> (7 - bufferPos)) & 1) == 1; + } + else { + isSet = ((buffer >> (bufferPos)) & 1) == 1; + } + + bufferPos++; + + if (bufferPos > 7) { + bufferPos = -1; + } + + return isSet; + } + + @Override + public int read() throws IOException { + if (decodedLength < 0) { + return 0x0; + } + + if (decodedPos >= decodedLength) { + fetch(); + + if (decodedLength < 0) { + return 0x0; + } + } + + return decodedRow[decodedPos++] & 0xff; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + if (decodedLength < 0) { + Arrays.fill(b, off, off + len, (byte) 0x0); + return len; + } + + if (decodedPos >= decodedLength) { + fetch(); + + if (decodedLength < 0) { + Arrays.fill(b, off, off + len, (byte) 0x0); + return len; + } + } + + int read = Math.min(decodedLength - decodedPos, len); + System.arraycopy(decodedRow, decodedPos, b, off, read); + decodedPos += read; + + 
return read; + } + + @Override + public long skip(long n) throws IOException { + if (decodedLength < 0) { + return -1; + } + + if (decodedPos >= decodedLength) { + fetch(); + + if (decodedLength < 0) { + return -1; + } + } + + int skipped = (int) Math.min(decodedLength - decodedPos, n); + decodedPos += skipped; + + return skipped; + } + + @Override + public boolean markSupported() { + return false; + } + + @Override + public synchronized void reset() throws IOException { + throw new IOException("mark/reset not supported"); + } + + private static final class Node { + Node left; + Node right; + + int value; // > 63 non term. + + boolean canBeFill = false; + boolean isLeaf = false; + + void set(final boolean next, final Node node) { + if (!next) { + left = node; + } + else { + right = node; + } + } + + Node walk(final boolean next) { + return next ? right : left; + } + + @Override + public String toString() { + return "[leaf=" + isLeaf + ", value=" + value + ", canBeFill=" + canBeFill + "]"; + } + } + + private static final class Tree { + final Node root = new Node(); + + void fill(final int depth, final int path, final int value) throws IOException { + Node current = root; + + for (int i = 0; i < depth; i++) { + int bitPos = depth - 1 - i; + boolean isSet = ((path >> bitPos) & 1) == 1; + Node next = current.walk(isSet); + + if (next == null) { + next = new Node(); + + if (i == depth - 1) { + next.value = value; + next.isLeaf = true; + } + + if (path == 0) { + next.canBeFill = true; + } + + current.set(isSet, next); + } + else { + if (next.isLeaf) { + throw new IOException("node is leaf, no other following"); + } + } + + current = next; + } + } + + void fill(final int depth, final int path, final Node node) throws IOException { + Node current = root; + + for (int i = 0; i < depth; i++) { + int bitPos = depth - 1 - i; + boolean isSet = ((path >> bitPos) & 1) == 1; + Node next = current.walk(isSet); + + if (next == null) { + if (i == depth - 1) { + next = node; + } + 
else { + next = new Node(); + } + + if (path == 0) { + next.canBeFill = true; + } + + current.set(isSet, next); + } + else { + if (next.isLeaf) { + throw new IOException("node is leaf, no other following"); + } + } + + current = next; + } + } + } + + static final short[][] BLACK_CODES = { + { // 2 bits + 0x2, 0x3, + }, + { // 3 bits + 0x2, 0x3, + }, + { // 4 bits + 0x2, 0x3, + }, + { // 5 bits + 0x3, + }, + { // 6 bits + 0x4, 0x5, + }, + { // 7 bits + 0x4, 0x5, 0x7, + }, + { // 8 bits + 0x4, 0x7, + }, + { // 9 bits + 0x18, + }, + { // 10 bits + 0x17, 0x18, 0x37, 0x8, 0xf, + }, + { // 11 bits + 0x17, 0x18, 0x28, 0x37, 0x67, 0x68, 0x6c, 0x8, 0xc, 0xd, + }, + { // 12 bits + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, 0x24, 0x27, 0x28, 0x2b, 0x2c, 0x33, + 0x34, 0x35, 0x37, 0x38, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xd2, 0xd3, + 0xd4, 0xd5, 0xd6, 0xd7, 0xda, 0xdb, + }, + { // 13 bits + 0x4a, 0x4b, 0x4c, 0x4d, 0x52, 0x53, 0x54, 0x55, 0x5a, 0x5b, 0x64, 0x65, 0x6c, 0x6d, 0x72, 0x73, + 0x74, 0x75, 0x76, 0x77, + } + }; + static final short[][] BLACK_RUN_LENGTHS = { + { // 2 bits + 3, 2, + }, + { // 3 bits + 1, 4, + }, + { // 4 bits + 6, 5, + }, + { // 5 bits + 7, + }, + { // 6 bits + 9, 8, + }, + { // 7 bits + 10, 11, 12, + }, + { // 8 bits + 13, 14, + }, + { // 9 bits + 15, + }, + { // 10 bits + 16, 17, 0, 18, 64, + }, + { // 11 bits + 24, 25, 23, 22, 19, 20, 21, 1792, 1856, 1920, + }, + { // 12 bits + 1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, 52, 55, 56, 59, 60, 320, 384, 448, 53, + 54, 50, 51, 44, 45, 46, 47, 57, 58, 61, 256, 48, 49, 62, 63, 30, 31, 32, 33, 40, 41, 128, 192, 26, + 27, 28, 29, 34, 35, 36, 37, 38, 39, 42, 43, + }, + { // 13 bits + 640, 704, 768, 832, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, 512, 576, 896, 960, 1024, 1088, + 1152, 1216, + } + }; + + public static final short[][] WHITE_CODES = { + { 
// 4 bits + 0x7, 0x8, 0xb, 0xc, 0xe, 0xf, + }, + { // 5 bits + 0x12, 0x13, 0x14, 0x1b, 0x7, 0x8, + }, + { // 6 bits + 0x17, 0x18, 0x2a, 0x2b, 0x3, 0x34, 0x35, 0x7, 0x8, + }, + { // 7 bits + 0x13, 0x17, 0x18, 0x24, 0x27, 0x28, 0x2b, 0x3, 0x37, 0x4, 0x8, 0xc, + }, + { // 8 bits + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1a, 0x1b, 0x2, 0x24, 0x25, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, + 0x3, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x4, 0x4a, 0x4b, 0x5, 0x52, 0x53, 0x54, 0x55, 0x58, 0x59, + 0x5a, 0x5b, 0x64, 0x65, 0x67, 0x68, 0xa, 0xb, + }, + { // 9 bits + 0x98, 0x99, 0x9a, 0x9b, 0xcc, 0xcd, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, + }, + { // 10 bits + }, + { // 11 bits + 0x8, 0xc, 0xd, + }, + { // 12 bits + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, + } + }; + + public static final short[][] WHITE_RUN_LENGTHS = { + { // 4 bits + 2, 3, 4, 5, 6, 7, + }, + { // 5 bits + 128, 8, 9, 64, 10, 11, + }, + { // 6 bits + 192, 1664, 16, 17, 13, 14, 15, 1, 12, + }, + { // 7 bits + 26, 21, 28, 27, 18, 24, 25, 22, 256, 23, 20, 19, + }, + { // 8 bits + 33, 34, 35, 36, 37, 38, 31, 32, 29, 53, 54, 39, 40, 41, 42, 43, 44, 30, 61, 62, 63, 0, 320, 384, 45, + 59, 60, 46, 49, 50, 51, 52, 55, 56, 57, 58, 448, 512, 640, 576, 47, 48, + }, + { // 9 bits + 1472, 1536, 1600, 1728, 704, 768, 832, 896, 960, 1024, 1088, 1152, 1216, 1280, 1344, 1408, + }, + { // 10 bits + }, + { // 11 bits + 1792, 1856, 1920, + }, + { // 12 bits + 1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, + } + }; + + final static Node EOL; + final static Node FILL; + final static Tree blackRunTree; + final static Tree whiteRunTree; + final static Tree eolOnlyTree; + final static Tree codeTree; + + final static int VALUE_EOL = -2000; + final static int VALUE_FILL = -1000; + final static int VALUE_PASSMODE = -3000; + final static int VALUE_HMODE = -4000; + + static { + EOL = new Node(); + EOL.isLeaf = true; + EOL.value = VALUE_EOL; + FILL = new Node(); + FILL.value = VALUE_FILL; + 
FILL.left = FILL; + FILL.right = EOL; + + eolOnlyTree = new Tree(); + try { + eolOnlyTree.fill(12, 0, FILL); + eolOnlyTree.fill(12, 1, EOL); + } + catch (IOException e) { + throw new AssertionError(e); + } + + blackRunTree = new Tree(); + try { + for (int i = 0; i < BLACK_CODES.length; i++) { + for (int j = 0; j < BLACK_CODES[i].length; j++) { + blackRunTree.fill(i + 2, BLACK_CODES[i][j], BLACK_RUN_LENGTHS[i][j]); + } + } + blackRunTree.fill(12, 0, FILL); + blackRunTree.fill(12, 1, EOL); + } + catch (IOException e) { + throw new AssertionError(e); + } + + whiteRunTree = new Tree(); + try { + for (int i = 0; i < WHITE_CODES.length; i++) { + for (int j = 0; j < WHITE_CODES[i].length; j++) { + whiteRunTree.fill(i + 4, WHITE_CODES[i][j], WHITE_RUN_LENGTHS[i][j]); + } + } + + whiteRunTree.fill(12, 0, FILL); + whiteRunTree.fill(12, 1, EOL); + } + catch (IOException e) { + throw new AssertionError(e); + } + + codeTree = new Tree(); + try { + codeTree.fill(4, 1, VALUE_PASSMODE); // pass mode + codeTree.fill(3, 1, VALUE_HMODE); // H mode + codeTree.fill(1, 1, 0); // V(0) + codeTree.fill(3, 3, 1); // V_R(1) + codeTree.fill(6, 3, 2); // V_R(2) + codeTree.fill(7, 3, 3); // V_R(3) + codeTree.fill(3, 2, -1); // V_L(1) + codeTree.fill(6, 2, -2); // V_L(2) + codeTree.fill(7, 2, -3); // V_L(3) + } + catch (IOException e) { + throw new AssertionError(e); + } + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxEncoderStream.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxEncoderStream.java new file mode 100644 index 00000000000..094c5f27e2b --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxEncoderStream.java @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2013, Harald Kuhr + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name "TwelveMonkeys" nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.apache.pdfbox.filter; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * CCITT Modified Group 4 (T6) fax compression. 
+ * + * @author Oliver Schmidtmer + * + * Taken from commit 047884e3d9e1b30516c79b147ead763303dc9bcb of 21.4.2016 from + * twelvemonkeys/imageio/plugins/tiff/CCITTFaxEncoderStream.java + * + * Initial changes for PDFBox: + * - removed Validate + * - G4 compression only + * - removed options + */ +final class CCITTFaxEncoderStream extends OutputStream { + + private int currentBufferLength = 0; + private final byte[] inputBuffer; + private final int inputBufferLength; + private final int columns; + private final int rows; + + private int[] changesCurrentRow; + private int[] changesReferenceRow; + private int currentRow = 0; + private int changesCurrentRowLength = 0; + private int changesReferenceRowLength = 0; + private byte outputBuffer = 0; + private byte outputBufferBitLength = 0; + private final int fillOrder; + private final OutputStream stream; + + CCITTFaxEncoderStream(final OutputStream stream, final int columns, final int rows, final int fillOrder) { + + this.stream = stream; + this.columns = columns; + this.rows = rows; + this.fillOrder = fillOrder; + + this.changesReferenceRow = new int[columns]; + this.changesCurrentRow = new int[columns]; + + inputBufferLength = (columns + 7) / 8; + inputBuffer = new byte[inputBufferLength]; + } + + @Override + public void write(int b) throws IOException { + inputBuffer[currentBufferLength] = (byte) b; + currentBufferLength++; + + if (currentBufferLength == inputBufferLength) { + encodeRow(); + currentBufferLength = 0; + } + } + + @Override + public void flush() throws IOException { + stream.flush(); + } + + @Override + public void close() throws IOException { + stream.close(); + } + + private void encodeRow() throws IOException { + currentRow++; + int[] tmp = changesReferenceRow; + changesReferenceRow = changesCurrentRow; + changesCurrentRow = tmp; + changesReferenceRowLength = changesCurrentRowLength; + changesCurrentRowLength = 0; + + int index = 0; + boolean white = true; + while (index < columns) { + int byteIndex = 
index / 8; + int bit = index % 8; + if ((((inputBuffer[byteIndex] >> (7 - bit)) & 1) == 1) == (white)) { + changesCurrentRow[changesCurrentRowLength] = index; + changesCurrentRowLength++; + white = !white; + } + index++; + } + + encodeRowType6(); + + if (currentRow == rows) { + writeEOL(); + writeEOL(); + fill(); + } + } + + + private void encodeRowType6() throws IOException { + encode2D(); + } + + private int[] getNextChanges(int pos, boolean white) { + int[] result = new int[] {columns, columns}; + for (int i = 0; i < changesCurrentRowLength; i++) { + if (pos < changesCurrentRow[i] || (pos == 0 && white)) { + result[0] = changesCurrentRow[i]; + if ((i + 1) < changesCurrentRowLength) { + result[1] = changesCurrentRow[i + 1]; + } + break; + } + } + + return result; + } + + private void writeRun(int runLength, boolean white) throws IOException { + int nonterm = runLength / 64; + Code[] codes = white ? WHITE_NONTERMINATING_CODES : BLACK_NONTERMINATING_CODES; + while (nonterm > 0) { + if (nonterm >= codes.length) { + write(codes[codes.length - 1].code, codes[codes.length - 1].length); + nonterm -= codes.length; + } + else { + write(codes[nonterm - 1].code, codes[nonterm - 1].length); + nonterm = 0; + } + } + + Code c = white ? 
WHITE_TERMINATING_CODES[runLength % 64] : BLACK_TERMINATING_CODES[runLength % 64]; + write(c.code, c.length); + } + + private void encode2D() throws IOException { + boolean white = true; + int index = 0; // a0 + while (index < columns) { + int[] nextChanges = getNextChanges(index, white); // a1, a2 + + int[] nextRefs = getNextRefChanges(index, white); // b1, b2 + + int difference = nextChanges[0] - nextRefs[0]; + if (nextChanges[0] > nextRefs[1]) { + // PMODE + write(1, 4); + index = nextRefs[1]; + } + else if (difference > 3 || difference < -3) { + // HMODE + write(1, 3); + writeRun(nextChanges[0] - index, white); + writeRun(nextChanges[1] - nextChanges[0], !white); + index = nextChanges[1]; + + } + else { + // VMODE + switch (difference) { + case 0: + write(1, 1); + break; + case 1: + write(3, 3); + break; + case 2: + write(3, 6); + break; + case 3: + write(3, 7); + break; + case -1: + write(2, 3); + break; + case -2: + write(2, 6); + break; + case -3: + write(2, 7); + break; + default: + break; + } + white = !white; + index = nextRefs[0] + difference; + } + } + } + + private int[] getNextRefChanges(int a0, boolean white) { + int[] result = new int[] {columns, columns}; + for (int i = (white ? 0 : 1); i < changesReferenceRowLength; i += 2) { + if (changesReferenceRow[i] > a0 || (a0 == 0 && i == 0)) { + result[0] = changesReferenceRow[i]; + if ((i + 1) < changesReferenceRowLength) { + result[1] = changesReferenceRow[i + 1]; + } + break; + } + } + return result; + } + + private void write(int code, int codeLength) throws IOException { + + for (int i = 0; i < codeLength; i++) { + boolean codeBit = ((code >> (codeLength - i - 1)) & 1) == 1; + if (fillOrder == TIFFExtension.FILL_LEFT_TO_RIGHT) { + outputBuffer |= (codeBit ? 1 << (7 - ((outputBufferBitLength) % 8)) : 0); + } + else { + outputBuffer |= (codeBit ? 
1 << (((outputBufferBitLength) % 8)) : 0); + } + outputBufferBitLength++; + + if (outputBufferBitLength == 8) { + stream.write(outputBuffer); + clearOutputBuffer(); + } + } + } + + private void writeEOL() throws IOException { + write(1, 12); + } + + private void fill() throws IOException { + if (outputBufferBitLength != 0) { + stream.write(outputBuffer); + } + clearOutputBuffer(); + } + + private void clearOutputBuffer() { + outputBuffer = 0; + outputBufferBitLength = 0; + } + + private static class Code { + private Code(int code, int length) { + this.code = code; + this.length = length; + } + + final int code; + final int length; + } + + private static final Code[] WHITE_TERMINATING_CODES; + + private static final Code[] WHITE_NONTERMINATING_CODES; + + private static final Code[] BLACK_TERMINATING_CODES; + + private static final Code[] BLACK_NONTERMINATING_CODES; + + static { + // Setup HUFFMAN Codes + WHITE_TERMINATING_CODES = new Code[64]; + WHITE_NONTERMINATING_CODES = new Code[40]; + for (int i = 0; i < CCITTFaxDecoderStream.WHITE_CODES.length; i++) { + int bitLength = i + 4; + for (int j = 0; j < CCITTFaxDecoderStream.WHITE_CODES[i].length; j++) { + int value = CCITTFaxDecoderStream.WHITE_RUN_LENGTHS[i][j]; + int code = CCITTFaxDecoderStream.WHITE_CODES[i][j]; + + if (value < 64) { + WHITE_TERMINATING_CODES[value] = new Code(code, bitLength); + } + else { + WHITE_NONTERMINATING_CODES[(value / 64) - 1] = new Code(code, bitLength); + } + } + } + + BLACK_TERMINATING_CODES = new Code[64]; + BLACK_NONTERMINATING_CODES = new Code[40]; + for (int i = 0; i < CCITTFaxDecoderStream.BLACK_CODES.length; i++) { + int bitLength = i + 2; + for (int j = 0; j < CCITTFaxDecoderStream.BLACK_CODES[i].length; j++) { + int value = CCITTFaxDecoderStream.BLACK_RUN_LENGTHS[i][j]; + int code = CCITTFaxDecoderStream.BLACK_CODES[i][j]; + + if (value < 64) { + BLACK_TERMINATING_CODES[value] = new Code(code, bitLength); + } + else { + BLACK_NONTERMINATING_CODES[(value / 64) - 1] = new 
Code(code, bitLength); + } + } + } + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java index 72b859c7e2d..6b72e75dd58 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java @@ -16,19 +16,19 @@ */ package org.apache.pdfbox.filter; +import java.io.EOFException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.PushbackInputStream; + import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.filter.ccitt.CCITTFaxG31DDecodeInputStream; -import org.apache.pdfbox.filter.ccitt.FillOrderChangeInputStream; -import org.apache.pdfbox.filter.ccitt.TIFFFaxDecoder; import org.apache.pdfbox.io.IOUtils; /** * Decodes image data that has been encoded using either Group 3 or Group 4 - * CCITT facsimile (fax) encoding. + * CCITT facsimile (fax) encoding, and encodes image data to Group 4. 
* * @author Ben Litchfield * @author Marcel Kammer @@ -40,9 +40,6 @@ final class CCITTFaxFilter extends Filter public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, int index) throws IOException { - DecodeResult result = new DecodeResult(new COSDictionary()); - result.getParameters().addAll(parameters); - // get decode parameters COSDictionary decodeParms = getDecodeParams(parameters, index); @@ -52,8 +49,8 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, int height = parameters.getInt(COSName.HEIGHT, COSName.H, 0); if (rows > 0 && height > 0) { - // ensure that rows doesn't contain implausible data, see PDFBOX-771 - rows = Math.min(rows, height); + // PDFBOX-771, PDFBOX-3727: rows in DecodeParms sometimes contains an incorrect value + rows = height; } else { @@ -66,30 +63,51 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, boolean encodedByteAlign = decodeParms.getBoolean(COSName.ENCODED_BYTE_ALIGN, false); int arraySize = (cols + 7) / 8 * rows; // TODO possible options?? 
+ byte[] decompressed = new byte[arraySize]; + CCITTFaxDecoderStream s; + int type; long tiffOptions = 0; - byte[] decompressed; if (k == 0) { - InputStream in = new CCITTFaxG31DDecodeInputStream(encoded, cols, rows, encodedByteAlign); - in = new FillOrderChangeInputStream(in); - decompressed = IOUtils.toByteArray(in); - in.close(); - } - else - { - TIFFFaxDecoder faxDecoder = new TIFFFaxDecoder(1, cols, rows); - byte[] compressed = IOUtils.toByteArray(encoded); - decompressed = new byte[arraySize]; - if (k > 0) + type = TIFFExtension.COMPRESSION_CCITT_T4; // Group 3 1D + byte[] streamData = new byte[20]; + int bytesRead = encoded.read(streamData); + if (bytesRead != streamData.length) { - faxDecoder.decode2D(decompressed, compressed, 0, rows, tiffOptions); + throw new EOFException("Can't read " + streamData.length + " bytes"); } - else + encoded = new PushbackInputStream(encoded, streamData.length); + ((PushbackInputStream) encoded).unread(streamData); + if (streamData[0] != 0 || (streamData[1] >> 4 != 1 && streamData[1] != 1)) { - // k < 0 - faxDecoder.decodeT6(decompressed, compressed, 0, rows, tiffOptions, encodedByteAlign); + // leading EOL (0b000000000001) not found, search further and try RLE if not + // found + type = TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE; + short b = (short) (((streamData[0] << 8) + (streamData[1] & 0xff)) >> 4); + for (int i = 12; i < 160; i++) + { + b = (short) ((b << 1) + ((streamData[(i / 8)] >> (7 - (i % 8))) & 0x01)); + if ((b & 0xFFF) == 1) + { + type = TIFFExtension.COMPRESSION_CCITT_T4; + break; + } + } } } + else if (k > 0) + { + // Group 3 2D + type = TIFFExtension.COMPRESSION_CCITT_T4; + tiffOptions = TIFFExtension.GROUP3OPT_2DENCODING; + } + else + { + // Group 4 + type = TIFFExtension.COMPRESSION_CCITT_T6; + } + s = new CCITTFaxDecoderStream(encoded, cols, type, TIFFExtension.FILL_LEFT_TO_RIGHT, tiffOptions, encodedByteAlign); + readFromDecoderStream(s, decompressed); // invert bitmap boolean blackIsOne = 
decodeParms.getBoolean(COSName.BLACK_IS_1, false); @@ -102,16 +120,25 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, invertBitmap(decompressed); } - // repair missing color space - if (!parameters.containsKey(COSName.COLORSPACE)) - { - result.getParameters().setName(COSName.COLORSPACE, COSName.DEVICEGRAY.getName()); - } - decoded.write(decompressed); return new DecodeResult(parameters); } + void readFromDecoderStream(CCITTFaxDecoderStream decoderStream, byte[] result) + throws IOException + { + int pos = 0; + int read; + while ((read = decoderStream.read(result, pos, result.length - pos)) > -1) + { + pos += read; + if (pos >= result.length) + { + break; + } + } + } + private void invertBitmap(byte[] bufferData) { for (int i = 0, c = bufferData.length; i < c; i++) @@ -124,6 +151,10 @@ private void invertBitmap(byte[] bufferData) protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) throws IOException { - throw new UnsupportedOperationException("CCITTFaxFilter encoding not implemented, use the CCITTFactory methods instead"); + int cols = parameters.getInt(COSName.COLUMNS); + int rows = parameters.getInt(COSName.ROWS); + CCITTFaxEncoderStream ccittFaxEncoderStream = + new CCITTFaxEncoderStream(encoded, cols, rows, TIFFExtension.FILL_LEFT_TO_RIGHT); + IOUtils.copy(input, ccittFaxEncoderStream); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/DCTFilter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/DCTFilter.java index 59f276ee898..1d8aac66fc6 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/DCTFilter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/DCTFilter.java @@ -23,10 +23,10 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.lang.reflect.Field; import javax.imageio.IIOException; import javax.imageio.ImageIO; +import javax.imageio.ImageReadParam; import javax.imageio.ImageReader; import 
javax.imageio.metadata.IIOMetadata; import javax.imageio.metadata.IIOMetadataNode; @@ -48,9 +48,12 @@ final class DCTFilter extends Filter { private static final Log LOG = LogFactory.getLog(DCTFilter.class); + private static final int POS_TRANSFORM = 11; + private static final String ADOBE = "Adobe"; + @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException + public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary + parameters, int index, DecodeOptions options) throws IOException { ImageReader reader = findImageReader("JPEG", "a suitable JAI I/O image filter is not installed"); ImageInputStream iis = null; @@ -65,7 +68,12 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, } reader.setInput(iis); - + ImageReadParam irp = reader.getDefaultReadParam(); + irp.setSourceSubsampling(options.getSubsamplingX(), options.getSubsamplingY(), + options.getSubsamplingOffsetX(), options.getSubsamplingOffsetY()); + irp.setSourceRegion(options.getSourceRegion()); + options.setFilterSubsampled(true); + String numChannels = getNumChannels(reader); // get the raster using horrible JAI workarounds @@ -80,21 +88,21 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, try { // I'd like to use ImageReader#readRaster but it is buggy and can't read RGB correctly - BufferedImage image = reader.read(0); + BufferedImage image = reader.read(0, irp); raster = image.getRaster(); } catch (IIOException e) { // JAI can't read CMYK JPEGs using ImageReader#read or ImageIO.read but // fortunately ImageReader#readRaster isn't buggy when reading 4-channel files - raster = reader.readRaster(0, null); + raster = reader.readRaster(0, irp); } } else { // JAI can't read CMYK JPEGs using ImageReader#read or ImageIO.read but // fortunately ImageReader#readRaster isn't buggy when reading 4-channel files - raster = reader.readRaster(0, null); + raster = 
reader.readRaster(0, irp); } // special handling for 4-component images @@ -108,42 +116,13 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, } catch (IIOException e) { - // catches the error "Inconsistent metadata read from stream" - // if we're using the Sun decoder then can be caused by either a YCCK - // image or by a CMYK image which the decoder has problems reading - try - { - // if this is Sun's decoder, use reflection to determine if the - // color space is CMYK or YCCK - Field field = reader.getClass().getDeclaredField("colorSpaceCode"); - field.setAccessible(true); - int colorSpaceCode = field.getInt(reader); - - if (colorSpaceCode == 7 || colorSpaceCode == 8 || colorSpaceCode == 9 || colorSpaceCode == 11) - { - // YCCK - transform = 2; - } - else if (colorSpaceCode == 4) - { - // CMYK - transform = 0; - } - else - { - throw new IOException("Unexpected color space: " + colorSpaceCode); - } - } - catch (NoSuchFieldException e1) - { - // error from non-Sun JPEG decoder - throw e; - } - catch (IllegalAccessException e1) - { - // error from non-Sun JPEG decoder - throw e; - } + // we really tried asking nicely, now we're using brute force. + transform = getAdobeTransformByBruteForce(iis); + } + catch (NegativeArraySizeException e) + { + // we really tried asking nicely, now we're using brute force. + transform = getAdobeTransformByBruteForce(iis); } int colorTransform = transform != null ? 
transform : 0; @@ -154,8 +133,7 @@ else if (colorSpaceCode == 4) // already CMYK break; case 1: - // TODO YCbCr - LOG.warn("YCbCr JPEGs not implemented"); + raster = fromYCbCrtoCMYK(raster); break; case 2: raster = fromYCCKtoCMYK(raster); @@ -184,6 +162,13 @@ else if (raster.getNumBands() == 3) return new DecodeResult(parameters); } + @Override + public DecodeResult decode(InputStream encoded, OutputStream decoded, + COSDictionary parameters, int index) throws IOException + { + return decode(encoded, decoded, parameters, index, DecodeOptions.DEFAULT); + } + // reads the APP14 Adobe transform tag and returns its value, or 0 if unknown private Integer getAdobeTransform(IIOMetadata metadata) { @@ -198,6 +183,50 @@ private Integer getAdobeTransform(IIOMetadata metadata) return 0; } + // See in https://github.com/haraldk/TwelveMonkeys + // com.twelvemonkeys.imageio.plugins.jpeg.AdobeDCT class for structure of APP14 segment + private int getAdobeTransformByBruteForce(ImageInputStream iis) throws IOException + { + int a = 0; + iis.seek(0); + int by; + while ((by = iis.read()) != -1) + { + if (ADOBE.charAt(a) == by) + { + ++a; + if (a != ADOBE.length()) + { + continue; + } + // match + a = 0; + long afterAdobePos = iis.getStreamPosition(); + iis.seek(iis.getStreamPosition() - 9); + int tag = iis.readUnsignedShort(); + if (tag != 0xFFEE) + { + iis.seek(afterAdobePos); + continue; + } + int len = iis.readUnsignedShort(); + if (len >= POS_TRANSFORM + 1) + { + byte[] app14 = new byte[Math.max(len, POS_TRANSFORM + 1)]; + if (iis.read(app14) >= POS_TRANSFORM + 1) + { + return app14[POS_TRANSFORM]; + } + } + } + else + { + a = 0; + } + } + return 0; + } + // converts YCCK image to CMYK. 
YCCK is an equivalent encoding for // CMYK data, so no color management code is needed here, nor does the // PDF color space have to be consulted @@ -239,6 +268,44 @@ private WritableRaster fromYCCKtoCMYK(Raster raster) return writableRaster; } + private WritableRaster fromYCbCrtoCMYK(Raster raster) + { + WritableRaster writableRaster = raster.createCompatibleWritableRaster(); + + int[] value = new int[4]; + for (int y = 0, height = raster.getHeight(); y < height; y++) + { + for (int x = 0, width = raster.getWidth(); x < width; x++) + { + raster.getPixel(x, y, value); + + // 4-channels 0..255 + float Y = value[0]; + float Cb = value[1]; + float Cr = value[2]; + float K = value[3]; + + // YCbCr to RGB, see http://www.equasys.de/colorconversion.html + int r = clamp( (1.164f * (Y-16)) + (1.596f * (Cr - 128)) ); + int g = clamp( (1.164f * (Y-16)) + (-0.392f * (Cb-128)) + (-0.813f * (Cr-128))); + int b = clamp( (1.164f * (Y-16)) + (2.017f * (Cb-128))); + + // naive RGB to CMYK + int cyan = 255 - r; + int magenta = 255 - g; + int yellow = 255 - b; + + // update new raster + value[0] = cyan; + value[1] = magenta; + value[2] = yellow; + value[3] = (int)K; + writableRaster.setPixel(x, y, value); + } + } + return writableRaster; + } + // converts from BGR to RGB private WritableRaster fromBGRtoRGB(Raster raster) { @@ -282,6 +349,10 @@ private String getNumChannels(ImageReader reader) return numChannelsItem.getAttribute("value"); } catch (IOException e) + { + return ""; + } + catch (NegativeArraySizeException e) { return ""; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/DecodeOptions.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/DecodeOptions.java new file mode 100644 index 00000000000..4875cb13439 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/DecodeOptions.java @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.filter; + +import java.awt.Rectangle; + +/** + * Options that may be passed to a Filter to request special handling when decoding the stream. + * Filters may not honor some or all of the specified options, and so callers should check the + * honored flag if further processing relies on the options being used. + */ +public class DecodeOptions +{ + /** + * Default decode options. The honored flag for this instance is always true, as it represents + * the default behavior. + */ + public static final DecodeOptions DEFAULT = new FinalDecodeOptions(true); + + private Rectangle sourceRegion = null; + private int subsamplingX = 1, subsamplingY = 1, subsamplingOffsetX = 0, subsamplingOffsetY = 0; + private boolean filterSubsampled = false; + + /** + * Constructs an empty DecodeOptions instance + */ + public DecodeOptions() + { + // this constructor is intentionally left empty + } + + /** + * Constructs an instance specifying the region of the image that should be decoded. The actual + * region will be clipped to the dimensions of the image. 
+ * + * @param sourceRegion Region of the source image that should be decoded + */ + public DecodeOptions(Rectangle sourceRegion) + { + this.sourceRegion = sourceRegion; + } + + /** + * Constructs an instance specifying the region of the image that should be decoded. The actual + * region will be clipped to the dimensions of the image. + * + * @param x x-coordinate of the top-left corner of the region to be decoded + * @param y y-coordinate of the top-left corner of the region to be decoded + * @param width Width of the region to be decoded + * @param height Height of the region to be decoded + */ + public DecodeOptions(int x, int y, int width, int height) + { + this(new Rectangle(x, y, width, height)); + } + + /** + * Constructs an instance specifying the image should be decoded using subsampling. The + * subsampling will be the same for the X and Y axes. + * + * @param subsampling The number of rows and columns to advance in the source for each pixel in + * the decoded image. + */ + public DecodeOptions(int subsampling) + { + subsamplingX = subsampling; + subsamplingY = subsampling; + } + + /** + * When decoding an image, the part of the image that should be decoded, or null if the entire + * image is needed. + * + * @return The source region to decode, or null if the entire image should be decoded + */ + public Rectangle getSourceRegion() + { + return sourceRegion; + } + + /** + * Sets the region of the source image that should be decoded. The region will be clipped to the + * dimensions of the source image. Setting this value to null will result in the entire image + * being decoded. + * + * @param sourceRegion The source region to decode, or null if the entire image should be + * decoded. + */ + public void setSourceRegion(Rectangle sourceRegion) + { + this.sourceRegion = sourceRegion; + } + + /** + * When decoding an image, the number of columns to advance in the source for every pixel + * decoded. 
+ * + * @return The x-axis subsampling value + */ + public int getSubsamplingX() + { + return subsamplingX; + } + + /** + * Sets the number of columns to advance in the source for every pixel decoded + * + * @param ssX The x-axis subsampling value + */ + public void setSubsamplingX(int ssX) + { + this.subsamplingX = ssX; + } + + /** + * When decoding an image, the number of rows to advance in the source for every pixel decoded. + * + * @return The y-axis subsampling value + */ + public int getSubsamplingY() + { + return subsamplingY; + } + + /** + * Sets the number of rows to advance in the source for every pixel decoded + * + * @param ssY The y-axis subsampling value + */ + public void setSubsamplingY(int ssY) + { + this.subsamplingY = ssY; + } + + /** + * When decoding an image, the horizontal offset for subsampling + * + * @return The x-axis subsampling offset + */ + public int getSubsamplingOffsetX() + { + return subsamplingOffsetX; + } + + /** + * Sets the horizontal subsampling offset for decoding images + * + * @param ssOffsetX The x-axis subsampling offset + */ + public void setSubsamplingOffsetX(int ssOffsetX) + { + this.subsamplingOffsetX = ssOffsetX; + } + + /** + * When decoding an image, the vertical offset for subsampling + * + * @return The y-axis subsampling offset + */ + public int getSubsamplingOffsetY() + { + return subsamplingOffsetY; + } + + /** + * Sets the vertical subsampling offset for decoding images + * + * @param ssOffsetY The y-axis subsampling offset + */ + public void setSubsamplingOffsetY(int ssOffsetY) + { + this.subsamplingOffsetY = ssOffsetY; + } + + /** + * Flag used by the filter to specify if it performed subsampling. + * + * Some filters may be unable or unwilling to apply subsampling, and so the caller must check + * this flag after decoding. + * + * @return True if the filter applied the options specified by this instance, false otherwise. 
+ */ + public boolean isFilterSubsampled() + { + return filterSubsampled; + } + + /** + * Used internally by filters to signal they have applied subsampling as requested by this + * options instance. + * + * @param filterSubsampled Value specifying if the filter could meet the requested options. + * Usually a filter will only call this with the value true, as the default value + * for the flag is false. + */ + void setFilterSubsampled(boolean filterSubsampled) + { + this.filterSubsampled = filterSubsampled; + } + + /** + * Helper class for reusable instances which may not be modified. + */ + private static class FinalDecodeOptions extends DecodeOptions + { + FinalDecodeOptions(boolean filterSubsampled) + { + super.setFilterSubsampled(filterSubsampled); + } + + @Override + public void setSourceRegion(Rectangle sourceRegion) + { + throw new UnsupportedOperationException("This instance may not be modified."); + } + + @Override + public void setSubsamplingX(int ssX) + { + throw new UnsupportedOperationException("This instance may not be modified."); + } + + @Override + public void setSubsamplingY(int ssY) + { + throw new UnsupportedOperationException("This instance may not be modified."); + } + + @Override + public void setSubsamplingOffsetX(int ssOffsetX) + { + throw new UnsupportedOperationException("This instance may not be modified."); + } + + @Override + public void setSubsamplingOffsetY(int ssOffsetY) + { + throw new UnsupportedOperationException("This instance may not be modified."); + } + + @Override + void setFilterSubsampled(boolean filterSubsampled) + { + // Silently ignore the request. 
+ } + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/DecodeResult.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/DecodeResult.java index 6a16461b6c8..96eaa16c9df 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/DecodeResult.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/DecodeResult.java @@ -55,7 +55,7 @@ public COSDictionary getParameters() /** * Returns the embedded JPX color space, if any. - * @return the the embedded JPX color space, or null if there is none. + * @return the embedded JPX color space, or null if there is none. */ public PDJPXColorSpace getJPXColorSpace() { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/Filter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/Filter.java index 6b9a7bd1e81..5c57e6699c0 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/Filter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/Filter.java @@ -20,6 +20,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.Iterator; +import java.util.zip.Deflater; import javax.imageio.ImageIO; import javax.imageio.ImageReader; @@ -41,6 +42,14 @@ public abstract class Filter { private static final Log LOG = LogFactory.getLog(Filter.class); + /** + * Compression Level System Property. Set this to a value from 0 to 9 to change the zlib deflate + * compression level used to compress /Flate streams. The default value is -1 which is + * {@link Deflater#DEFAULT_COMPRESSION}. To set maximum compression, use + * {@code System.setProperty(Filter.SYSPROP_DEFLATELEVEL, "9");} + */ + public static final String SYSPROP_DEFLATELEVEL = "org.apache.pdfbox.filter.deflatelevel"; + /** * Constructor. 
*/ @@ -58,7 +67,25 @@ protected Filter() * @throws IOException if the stream cannot be decoded */ public abstract DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, - int index) throws IOException; + int index) throws IOException; + + /** + * Decodes data, with optional DecodeOptions. Not all filters support all options, and so + * callers should check the options' honored flag to test if they were applied. + * + * @param encoded the encoded byte stream + * @param decoded the stream where decoded data will be written + * @param parameters the parameters used for decoding + * @param index the index to the filter being decoded + * @param options additional options for decoding + * @return repaired parameters dictionary, or the original parameters dictionary + * @throws IOException if the stream cannot be decoded + */ + public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, + int index, DecodeOptions options) throws IOException + { + return decode(encoded, decoded, parameters, index); + } /** * Encodes data. @@ -82,20 +109,28 @@ protected abstract void encode(InputStream input, OutputStream encoded, // normalise the DecodeParams entry so that it is always a dictionary protected COSDictionary getDecodeParams(COSDictionary dictionary, int index) { + COSBase filter = dictionary.getDictionaryObject(COSName.FILTER, COSName.F); COSBase obj = dictionary.getDictionaryObject(COSName.DECODE_PARMS, COSName.DP); - if (obj instanceof COSDictionary) + if (filter instanceof COSName && obj instanceof COSDictionary) { + // PDFBOX-3932: The PDF specification requires "If there is only one filter and that + // filter has parameters, DecodeParms shall be set to the filter’s parameter dictionary" + // but tests show that Adobe means "one filter name object". 
return (COSDictionary)obj; } - else if (obj instanceof COSArray) + else if (filter instanceof COSArray && obj instanceof COSArray) { COSArray array = (COSArray)obj; if (index < array.size()) { - return (COSDictionary)array.getObject(index); + COSBase objAtIndex = array.getObject(index); + if (objAtIndex instanceof COSDictionary) + { + return (COSDictionary)array.getObject(index); + } } } - else if (obj != null) + else if (obj != null && !(filter instanceof COSArray || obj instanceof COSArray)) { LOG.error("Expected DecodeParams to be an Array or Dictionary but found " + obj.getClass().getName()); @@ -114,20 +149,32 @@ else if (obj != null) protected static ImageReader findImageReader(String formatName, String errorCause) throws MissingImageReaderException { Iterator readers = ImageIO.getImageReadersByFormatName(formatName); - ImageReader reader = null; + ImageReader reader; while (readers.hasNext()) { reader = readers.next(); - if (reader.canReadRaster()) + if (reader != null && reader.canReadRaster()) { - break; + return reader; } } - if (reader == null) + throw new MissingImageReaderException("Cannot read " + formatName + " image: " + errorCause); + } + + /** + * @return the ZIP compression level configured for PDFBox + */ + public static int getCompressionLevel() + { + int compressionLevel = Deflater.DEFAULT_COMPRESSION; + try { - throw new MissingImageReaderException("Cannot read " + formatName + " image: " + errorCause); + compressionLevel = Integer.parseInt(System.getProperty(Filter.SYSPROP_DEFLATELEVEL, "-1")); } - return reader; + catch (NumberFormatException ex) + { + LOG.warn(ex.getMessage(), ex); + } + return Math.max(-1, Math.min(Deflater.BEST_COMPRESSION, compressionLevel)); } - } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java index c01f741b1ac..88e1b0da9ff 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java +++ 
b/pdfbox/src/main/java/org/apache/pdfbox/filter/FlateFilter.java @@ -16,18 +16,16 @@ */ package org.apache.pdfbox.filter; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.zip.DataFormatException; +import java.util.zip.Deflater; import java.util.zip.DeflaterOutputStream; import java.util.zip.Inflater; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; /** * Decompresses data encoded using the zlib/deflate compression method, @@ -39,40 +37,18 @@ final class FlateFilter extends Filter { private static final Log LOG = LogFactory.getLog(FlateFilter.class); - private static final int BUFFER_SIZE = 16348; + private static final int BUFFER_SIZE = 0x4000; @Override public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, int index) throws IOException { - int predictor = -1; - final COSDictionary decodeParams = getDecodeParams(parameters, index); - if (decodeParams != null) - { - predictor = decodeParams.getInt(COSName.PREDICTOR); - } try { - if (predictor > 1) - { - int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32); - int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8); - int columns = decodeParams.getInt(COSName.COLUMNS, 1); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - decompress(encoded, baos); - ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); - Predictor.decodePredictor(predictor, colors, bitsPerPixel, columns, bais, decoded); - decoded.flush(); - baos.reset(); - bais.reset(); - } - else - { - decompress(encoded, decoded); - } - } + decompress(encoded, Predictor.wrapPredictor(decoded, decodeParams)); + } catch (DataFormatException e) { // if the stream is corrupt a DataFormatException may occur @@ -90,48 +66,56 @@ 
private void decompress(InputStream in, OutputStream out) throws IOException, Da { byte[] buf = new byte[2048]; // skip zlib header - in.read(buf,0,2); + in.read(); + in.read(); int read = in.read(buf); if (read > 0) { // use nowrap mode to bypass zlib-header and checksum to avoid a DataFormatException Inflater inflater = new Inflater(true); inflater.setInput(buf,0,read); - byte[] res = new byte[1024]; + byte[] res = new byte[1024]; boolean dataWritten = false; - while (true) - { - int resRead = 0; - try - { - resRead = inflater.inflate(res); - } - catch(DataFormatException exception) - { - if (dataWritten) + try + { + while (true) + { + int resRead = 0; + try { - // some data could be read -> don't throw an exception - LOG.warn("FlateFilter: premature end of stream due to a DataFormatException"); - break; + resRead = inflater.inflate(res); } - else + catch(DataFormatException exception) { - // nothing could be read -> re-throw exception - throw exception; + if (dataWritten) + { + // some data could be read -> don't throw an exception + LOG.warn("FlateFilter: premature end of stream due to a DataFormatException"); + break; + } + else + { + // nothing could be read -> re-throw exception + throw exception; + } } + if (resRead != 0) + { + out.write(res,0,resRead); + dataWritten = true; + continue; + } + if (inflater.finished() || inflater.needsDictionary() || in.available() == 0) + { + break; + } + read = in.read(buf); + inflater.setInput(buf,0,read); } - if (resRead != 0) - { - out.write(res,0,resRead); - dataWritten = true; - continue; - } - if (inflater.finished() || inflater.needsDictionary() || in.available() == 0) - { - break; - } - read = in.read(buf); - inflater.setInput(buf,0,read); + } + finally + { + inflater.end(); } } out.flush(); @@ -141,7 +125,9 @@ private void decompress(InputStream in, OutputStream out) throws IOException, Da protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) throws IOException { - 
DeflaterOutputStream out = new DeflaterOutputStream(encoded); + int compressionLevel = getCompressionLevel(); + Deflater deflater = new Deflater(compressionLevel); + DeflaterOutputStream out = new DeflaterOutputStream(encoded, deflater); int amountRead; int mayRead = input.available(); if (mayRead > 0) @@ -154,5 +140,6 @@ protected void encode(InputStream input, OutputStream encoded, COSDictionary par } out.close(); encoded.flush(); + deflater.end(); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/IdentityFilter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/IdentityFilter.java index 37d37c33e8a..74b47b2b904 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/IdentityFilter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/IdentityFilter.java @@ -20,6 +20,7 @@ import java.io.InputStream; import java.io.OutputStream; import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.io.IOUtils; /** * The IdentityFilter filter passes the data through without any modifications. 
@@ -29,19 +30,12 @@ */ final class IdentityFilter extends Filter { - private static final int BUFFER_SIZE = 1024; - @Override public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, int index) throws IOException { - byte[] buffer = new byte[BUFFER_SIZE]; - int amountRead; - while((amountRead = encoded.read(buffer, 0, BUFFER_SIZE)) != -1) - { - decoded.write(buffer, 0, amountRead); - } + IOUtils.copy(encoded, decoded); decoded.flush(); return new DecodeResult(parameters); } @@ -50,12 +44,7 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) throws IOException { - byte[] buffer = new byte[BUFFER_SIZE]; - int amountRead; - while((amountRead = input.read(buffer, 0, BUFFER_SIZE)) != -1) - { - encoded.write(buffer, 0, amountRead); - } + IOUtils.copy(input, encoded); encoded.flush(); } -} +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/JBIG2Filter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/JBIG2Filter.java index b557729312b..d0da9dd0569 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/JBIG2Filter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/JBIG2Filter.java @@ -25,12 +25,12 @@ import java.io.OutputStream; import java.io.SequenceInputStream; import javax.imageio.ImageIO; +import javax.imageio.ImageReadParam; import javax.imageio.ImageReader; import javax.imageio.stream.ImageInputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSStream; @@ -39,8 +39,7 @@ * monochrome (1 bit per pixel) image data (or an approximation of that data). * * Requires a JBIG2 plugin for Java Image I/O to be installed. 
A known working - * plug-in is jbig2-imageio - * which is available under the GPL v3 license. + * plug-in is the Apache PDFBox JBIG2 plugin. * * @author Timo Boehme */ @@ -48,17 +47,38 @@ final class JBIG2Filter extends Filter { private static final Log LOG = LogFactory.getLog(JBIG2Filter.class); + private static boolean levigoLogged = false; + + private static synchronized void logLevigoDonated() + { + if (!levigoLogged) + { + LOG.info("The Levigo JBIG2 plugin has been donated to the Apache Foundation"); + LOG.info("and an improved version is available for download at " + + "https://pdfbox.apache.org/download.cgi"); + levigoLogged = true; + } + } + @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException + public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary + parameters, int index, DecodeOptions options) throws IOException { ImageReader reader = findImageReader("JBIG2", "jbig2-imageio is not installed"); - DecodeResult result = new DecodeResult(new COSDictionary()); - result.getParameters().addAll(parameters); + if (reader.getClass().getName().contains("levigo")) + { + logLevigoDonated(); + } - COSInteger bits = (COSInteger) parameters.getDictionaryObject(COSName.BITS_PER_COMPONENT); + int bits = parameters.getInt(COSName.BITS_PER_COMPONENT, 1); COSDictionary params = getDecodeParams(parameters, index); + ImageReadParam irp = reader.getDefaultReadParam(); + irp.setSourceSubsampling(options.getSubsamplingX(), options.getSubsamplingY(), + options.getSubsamplingOffsetX(), options.getSubsamplingOffsetY()); + irp.setSourceRegion(options.getSourceRegion()); + options.setFilterSubsampled(true); + COSStream globals = null; if (params != null) { @@ -83,7 +103,7 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, BufferedImage image; try { - image = reader.read(0, reader.getDefaultReadParam()); + image = reader.read(0, irp); } catch 
(Exception e) { @@ -93,9 +113,9 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, // I am assuming since JBIG2 is always black and white // depending on your renderer this might or might be needed - if (image.getColorModel().getPixelSize() != bits.intValue()) + if (image.getColorModel().getPixelSize() != bits) { - if (bits.intValue() != 1) + if (bits != 1) { LOG.warn("Attempting to handle a JBIG2 with more than 1-bit depth"); } @@ -125,16 +145,16 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, } reader.dispose(); } - - // repair missing color space - if (!parameters.containsKey(COSName.COLORSPACE)) - { - result.getParameters().setName(COSName.COLORSPACE, COSName.DEVICEGRAY.getName()); - } - return new DecodeResult(parameters); } + @Override + public DecodeResult decode(InputStream encoded, OutputStream decoded, + COSDictionary parameters, int index) throws IOException + { + return decode(encoded, decoded, parameters, index, DecodeOptions.DEFAULT); + } + @Override protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) throws IOException diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/JPXFilter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/JPXFilter.java index d0b3e97d9f4..fbad128fc70 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/JPXFilter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/JPXFilter.java @@ -16,16 +16,21 @@ */ package org.apache.pdfbox.filter; +import java.awt.color.ColorSpace; import java.awt.image.BufferedImage; import java.awt.image.DataBuffer; import java.awt.image.DataBufferByte; -import java.awt.image.WritableRaster; +import java.awt.image.DataBufferUShort; +import java.awt.image.IndexColorModel; +import java.awt.image.MultiPixelPackedSampleModel; +import java.awt.image.Raster; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import javax.imageio.ImageIO; +import 
javax.imageio.ImageReadParam; import javax.imageio.ImageReader; import javax.imageio.stream.ImageInputStream; +import javax.imageio.stream.MemoryCacheImageInputStream; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.graphics.color.PDJPXColorSpace; @@ -47,39 +52,84 @@ */ public final class JPXFilter extends Filter { + /** + * {@inheritDoc} + */ @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException + public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary + parameters, int index, DecodeOptions options) throws IOException { DecodeResult result = new DecodeResult(new COSDictionary()); result.getParameters().addAll(parameters); - BufferedImage image = readJPX(encoded, result); + BufferedImage image = readJPX(encoded, options, result); - WritableRaster raster = image.getRaster(); - if (raster.getDataBuffer().getDataType() != DataBuffer.TYPE_BYTE) + Raster raster = image.getRaster(); + switch (raster.getDataBuffer().getDataType()) { - throw new IOException("Not implemented: greater than 8-bit depth"); + case DataBuffer.TYPE_BYTE: + DataBufferByte byteBuffer = (DataBufferByte) raster.getDataBuffer(); + decoded.write(byteBuffer.getData()); + return result; + + case DataBuffer.TYPE_USHORT: + DataBufferUShort wordBuffer = (DataBufferUShort) raster.getDataBuffer(); + for (short w : wordBuffer.getData()) + { + decoded.write(w >> 8); + decoded.write(w); + } + return result; + + case DataBuffer.TYPE_INT: + // not yet used (as of October 2018) but works as fallback + // if we decide to convert to BufferedImage.TYPE_INT_RGB + int[] ar = new int[raster.getNumBands()]; + for (int y = 0; y < image.getHeight(); ++y) + { + for (int x = 0; x < image.getWidth(); ++x) + { + raster.getPixel(x, y, ar); + for (int i = 0; i < ar.length; ++i) + { + decoded.write(ar[i]); + } + } + } + return result; + + default: + 
throw new IOException("Data type " + raster.getDataBuffer().getDataType() + " not implemented"); } - DataBufferByte buffer = (DataBufferByte)raster.getDataBuffer(); - decoded.write(buffer.getData()); + } - return result; + @Override + public DecodeResult decode(InputStream encoded, OutputStream decoded, + COSDictionary parameters, int index) throws IOException + { + return decode(encoded, decoded, parameters, index, DecodeOptions.DEFAULT); } // try to read using JAI Image I/O - private BufferedImage readJPX(InputStream input, DecodeResult result) throws IOException + private BufferedImage readJPX(InputStream input, DecodeOptions options, DecodeResult result) throws IOException { ImageReader reader = findImageReader("JPEG2000", "Java Advanced Imaging (JAI) Image I/O Tools are not installed"); ImageInputStream iis = null; try { - iis = ImageIO.createImageInputStream(input); + // PDFBOX-4121: ImageIO.createImageInputStream() is much slower + iis = new MemoryCacheImageInputStream(input); + reader.setInput(iis, true, true); + ImageReadParam irp = reader.getDefaultReadParam(); + irp.setSourceRegion(options.getSourceRegion()); + irp.setSourceSubsampling(options.getSubsamplingX(), options.getSubsamplingY(), + options.getSubsamplingOffsetX(), options.getSubsamplingOffsetY()); + options.setFilterSubsampled(true); BufferedImage image; try { - image = reader.read(0); + image = reader.read(0, irp); } catch (Exception e) { @@ -103,13 +153,27 @@ private BufferedImage readJPX(InputStream input, DecodeResult result) throws IOE } // override dimensions, see PDFBOX-1735 - parameters.setInt(COSName.WIDTH, image.getWidth()); - parameters.setInt(COSName.HEIGHT, image.getHeight()); + parameters.setInt(COSName.WIDTH, reader.getWidth(0)); + parameters.setInt(COSName.HEIGHT, reader.getHeight(0)); // extract embedded color space if (!parameters.containsKey(COSName.COLORSPACE)) { - result.setColorSpace(new PDJPXColorSpace(image.getColorModel().getColorSpace())); + if (image.getSampleModel() 
instanceof MultiPixelPackedSampleModel && + image.getColorModel().getPixelSize() == 1 && + image.getRaster().getNumBands() == 1 && + image.getColorModel() instanceof IndexColorModel) + { + // PDFBOX-4326: + // force CS_GRAY colorspace because colorspace in IndexColorModel + // has 3 colors despite that there is only 1 color per pixel + // in raster + result.setColorSpace(new PDJPXColorSpace(ColorSpace.getInstance(ColorSpace.CS_GRAY))); + } + else + { + result.setColorSpace(new PDJPXColorSpace(image.getColorModel().getColorSpace())); + } } return image; @@ -124,6 +188,9 @@ private BufferedImage readJPX(InputStream input, DecodeResult result) throws IOE } } + /** + * {@inheritDoc} + */ @Override protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) throws IOException diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java index 92110eeb76c..c502517fb4b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java @@ -15,8 +15,6 @@ */ package org.apache.pdfbox.filter; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.EOFException; import java.io.IOException; import java.io.InputStream; @@ -67,37 +65,15 @@ public class LZWFilter extends Filter public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, int index) throws IOException { - int predictor = -1; - int earlyChange = 1; - COSDictionary decodeParams = getDecodeParams(parameters, index); - if (decodeParams != null) - { - predictor = decodeParams.getInt(COSName.PREDICTOR); - earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1); - if (earlyChange != 0 && earlyChange != 1) - { - earlyChange = 1; - } - } - if (predictor > 1) - { - @SuppressWarnings("null") - int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32); - int bitsPerPixel 
= decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8); - int columns = decodeParams.getInt(COSName.COLUMNS, 1); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - doLZWDecode(encoded, baos, earlyChange); - ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); - Predictor.decodePredictor(predictor, colors, bitsPerPixel, columns, bais, decoded); - decoded.flush(); - baos.reset(); - bais.reset(); - } - else + int earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1); + + if (earlyChange != 0 && earlyChange != 1) { - doLZWDecode(encoded, decoded, earlyChange); + earlyChange = 1; } + + doLZWDecode(encoded, Predictor.wrapPredictor(decoded, decodeParams), earlyChange); return new DecodeResult(parameters); } @@ -157,7 +133,7 @@ private void doLZWDecode(InputStream encoded, OutputStream decoded, int earlyCha decoded.flush(); } - private void checkIndexBounds(List codeTable, long index, MemoryCacheImageInputStream in) + private void checkIndexBounds(List codeTable, long index, MemoryCacheImageInputStream in) throws IOException { if (index < 0) diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java index 2fe0acb8c1b..7e1364f2585 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/Predictor.java @@ -15,11 +15,13 @@ */ package org.apache.pdfbox.filter; +import java.io.FilterOutputStream; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; +import java.util.Arrays; -import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; /** * Helper class to contain predictor decoding used by Flate and LZW filter. 
@@ -31,198 +33,179 @@ public final class Predictor private Predictor() { } - - static void decodePredictor(int predictor, int colors, int bitsPerComponent, int columns, InputStream in, OutputStream out) - throws IOException + + /** + * Decodes a single line of data in-place. + * @param predictor Predictor value for the current line + * @param colors Number of color components, from decode parameters. + * @param bitsPerComponent Number of bits per components, from decode parameters. + * @param columns Number samples in a row, from decode parameters. + * @param actline Current (active) line to decode. Data will be decoded in-place, + * i.e. - the contents of this buffer will be modified. + * @param lastline The previous decoded line. When decoding the first line, this + * parameter should be an empty byte array of the same length as + * actline. + */ + static void decodePredictorRow(int predictor, int colors, int bitsPerComponent, int columns, byte[] actline, byte[] lastline) { if (predictor == 1) { // no prediction - IOUtils.copy(in, out); + return; } - else + final int bitsPerPixel = colors * bitsPerComponent; + final int bytesPerPixel = (bitsPerPixel + 7) / 8; + final int rowlength = actline.length; + switch (predictor) { - // calculate sizes - final int bitsPerPixel = colors * bitsPerComponent; - final int bytesPerPixel = (bitsPerPixel + 7) / 8; - final int rowlength = (columns * bitsPerPixel + 7) / 8; - byte[] actline = new byte[rowlength]; - byte[] lastline = new byte[rowlength]; - - int linepredictor = predictor; - - while (in.available() > 0) - { - // test for PNG predictor; each value >= 10 (not only 15) indicates usage of PNG predictor - if (predictor >= 10) + case 2: + // PRED TIFF SUB + if (bitsPerComponent == 8) { - // PNG predictor; each row starts with predictor type (0, 1, 2, 3, 4) - // read per line predictor - linepredictor = in.read(); - if (linepredictor == -1) + // for 8 bits per component it is the same algorithm as PRED SUB of PNG format + for 
(int p = bytesPerPixel; p < rowlength; p++) { - return; + int sub = actline[p] & 0xff; + int left = actline[p - bytesPerPixel] & 0xff; + actline[p] = (byte) (sub + left); } - // add 10 to tread value 0 as 10, 1 as 11, ... - linepredictor += 10; + break; } - - // read line - int i, offset = 0; - while (offset < rowlength && ((i = in.read(actline, offset, rowlength - offset)) != -1)) + if (bitsPerComponent == 16) { - offset += i; + for (int p = bytesPerPixel; p < rowlength; p += 2) + { + int sub = ((actline[p] & 0xff) << 8) + (actline[p + 1] & 0xff); + int left = (((actline[p - bytesPerPixel] & 0xff) << 8) + + (actline[p - bytesPerPixel + 1] & 0xff)); + actline[p] = (byte) (((sub + left) >> 8) & 0xff); + actline[p + 1] = (byte) ((sub + left) & 0xff); + } + break; } - - // do prediction as specified in PNG-Specification 1.2 - switch (linepredictor) + if (bitsPerComponent == 1 && colors == 1) { - case 2: - // PRED TIFF SUB - if (bitsPerComponent == 8) - { - // for 8 bits per component it is the same algorithm as PRED SUB of PNG format - for (int p = bytesPerPixel; p < rowlength; p++) - { - int sub = actline[p] & 0xff; - int left = actline[p - bytesPerPixel] & 0xff; - actline[p] = (byte) (sub + left); - } - break; - } - if (bitsPerComponent == 16) + // bytesPerPixel cannot be used: + // "A row shall occupy a whole number of bytes, rounded up if necessary. + // Samples and their components shall be packed into bytes + // from high-order to low-order bits." 
+ for (int p = 0; p < rowlength; p++) + { + for (int bit = 7; bit >= 0; --bit) { - for (int p = bytesPerPixel; p < rowlength; p += 2) + int sub = (actline[p] >> bit) & 1; + if (p == 0 && bit == 7) { - int sub = ((actline[p] & 0xff) << 8) + (actline[p + 1] & 0xff); - int left = (((actline[p - bytesPerPixel] & 0xff) << 8) - + (actline[p - bytesPerPixel + 1] & 0xff)); - actline[p] = (byte) (((sub + left) >> 8) & 0xff); - actline[p + 1] = (byte) ((sub + left) & 0xff); + continue; } - break; - } - if (bitsPerComponent == 1 && colors == 1) - { - // bytesPerPixel cannot be used: - // "A row shall occupy a whole number of bytes, rounded up if necessary. - // Samples and their components shall be packed into bytes - // from high-order to low-order bits." - for (int p = 0; p < rowlength; p++) + int left; + if (bit == 7) { - for (int bit = 7; bit >= 0; --bit) - { - int sub = (actline[p] >> bit) & 1; - if (p == 0 && bit == 7) - { - continue; - } - int left; - if (bit == 7) - { - // use bit #0 from previous byte - left = actline[p - 1] & 1; - } - else - { - // use "previous" bit - left = (actline[p] >> (bit + 1)) & 1; - } - if (((sub + left) & 1) == 0) - { - // reset bit - actline[p] = (byte) (actline[p] & ~(1 << bit)); - } - else - { - // set bit - actline[p] = (byte) (actline[p] | (1 << bit)); - } - } + // use bit #0 from previous byte + left = actline[p - 1] & 1; } - break; - } - // everything else, i.e. 
bpc 2 and 4, but has been tested for bpc 1 and 8 too - int elements = columns * colors; - for (int p = colors; p < elements; ++p) - { - int bytePosSub = p * bitsPerComponent / 8; - int bitPosSub = 8 - p * bitsPerComponent % 8 - bitsPerComponent; - int bytePosLeft = (p - colors) * bitsPerComponent / 8; - int bitPosLeft = 8 - (p - colors) * bitsPerComponent % 8 - bitsPerComponent; - - int sub = getBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent); - int left = getBitSeq(actline[bytePosLeft], bitPosLeft, bitsPerComponent); - actline[bytePosSub] = (byte) calcSetBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent, sub + left); - } - break; - case 10: - // PRED NONE - // do nothing - break; - case 11: - // PRED SUB - for (int p = bytesPerPixel; p < rowlength; p++) - { - int sub = actline[p]; - int left = actline[p - bytesPerPixel]; - actline[p] = (byte) (sub + left); - } - break; - case 12: - // PRED UP - for (int p = 0; p < rowlength; p++) - { - int up = actline[p] & 0xff; - int prior = lastline[p] & 0xff; - actline[p] = (byte) ((up + prior) & 0xff); - } - break; - case 13: - // PRED AVG - for (int p = 0; p < rowlength; p++) - { - int avg = actline[p] & 0xff; - int left = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0; - int up = lastline[p] & 0xff; - actline[p] = (byte) ((avg + (left + up) / 2) & 0xff); - } - break; - case 14: - // PRED PAETH - for (int p = 0; p < rowlength; p++) - { - int paeth = actline[p] & 0xff; - int a = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;// left - int b = lastline[p] & 0xff;// upper - int c = p - bytesPerPixel >= 0 ? 
lastline[p - bytesPerPixel] & 0xff : 0;// upperleft - int value = a + b - c; - int absa = Math.abs(value - a); - int absb = Math.abs(value - b); - int absc = Math.abs(value - c); - - if (absa <= absb && absa <= absc) + else { - actline[p] = (byte) ((paeth + a) & 0xff); + // use "previous" bit + left = (actline[p] >> (bit + 1)) & 1; } - else if (absb <= absc) + if (((sub + left) & 1) == 0) { - actline[p] = (byte) ((paeth + b) & 0xff); + // reset bit + actline[p] &= ~(1 << bit); } else { - actline[p] = (byte) ((paeth + c) & 0xff); + // set bit + actline[p] |= 1 << bit; } } - break; - default: - break; + } + break; } - System.arraycopy(actline, 0, lastline, 0, rowlength); - out.write(actline); - } + // everything else, i.e. bpc 2 and 4, but has been tested for bpc 1 and 8 too + int elements = columns * colors; + for (int p = colors; p < elements; ++p) + { + int bytePosSub = p * bitsPerComponent / 8; + int bitPosSub = 8 - p * bitsPerComponent % 8 - bitsPerComponent; + int bytePosLeft = (p - colors) * bitsPerComponent / 8; + int bitPosLeft = 8 - (p - colors) * bitsPerComponent % 8 - bitsPerComponent; + + int sub = getBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent); + int left = getBitSeq(actline[bytePosLeft], bitPosLeft, bitsPerComponent); + actline[bytePosSub] = (byte) calcSetBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent, sub + left); + } + break; + case 10: + // PRED NONE + // do nothing + break; + case 11: + // PRED SUB + for (int p = bytesPerPixel; p < rowlength; p++) + { + int sub = actline[p]; + int left = actline[p - bytesPerPixel]; + actline[p] = (byte) (sub + left); + } + break; + case 12: + // PRED UP + for (int p = 0; p < rowlength; p++) + { + int up = actline[p] & 0xff; + int prior = lastline[p] & 0xff; + actline[p] = (byte) ((up + prior) & 0xff); + } + break; + case 13: + // PRED AVG + for (int p = 0; p < rowlength; p++) + { + int avg = actline[p] & 0xff; + int left = p - bytesPerPixel >= 0 ? 
actline[p - bytesPerPixel] & 0xff : 0; + int up = lastline[p] & 0xff; + actline[p] = (byte) ((avg + (left + up) / 2) & 0xff); + } + break; + case 14: + // PRED PAETH + for (int p = 0; p < rowlength; p++) + { + int paeth = actline[p] & 0xff; + int a = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;// left + int b = lastline[p] & 0xff;// upper + int c = p - bytesPerPixel >= 0 ? lastline[p - bytesPerPixel] & 0xff : 0;// upperleft + int value = a + b - c; + int absa = Math.abs(value - a); + int absb = Math.abs(value - b); + int absc = Math.abs(value - c); + + if (absa <= absb && absa <= absc) + { + actline[p] = (byte) ((paeth + a) & 0xff); + } + else if (absb <= absc) + { + actline[p] = (byte) ((paeth + b) & 0xff); + } + else + { + actline[p] = (byte) ((paeth + c) & 0xff); + } + } + break; + default: + break; } } + + static int calculateRowLength(int colors, int bitsPerComponent, int columns) + { + final int bitsPerPixel = colors * bitsPerComponent; + return (columns * bitsPerPixel + 7) / 8; + } // get value from bit interval from a byte static int getBitSeq(int by, int startBit, int bitSize) @@ -240,4 +223,145 @@ static int calcSetBitSeq(int by, int startBit, int bitSize, int val) return (by & mask) | (truncatedVal << startBit); } + /** + * Wraps and OutputStream in a predictor decoding stream as necessary. + * If no predictor is specified by the parameters, the original stream is returned as is. + * + * @param out The stream to which decoded data should be written + * @param decodeParams Decode parameters for the stream + * @return An OutputStream is returned, which will write decoded data + * into the given stream. If no predictor is specified, the original stream is returned. 
+ */ + static OutputStream wrapPredictor(OutputStream out, COSDictionary decodeParams) + { + int predictor = decodeParams.getInt(COSName.PREDICTOR); + if (predictor > 1) + { + int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32); + int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8); + int columns = decodeParams.getInt(COSName.COLUMNS, 1); + + return new PredictorOutputStream(out, predictor, colors, bitsPerPixel, columns); + } + else + { + return out; + } + } + + /** + * Output stream that implements predictor decoding. Data is buffered until a complete + * row is available, which is then decoded and written to the underlying stream. + * The previous row is retained for decoding the next row. + */ + private static final class PredictorOutputStream extends FilterOutputStream + { + // current predictor type + private int predictor; + // image decode parameters + private final int colors; + private final int bitsPerComponent; + private final int columns; + private final int rowLength; + // PNG predictor (predictor>=10) means every row has a (potentially different) + // predictor value + private final boolean predictorPerRow; + + // data buffers + private byte[] currentRow; + private byte[] lastRow; + // amount of data in the current row + private int currentRowData = 0; + // was the per-row predictor value read for the current row being processed + private boolean predictorRead = false; + + PredictorOutputStream(OutputStream out, int predictor, int colors, int bitsPerComponent, int columns) + { + super(out); + this.predictor = predictor; + this.colors = colors; + this.bitsPerComponent = bitsPerComponent; + this.columns = columns; + this.rowLength = calculateRowLength(colors, bitsPerComponent, columns); + this.predictorPerRow = predictor >= 10; + currentRow = new byte[rowLength]; + lastRow = new byte[rowLength]; + } + + @Override + public void write(byte[] bytes) throws IOException + { + write(bytes, 0, bytes.length); + } + + @Override + 
public void write(byte[] bytes, int off, int len) throws IOException + { + int currentOffset = off; + int maxOffset = currentOffset + len; + while (currentOffset < maxOffset) + { + if (predictorPerRow && currentRowData == 0 && !predictorRead) + { + // PNG predictor; each row starts with predictor type (0, 1, 2, 3, 4) + // read per line predictor, add 10 to treat value 0 as 10, 1 as 11, ... + predictor = bytes[currentOffset] + 10; + currentOffset++; + predictorRead = true; + } + else + { + int toRead = Math.min(rowLength - currentRowData, maxOffset - currentOffset); + System.arraycopy(bytes, currentOffset, currentRow, currentRowData, toRead); + currentRowData += toRead; + currentOffset += toRead; + + // current row is filled, decode it, write it to underlying stream, + // and reset the state. + if (currentRowData == currentRow.length) + { + decodeAndWriteRow(); + } + } + } + } + + private void decodeAndWriteRow() throws IOException + { + decodePredictorRow(predictor, colors, bitsPerComponent, columns, currentRow, lastRow); + out.write(currentRow); + flipRows(); + } + + /** + * Flips the row buffers (to avoid copying), and resets the current-row index + * and predictorRead flag + */ + private void flipRows() + { + byte[] temp = lastRow; + lastRow = currentRow; + currentRow = temp; + currentRowData = 0; + predictorRead = false; + } + + @Override + public void flush() throws IOException + { + // The last row is allowed to be incomplete, and should be completed with zeros. 
+ if (currentRowData > 0) + { + Arrays.fill(currentRow, currentRowData, rowLength, (byte)0); + decodeAndWriteRow(); + } + super.flush(); + } + + @Override + public void write(int i) throws IOException + { + throw new UnsupportedOperationException("Not supported"); + } + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/RunLengthDecodeFilter.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/RunLengthDecodeFilter.java index 9afc5ebecd9..c9dab1762ed 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/RunLengthDecodeFilter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/RunLengthDecodeFilter.java @@ -46,9 +46,14 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, { int amountToCopy = dupAmount + 1; int compressedRead; - while(amountToCopy > 0) + while (amountToCopy > 0) { compressedRead = encoded.read(buffer, 0, amountToCopy); + // EOF reached? + if (compressedRead == -1) + { + break; + } decoded.write(buffer, 0, compressedRead); amountToCopy -= compressedRead; } @@ -56,6 +61,11 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded, else { int dupByte = encoded.read(); + // EOF reached? + if (dupByte == -1) + { + break; + } for (int i = 0; i < 257 - dupAmount; i++) { decoded.write(dupByte); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/TIFFExtension.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/TIFFExtension.java new file mode 100644 index 00000000000..392f43feb06 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/TIFFExtension.java @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2012, Harald Kuhr + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name "TwelveMonkeys" nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.apache.pdfbox.filter; + +/** + * TIFFExtension + * + * @author Harald Kuhr + * @author last modified by $Author: haraldk$ + * @version $Id: TIFFExtension.java,v 1.0 08.05.12 16:45 haraldk Exp$ + */ +interface TIFFExtension { + /** CCITT T.4/Group 3 Fax compression. */ + int COMPRESSION_CCITT_T4 = 3; + /** CCITT T.6/Group 4 Fax compression. */ + int COMPRESSION_CCITT_T6 = 4; + /** LZW Compression. Was baseline, but moved to extension due to license issues in the LZW algorithm. */ + int COMPRESSION_LZW = 5; + /** Deprecated. For backwards compatibility only ("Old-style" JPEG). */ + int COMPRESSION_OLD_JPEG = 6; + /** JPEG Compression (lossy). */ + int COMPRESSION_JPEG = 7; + /** Custom: PKZIP-style Deflate. 
*/ + int COMPRESSION_DEFLATE = 32946; + /** Adobe-style Deflate. */ + int COMPRESSION_ZLIB = 8; + + int PHOTOMETRIC_SEPARATED = 5; + int PHOTOMETRIC_YCBCR = 6; + int PHOTOMETRIC_CIELAB = 8; + int PHOTOMETRIC_ICCLAB = 9; + int PHOTOMETRIC_ITULAB = 10; + + int PLANARCONFIG_PLANAR = 2; + + int PREDICTOR_HORIZONTAL_DIFFERENCING = 2; + int PREDICTOR_HORIZONTAL_FLOATINGPOINT = 3; + + int FILL_RIGHT_TO_LEFT = 2; + + int SAMPLEFORMAT_INT = 2; + int SAMPLEFORMAT_FP = 3; + int SAMPLEFORMAT_UNDEFINED = 4; + + int YCBCR_POSITIONING_CENTERED = 1; + int YCBCR_POSITIONING_COSITED = 2; + + /** Deprecated. For backwards compatibility only ("Old-style" JPEG). */ + int JPEG_PROC_BASELINE = 1; + /** Deprecated. For backwards compatibility only ("Old-style" JPEG). */ + int JPEG_PROC_LOSSLESS = 14; + + /** For use with Photometric: 5 (Separated), when image data is in CMYK color space. */ + int INKSET_CMYK = 1; + + /** + * For use with Photometric: 5 (Separated), when image data is in a color space other than CMYK. + * See {@link com.twelvemonkeys.imageio.metadata.exif.TIFF#TAG_INK_NAMES InkNames} field for a + * description of the inks to be used. 
+ */ + int INKSET_NOT_CMYK = 2; + + int ORIENTATION_TOPRIGHT = 2; + int ORIENTATION_BOTRIGHT = 3; + int ORIENTATION_BOTLEFT = 4; + int ORIENTATION_LEFTTOP = 5; + int ORIENTATION_RIGHTTOP = 6; + int ORIENTATION_RIGHTBOT = 7; + int ORIENTATION_LEFTBOT = 8; + + int GROUP3OPT_2DENCODING = 1; + int GROUP3OPT_UNCOMPRESSED = 2; + int GROUP3OPT_FILLBITS = 4; + int GROUP3OPT_BYTEALIGNED = 8; + int GROUP4OPT_UNCOMPRESSED = 2; + int GROUP4OPT_BYTEALIGNED = 4; + int COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE = 2; + int FILL_LEFT_TO_RIGHT = 1; // Default +} + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/CCITTFaxConstants.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/CCITTFaxConstants.java deleted file mode 100644 index 0ce02b2d45c..00000000000 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/CCITTFaxConstants.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* $Id$ */ - -package org.apache.pdfbox.filter.ccitt; - -/** - * Constants for CCITT Fax Filter. - */ -final class CCITTFaxConstants -{ - /** A constant for group 3 1D encoding (ITU T.4). */ - final int COMPRESSION_GROUP3_1D = 0; - - /** A constant for group 3 2D encoding (ITU T.4). 
*/ - final int COMPRESSION_GROUP3_2D = 1; - - /** A constant for group 4 2D encoding (ITU T.6). */ - final int COMPRESSION_GROUP4_2D = 2; - - //Format: First 8 bits: length of pattern, Second 8 bits: pattern - - /** The white terminating code words. */ - public static final short[] WHITE_TERMINATING = new short[] { - 0x0835, 0x0607, 0x0407, 0x0408, 0x040B, 0x040C, 0x040E, 0x040F, - 0x0513, 0x0514, 0x0507, 0x0508, 0x0608, 0x0603, 0x0634, 0x0635, - 0x062A, 0x062B, 0x0727, 0x070C, 0x0708, 0x0717, 0x0703, 0x0704, - 0x0728, 0x072B, 0x0713, 0x0724, 0x0718, 0x0802, 0x0803, 0x081A, - 0x081B, 0x0812, 0x0813, 0x0814, 0x0815, 0x0816, 0x0817, 0x0828, - 0x0829, 0x082A, 0x082B, 0x082C, 0x082D, 0x0804, 0x0805, 0x080A, - 0x080B, 0x0852, 0x0853, 0x0854, 0x0855, 0x0824, 0x0825, 0x0858, - 0x0859, 0x085A, 0x085B, 0x084A, 0x084B, 0x0832, 0x0833, 0x0834}; - - /** The black terminating code words. */ - public static final short[] BLACK_TERMINATING = new short[] { - 0x0A37, 0x0302, 0x0203, 0x0202, 0x0303, 0x0403, 0x0402, 0x0503, - 0x0605, 0x0604, 0x0704, 0x0705, 0x0707, 0x0804, 0x0807, 0x0918, - 0x0A17, 0x0A18, 0x0A08, 0x0B67, 0x0B68, 0x0B6C, 0x0B37, 0x0B28, - 0x0B17, 0x0B18, 0x0CCA, 0x0CCB, 0x0CCC, 0x0CCD, 0x0C68, 0x0C69, - 0x0C6A, 0x0C6B, 0x0CD2, 0x0CD3, 0x0CD4, 0x0CD5, 0x0CD6, 0x0CD7, - 0x0C6C, 0x0C6D, 0x0CDA, 0x0CDB, 0x0C54, 0x0C55, 0x0C56, 0x0C57, - 0x0C64, 0x0C65, 0x0C52, 0x0C53, 0x0C24, 0x0C37, 0x0C38, 0x0C27, - 0x0C28, 0x0C58, 0x0C59, 0x0C2B, 0x0C2C, 0x0C5A, 0x0C66, 0x0C67}; - - /** The white make-up code words. */ - public static final short[] WHITE_MAKE_UP = new short[] { - 0x051B, 0x0512, 0x0617, 0x0737, 0x0836, 0x0837, 0x0864, 0x0865, - 0x0868, 0x0867, 0x09CC, 0x09CD, 0x09D2, 0x09D3, 0x09D4, 0x09D5, - 0x09D6, 0x09D7, 0x09D8, 0x09D9, 0x09DA, 0x09DB, 0x0998, 0x0999, - 0x099A, 0x0618, 0x099B}; - - /** The black make-up code words. 
*/ - public static final short[] BLACK_MAKE_UP = new short[] { - 0x0A0F, 0x0CC8, 0x0CC9, 0x0C5B, 0x0C33, 0x0C34, 0x0C35, 0x0D6C, - 0x0D6D, 0x0D4A, 0x0D4B, 0x0D4C, 0x0D4D, 0x0D72, 0x0D73, 0x0D74, - 0x0D75, 0x0D76, 0x0D77, 0x0D52, 0x0D53, 0x0D54, 0x0D55, 0x0D5A, - 0x0D5B, 0x0D64, 0x0D65}; - - /** The long make-up code words. */ - public static final short[] LONG_MAKE_UP = new short[] { - 0x0B08, 0x0B0C, 0x0B0D, 0x0C12, 0x0C13, 0x0C14, 0x0C15, 0x0C16, - 0x0C17, 0x0C1C, 0x0C1D, 0x0C1E, 0x0C1F}; - - /** The EOL code word. */ - public static final short EOL_CODE = 0x0C01; -} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/CCITTFaxG31DDecodeInputStream.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/CCITTFaxG31DDecodeInputStream.java deleted file mode 100644 index 5b8f3202544..00000000000 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/CCITTFaxG31DDecodeInputStream.java +++ /dev/null @@ -1,466 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* $Id$ */ - -package org.apache.pdfbox.filter.ccitt; - -import java.io.IOException; -import java.io.InputStream; - -/** - * This is a CCITT Group 3 1D decoder (ITU T.4). 
- */ -public final class CCITTFaxG31DDecodeInputStream extends InputStream -{ - private static final int CODE_WORD = 0; - private static final int SIGNAL_EOD = -1; - private static final int SIGNAL_EOL = -2; - - private InputStream source; - private int columns; - private int rows; - private boolean encodedByteAlign; - - //for reading compressed bits - private int bits; - private int bitPos = 8; - - //a single decoded line (one line decoded at a time, then read byte by byte) - private PackedBitArray decodedLine; - private int decodedWritePos; //write position in bits (used by the decoder algorithm) - private int decodedReadPos; //read position in bytes (used by the actual InputStream reading) - - //state - private int y = -1; //Current row/line - private int accumulatedRunLength; //Used for make-up codes - - private static final NonLeafLookupTreeNode WHITE_LOOKUP_TREE_ROOT; - private static final NonLeafLookupTreeNode BLACK_LOOKUP_TREE_ROOT; - - static { - WHITE_LOOKUP_TREE_ROOT = new NonLeafLookupTreeNode(); - BLACK_LOOKUP_TREE_ROOT = new NonLeafLookupTreeNode(); - buildLookupTree(); - } - - /** - * Creates a new decoder. - * - * @param source the input stream containing the compressed data. - * @param columns the number of columns - * @param rows the number of rows (0 if undefined) - * @param encodedByteAlign true if each encoded scan line is filled - * to a byte boundary, false if not - */ - public CCITTFaxG31DDecodeInputStream(InputStream source, int columns, int rows, boolean encodedByteAlign) - { - this.source = source; - this.columns = columns; - this.rows = rows; - this.decodedLine = new PackedBitArray(columns); - this.decodedReadPos = this.decodedLine.getByteCount(); - this.encodedByteAlign = encodedByteAlign; - } - - /** - * Creates a new decoder. - * - * @param source the input stream containing the compressed data. 
- * @param columns the number of columns - * @param encodedByteAlign true if each encoded scan line is filled - * to a byte boundary, false if not - */ - public CCITTFaxG31DDecodeInputStream(InputStream source, int columns, boolean encodedByteAlign) - { - this(source, columns, 0, encodedByteAlign); - } - - /** {@inheritDoc} */ - public boolean markSupported() - { - return false; - } - - /** {@inheritDoc} */ - public int read() throws IOException - { - if (this.decodedReadPos >= this.decodedLine.getByteCount()) - { - boolean hasLine = decodeLine(); - if (!hasLine) - { - return -1; - } - } - byte data = this.decodedLine.getData()[this.decodedReadPos++]; - - return data & 0xFF; - } - - //TODO Implement the other two read methods - - private boolean decodeLine() throws IOException - { - if (encodedByteAlign && this.bitPos != 0) - { - readByte(); - } - if (this.bits < 0) - { - //Shortcut after EOD - return false; - } - this.y++; - int x = 0; - if (this.rows > 0 && this.y >= this.rows) - { - //All rows decoded, ignore further bits - return false; - } - this.decodedLine.clear(); - this.decodedWritePos = 0; - int expectRTC = 6; - boolean white = true; - while (x < this.columns || this.accumulatedRunLength > 0) - { - CodeWord code; - LookupTreeNode root = white ? 
WHITE_LOOKUP_TREE_ROOT : BLACK_LOOKUP_TREE_ROOT; - code = root.getNextCodeWord(this); - if (code == null) - { - //no more code words (EOD) - if (x > 0) - { - //Have last line - this.decodedReadPos = 0; - return true; - } - else - { - return false; - } - } - else if (code.getType() == SIGNAL_EOL) - { - expectRTC--; - if (expectRTC == 0) - { - //Return to Control = End Of Data - return false; - } - if (x == 0) - { - //Ignore leading EOL - continue; - } - } - else - { - expectRTC = -1; - x += code.execute(this); - if (this.accumulatedRunLength == 0) - { - //Only switch if not using make-up codes - white = !white; - } - } - } - this.decodedReadPos = 0; - return true; - } - - private void writeRun(int bit, int length) - { - this.accumulatedRunLength += length; - - if (bit != 0) - { - this.decodedLine.setBits(this.decodedWritePos, this.accumulatedRunLength); - } - this.decodedWritePos += this.accumulatedRunLength; - this.accumulatedRunLength = 0; - } - - private void writeNonTerminating(int length) - { - this.accumulatedRunLength += length; - } - - private static final int[] BIT_POS_MASKS - = new int[] {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01}; - - private int readBit() throws IOException - { - if (this.bitPos >= 8) - { - readByte(); - if (this.bits < 0) - { - return SIGNAL_EOD; - } - } - return (this.bits & BIT_POS_MASKS[this.bitPos++]) == 0 ? 
0 : 1; - } - - private void readByte() throws IOException - { - this.bits = this.source.read(); - this.bitPos = 0; - } - - private static final short EOL_STARTER = 0x0B00; - - private static void buildLookupTree() - { - buildUpTerminating(CCITTFaxConstants.WHITE_TERMINATING, WHITE_LOOKUP_TREE_ROOT, true); - buildUpTerminating(CCITTFaxConstants.BLACK_TERMINATING, BLACK_LOOKUP_TREE_ROOT, false); - buildUpMakeUp(CCITTFaxConstants.WHITE_MAKE_UP, WHITE_LOOKUP_TREE_ROOT); - buildUpMakeUp(CCITTFaxConstants.BLACK_MAKE_UP, BLACK_LOOKUP_TREE_ROOT); - buildUpMakeUpLong(CCITTFaxConstants.LONG_MAKE_UP, WHITE_LOOKUP_TREE_ROOT); - buildUpMakeUpLong(CCITTFaxConstants.LONG_MAKE_UP, BLACK_LOOKUP_TREE_ROOT); - LookupTreeNode eolNode = new EndOfLineTreeNode(); - addLookupTreeNode(EOL_STARTER, WHITE_LOOKUP_TREE_ROOT, eolNode); - addLookupTreeNode(EOL_STARTER, BLACK_LOOKUP_TREE_ROOT, eolNode); - } - - private static void buildUpTerminating(short[] codes, NonLeafLookupTreeNode root, boolean white) - { - for (int len = 0, c = codes.length; len < c; len++) - { - LookupTreeNode leaf = new RunLengthTreeNode(white ? 
0 : 1, len); - addLookupTreeNode(codes[len], root, leaf); - } - } - - private static void buildUpMakeUp(short[] codes, NonLeafLookupTreeNode root) - { - for (int len = 0, c = codes.length; len < c; len++) - { - LookupTreeNode leaf = new MakeUpTreeNode((len + 1) * 64); - addLookupTreeNode(codes[len], root, leaf); - } - } - - private static void buildUpMakeUpLong(short[] codes, NonLeafLookupTreeNode root) - { - for (int len = 0, c = codes.length; len < c; len++) - { - LookupTreeNode leaf = new MakeUpTreeNode((len + 28) * 64); - addLookupTreeNode(codes[len], root, leaf); - } - } - - private static void addLookupTreeNode(short code, NonLeafLookupTreeNode root, - LookupTreeNode leaf) - { - int codeLength = code >> 8; - int pattern = code & 0xFF; - NonLeafLookupTreeNode node = root; - for (int p = codeLength - 1; p > 0; p--) - { - int bit = (pattern >> p) & 0x01; - LookupTreeNode child = node.get(bit); - if (child == null) - { - child = new NonLeafLookupTreeNode(); - node.set(bit, child); - } - if (child instanceof NonLeafLookupTreeNode) - { - node = (NonLeafLookupTreeNode)child; - } - else - { - throw new IllegalStateException("NonLeafLookupTreeNode expected, was " - + child.getClass().getName()); - } - } - int bit = pattern & 0x01; - if (node.get(bit) != null) - { - throw new IllegalStateException("Two codes conflicting in lookup tree"); - } - node.set(bit, leaf); - } - - /** Base class for all nodes in the lookup tree for code words. */ - private abstract static class LookupTreeNode - { - - public abstract CodeWord getNextCodeWord(CCITTFaxG31DDecodeInputStream decoder) - throws IOException; - - } - - /** Interface for code words. */ - private interface CodeWord - { - int getType(); - int execute(CCITTFaxG31DDecodeInputStream decoder) throws IOException; - } - - /** Non-leaf nodes that hold a child node for both the 0 and 1 cases for the lookup tree. 
*/ - private static class NonLeafLookupTreeNode extends LookupTreeNode - { - - private LookupTreeNode zero; - private LookupTreeNode one; - - public void set(int bit, LookupTreeNode node) - { - if (bit == 0) - { - this.zero = node; - } - else - { - this.one = node; - } - } - - public LookupTreeNode get(int bit) - { - return (bit == 0) ? this.zero : this.one; - } - - public CodeWord getNextCodeWord(CCITTFaxG31DDecodeInputStream decoder) - throws IOException - { - int bit = decoder.readBit(); - if (bit < 0) - { - return null; - } - LookupTreeNode node = get(bit); - if (node != null) - { - return node.getNextCodeWord(decoder); - } - throw new IOException("Invalid code word encountered"); - } - - } - - /** This node represents a run length of either 0 or 1. */ - private static class RunLengthTreeNode extends LookupTreeNode implements CodeWord - { - - private final int bit; - private final int length; - - RunLengthTreeNode(int bit, int length) - { - this.bit = bit; - this.length = length; - } - - public CodeWord getNextCodeWord(CCITTFaxG31DDecodeInputStream decoder) throws IOException - { - return this; - } - - public int execute(CCITTFaxG31DDecodeInputStream decoder) - { - decoder.writeRun(this.bit, this.length); - return length; - } - - public int getType() - { - return CODE_WORD; - } - - public String toString() - { - return "Run Length for " + length + " bits of " + (bit == 0 ? "white" : "black"); - } - - } - - /** Represents a make-up code word. 
*/ - private static class MakeUpTreeNode extends LookupTreeNode implements CodeWord - { - - private final int length; - - MakeUpTreeNode(int length) - { - this.length = length; - } - - public CodeWord getNextCodeWord(CCITTFaxG31DDecodeInputStream decoder) throws IOException - { - return this; - } - - public int execute(CCITTFaxG31DDecodeInputStream decoder) throws IOException - { - decoder.writeNonTerminating(length); - return length; - } - - public int getType() - { - return CODE_WORD; - } - - public String toString() - { - return "Make up code for length " + length; - } - - } - - /** Represents an EOL code word. */ - private static class EndOfLineTreeNode extends LookupTreeNode implements CodeWord - { - - public CodeWord getNextCodeWord(CCITTFaxG31DDecodeInputStream decoder) throws IOException - { - int bit; - do - { - bit = decoder.readBit(); - //bit 1 finishes the EOL, any number of bit 0 allowed as fillers - } while (bit == 0); - if (bit < 0) - { - return null; - } - return this; - } - - public int execute(CCITTFaxG31DDecodeInputStream decoder) throws IOException - { - //nop - return 0; - } - - public int getType() - { - return SIGNAL_EOL; - } - - public String toString() - { - return "EOL"; - } - - } - -} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/FillOrderChangeInputStream.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/FillOrderChangeInputStream.java deleted file mode 100644 index 95f90aee747..00000000000 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/FillOrderChangeInputStream.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* $Id$ */ - -package org.apache.pdfbox.filter.ccitt; - -import java.io.FilterInputStream; -import java.io.IOException; -import java.io.InputStream; - -/** - * This filtering input stream does a fill order change required for certain TIFF images. - */ -public final class FillOrderChangeInputStream extends FilterInputStream -{ - /** - * Main constructor. - * @param in the underlying input stream - */ - public FillOrderChangeInputStream(InputStream in) - { - super(in); - } - - /** {@inheritDoc} */ - @Override - public int read(byte[] b, int off, int len) throws IOException - { - int result = super.read(b, off, len); - if (result > 0) - { - int endpos = off + result; - for (int i = off; i < endpos; i++) - { - b[i] = FLIP_TABLE[b[i] & 0xff]; - } - } - return result; - } - - /** {@inheritDoc} */ - @Override - public int read() throws IOException - { - int b = super.read(); - if (b < 0) - { - return b; - } - else - { - return FLIP_TABLE[b] & 0xff; - } - } - - // Table to be used when fillOrder = 2, for flipping bytes. 
- // Copied from the TIFFFaxDecoder class - private static final byte[] FLIP_TABLE = { - 0, -128, 64, -64, 32, -96, 96, -32, - 16, -112, 80, -48, 48, -80, 112, -16, - 8, -120, 72, -56, 40, -88, 104, -24, - 24, -104, 88, -40, 56, -72, 120, -8, - 4, -124, 68, -60, 36, -92, 100, -28, - 20, -108, 84, -44, 52, -76, 116, -12, - 12, -116, 76, -52, 44, -84, 108, -20, - 28, -100, 92, -36, 60, -68, 124, -4, - 2, -126, 66, -62, 34, -94, 98, -30, - 18, -110, 82, -46, 50, -78, 114, -14, - 10, -118, 74, -54, 42, -86, 106, -22, - 26, -102, 90, -38, 58, -70, 122, -6, - 6, -122, 70, -58, 38, -90, 102, -26, - 22, -106, 86, -42, 54, -74, 118, -10, - 14, -114, 78, -50, 46, -82, 110, -18, - 30, -98, 94, -34, 62, -66, 126, -2, - 1, -127, 65, -63, 33, -95, 97, -31, - 17, -111, 81, -47, 49, -79, 113, -15, - 9, -119, 73, -55, 41, -87, 105, -23, - 25, -103, 89, -39, 57, -71, 121, -7, - 5, -123, 69, -59, 37, -91, 101, -27, - 21, -107, 85, -43, 53, -75, 117, -11, - 13, -115, 77, -51, 45, -83, 109, -19, - 29, -99, 93, -35, 61, -67, 125, -3, - 3, -125, 67, -61, 35, -93, 99, -29, - 19, -109, 83, -45, 51, -77, 115, -13, - 11, -117, 75, -53, 43, -85, 107, -21, - 27, -101, 91, -37, 59, -69, 123, -5, - 7, -121, 71, -57, 39, -89, 103, -25, - 23, -105, 87, -41, 55, -73, 119, -9, - 15, -113, 79, -49, 47, -81, 111, -17, - 31, -97, 95, -33, 63, -65, 127, -1, - }; - // end -} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/PackedBitArray.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/PackedBitArray.java deleted file mode 100644 index ec316556a06..00000000000 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/PackedBitArray.java +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* $Id$ */ - -package org.apache.pdfbox.filter.ccitt; - -/** - * Represents an array of bits packed in a byte array of fixed size. - */ -final class PackedBitArray -{ - private int bitCount; - private byte[] data; - - /** - * Constructs a new bit array. - * @param bitCount the number of bits to maintain - */ - PackedBitArray(int bitCount) - { - this.bitCount = bitCount; - int byteCount = (bitCount + 7) / 8; - this.data = new byte[byteCount]; - } - - private int byteOffset(int offset) - { - return offset / 8; - } - - private int bitOffset(int offset) - { - return offset % 8; - } - - /** - * Sets a bit at the given offset. - * @param offset the offset - */ - public void set(int offset) - { - int byteOffset = byteOffset(offset); - this.data[byteOffset] |= 1 << bitOffset(offset); - } - - /** - * Clears a bit at the given offset. - * @param offset the offset - */ - public void clear(int offset) - { - int byteOffset = byteOffset(offset); - int bitOffset = bitOffset(offset); - this.data[byteOffset] &= ~(1 << bitOffset); - } - - /** - * Sets a run of bits at the given offset to either 1 or 0. 
- * @param offset the offset - * @param length the number of bits to set - * @param bit 1 to set the bit, 0 to clear it - */ - public void setBits(int offset, int length, int bit) - { - if (bit == 0) - { - clearBits(offset, length); - } - else - { - setBits(offset, length); - } - } - - /** - * Sets a run of bits at the given offset to either 1. - * @param offset the offset - * @param length the number of bits to set - */ - public void setBits(int offset, int length) - { - if (length == 0) - { - return; - } - int startBitOffset = bitOffset(offset); - int firstByte = byteOffset(offset); - int lastBitOffset = offset + length; - if (lastBitOffset > getBitCount()) - { - throw new IndexOutOfBoundsException("offset + length > bit count"); - } - int lastByte = byteOffset(lastBitOffset); - int endBitOffset = bitOffset(lastBitOffset); - - if (firstByte == lastByte) - { - //Only one byte affected - int mask = (1 << endBitOffset) - (1 << startBitOffset); - this.data[firstByte] |= mask; - } - else - { - //Bits spanning multiple bytes - this.data[firstByte] |= 0xFF << startBitOffset; - for (int i = firstByte + 1; i < lastByte; i++) - { - this.data[i] = (byte)0xFF; - } - if (endBitOffset > 0) - { - this.data[lastByte] |= 0xFF >> (8 - endBitOffset); - } - } - } - - /** - * Clears a run of bits at the given offset. 
- * @param offset the offset - * @param length the number of bits to clear - */ - public void clearBits(int offset, int length) - { - if (length == 0) - { - return; - } - int startBitOffset = offset % 8; - int firstByte = byteOffset(offset); - int lastBitOffset = offset + length; - int lastByte = byteOffset(lastBitOffset); - int endBitOffset = lastBitOffset % 8; - - if (firstByte == lastByte) - { - //Only one byte affected - int mask = (1 << endBitOffset) - (1 << startBitOffset); - this.data[firstByte] &= ~mask; - } - else - { - //Bits spanning multiple bytes - this.data[firstByte] &= ~(0xFF << startBitOffset); - for (int i = firstByte + 1; i < lastByte; i++) - { - this.data[i] = 0x00; - } - if (endBitOffset > 0) - { - this.data[lastByte] &= ~(0xFF >> (8 - endBitOffset)); - } - } - } - - /** - * Clear all bits in the array. - */ - public void clear() - { - clearBits(0, getBitCount()); - } - - /** - * Returns the number of bits maintained by this array. - * @return the number of bits - */ - public int getBitCount() - { - return this.bitCount; - } - - /** - * Returns the size of the byte buffer for this array. - * @return the size of the byte buffer - */ - public int getByteCount() - { - return this.data.length; - } - - /** - * Returns the underlying byte buffer. - *

- * Note: the actual buffer is returned. If it's manipulated - * the content of the bit array changes. - * @return the underlying data buffer - */ - public byte[] getData() - { - return this.data; - } - - /** {@inheritDoc} */ - public String toString() - { - return toBitString(this.data).substring(0, this.bitCount); - } - - /** - * Converts a byte to a "binary" String of 0s and 1s. - * @param data the value to convert - * @return the binary string - */ - public static String toBitString(byte data) - { - byte[] buf = new byte[] {data}; - return toBitString(buf); - } - - /** - * Converts a series of bytes to a "binary" String of 0s and 1s. - * @param data the data - * @return the binary string - */ - public static String toBitString(byte[] data) - { - return toBitString(data, 0, data.length); - } - - /** - * Converts a series of bytes to a "binary" String of 0s and 1s. - * @param data the data - * @param start the start offset - * @param len the number of bytes to convert - * @return the binary string - */ - public static String toBitString(byte[] data, int start, int len) - { - StringBuffer sb = new StringBuffer(); - for (int x = start, end = start + len; x < end; x++) - { - for (int i = 0; i < 8; i++) - { - int mask = 1 << i; - int value = data[x] & mask; - sb.append(value != 0 ? '1' : '0'); - } - } - return sb.toString(); - } - -} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/TIFFFaxDecoder.java b/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/TIFFFaxDecoder.java deleted file mode 100644 index 717fb8a8088..00000000000 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/TIFFFaxDecoder.java +++ /dev/null @@ -1,1599 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* $Id$ */ - -package org.apache.pdfbox.filter.ccitt; - -import java.io.IOException; - -/** - * CCITT Fax decoder. - */ -public final class TIFFFaxDecoder -{ - - private int bitPointer, bytePointer; - private byte[] data; - private int w, h; - private int fillOrder; - - // Data structures needed to store changing elements for the previous - // and the current scanline - private int changingElemSize = 0; - private int[] prevChangingElems; - private int[] currChangingElems; - - // Element at which to start search in getNextChangingElement - private int lastChangingElement = 0; - - private int compression = 2; - - // Variables set by T4Options - private int uncompressedMode = 0; - private int fillBits = 0; - private int oneD; - - private static final int[] TABLE1 = { 0x00, // 0 bits are left in first byte - SHOULD NOT HAPPEN - 0x01, // 1 bits are left in first byte - 0x03, // 2 bits are left in first byte - 0x07, // 3 bits are left in first byte - 0x0f, // 4 bits are left in first byte - 0x1f, // 5 bits are left in first byte - 0x3f, // 6 bits are left in first byte - 0x7f, // 7 bits are left in first byte - 0xff // 8 bits are left in first byte - }; - - private static final int[] TABLE2 = { 0x00, // 0 - 0x80, // 1 - 0xc0, // 2 - 0xe0, // 3 - 0xf0, // 4 - 0xf8, // 5 - 0xfc, // 6 - 0xfe, // 7 - 0xff // 8 - }; - - // Table to be used when fillOrder = 2, for flipping bytes. 
- private static final byte[] FLIP_TABLE = {}; - - // The main 10 bit white runs lookup table - private static final short[] WHITE = { - // 0 - 7 - 6430, 6400, 6400, 6400, 3225, 3225, 3225, 3225, - // 8 - 15 - 944, 944, 944, 944, 976, 976, 976, 976, - // 16 - 23 - 1456, 1456, 1456, 1456, 1488, 1488, 1488, 1488, - // 24 - 31 - 718, 718, 718, 718, 718, 718, 718, 718, - // 32 - 39 - 750, 750, 750, 750, 750, 750, 750, 750, - // 40 - 47 - 1520, 1520, 1520, 1520, 1552, 1552, 1552, 1552, - // 48 - 55 - 428, 428, 428, 428, 428, 428, 428, 428, - // 56 - 63 - 428, 428, 428, 428, 428, 428, 428, 428, - // 64 - 71 - 654, 654, 654, 654, 654, 654, 654, 654, - // 72 - 79 - 1072, 1072, 1072, 1072, 1104, 1104, 1104, 1104, - // 80 - 87 - 1136, 1136, 1136, 1136, 1168, 1168, 1168, 1168, - // 88 - 95 - 1200, 1200, 1200, 1200, 1232, 1232, 1232, 1232, - // 96 - 103 - 622, 622, 622, 622, 622, 622, 622, 622, - // 104 - 111 - 1008, 1008, 1008, 1008, 1040, 1040, 1040, 1040, - // 112 - 119 - 44, 44, 44, 44, 44, 44, 44, 44, - // 120 - 127 - 44, 44, 44, 44, 44, 44, 44, 44, - // 128 - 135 - 396, 396, 396, 396, 396, 396, 396, 396, - // 136 - 143 - 396, 396, 396, 396, 396, 396, 396, 396, - // 144 - 151 - 1712, 1712, 1712, 1712, 1744, 1744, 1744, 1744, - // 152 - 159 - 846, 846, 846, 846, 846, 846, 846, 846, - // 160 - 167 - 1264, 1264, 1264, 1264, 1296, 1296, 1296, 1296, - // 168 - 175 - 1328, 1328, 1328, 1328, 1360, 1360, 1360, 1360, - // 176 - 183 - 1392, 1392, 1392, 1392, 1424, 1424, 1424, 1424, - // 184 - 191 - 686, 686, 686, 686, 686, 686, 686, 686, - // 192 - 199 - 910, 910, 910, 910, 910, 910, 910, 910, - // 200 - 207 - 1968, 1968, 1968, 1968, 2000, 2000, 2000, 2000, - // 208 - 215 - 2032, 2032, 2032, 2032, 16, 16, 16, 16, - // 216 - 223 - 10257, 10257, 10257, 10257, 12305, 12305, 12305, 12305, - // 224 - 231 - 330, 330, 330, 330, 330, 330, 330, 330, - // 232 - 239 - 330, 330, 330, 330, 330, 330, 330, 330, - // 240 - 247 - 330, 330, 330, 330, 330, 330, 330, 330, - // 248 - 255 - 330, 330, 
330, 330, 330, 330, 330, 330, - // 256 - 263 - 362, 362, 362, 362, 362, 362, 362, 362, - // 264 - 271 - 362, 362, 362, 362, 362, 362, 362, 362, - // 272 - 279 - 362, 362, 362, 362, 362, 362, 362, 362, - // 280 - 287 - 362, 362, 362, 362, 362, 362, 362, 362, - // 288 - 295 - 878, 878, 878, 878, 878, 878, 878, 878, - // 296 - 303 - 1904, 1904, 1904, 1904, 1936, 1936, 1936, 1936, - // 304 - 311 - -18413, -18413, -16365, -16365, -14317, -14317, -10221, -10221, - // 312 - 319 - 590, 590, 590, 590, 590, 590, 590, 590, - // 320 - 327 - 782, 782, 782, 782, 782, 782, 782, 782, - // 328 - 335 - 1584, 1584, 1584, 1584, 1616, 1616, 1616, 1616, - // 336 - 343 - 1648, 1648, 1648, 1648, 1680, 1680, 1680, 1680, - // 344 - 351 - 814, 814, 814, 814, 814, 814, 814, 814, - // 352 - 359 - 1776, 1776, 1776, 1776, 1808, 1808, 1808, 1808, - // 360 - 367 - 1840, 1840, 1840, 1840, 1872, 1872, 1872, 1872, - // 368 - 375 - 6157, 6157, 6157, 6157, 6157, 6157, 6157, 6157, - // 376 - 383 - 6157, 6157, 6157, 6157, 6157, 6157, 6157, 6157, - // 384 - 391 - -12275, -12275, -12275, -12275, -12275, -12275, -12275, -12275, - // 392 - 399 - -12275, -12275, -12275, -12275, -12275, -12275, -12275, -12275, - // 400 - 407 - 14353, 14353, 14353, 14353, 16401, 16401, 16401, 16401, - // 408 - 415 - 22547, 22547, 24595, 24595, 20497, 20497, 20497, 20497, - // 416 - 423 - 18449, 18449, 18449, 18449, 26643, 26643, 28691, 28691, - // 424 - 431 - 30739, 30739, -32749, -32749, -30701, -30701, -28653, -28653, - // 432 - 439 - -26605, -26605, -24557, -24557, -22509, -22509, -20461, -20461, - // 440 - 447 - 8207, 8207, 8207, 8207, 8207, 8207, 8207, 8207, - // 448 - 455 - 72, 72, 72, 72, 72, 72, 72, 72, - // 456 - 463 - 72, 72, 72, 72, 72, 72, 72, 72, - // 464 - 471 - 72, 72, 72, 72, 72, 72, 72, 72, - // 472 - 479 - 72, 72, 72, 72, 72, 72, 72, 72, - // 480 - 487 - 72, 72, 72, 72, 72, 72, 72, 72, - // 488 - 495 - 72, 72, 72, 72, 72, 72, 72, 72, - // 496 - 503 - 72, 72, 72, 72, 72, 72, 72, 72, - // 504 - 511 - 72, 72, 72, 
72, 72, 72, 72, 72, - // 512 - 519 - 104, 104, 104, 104, 104, 104, 104, 104, - // 520 - 527 - 104, 104, 104, 104, 104, 104, 104, 104, - // 528 - 535 - 104, 104, 104, 104, 104, 104, 104, 104, - // 536 - 543 - 104, 104, 104, 104, 104, 104, 104, 104, - // 544 - 551 - 104, 104, 104, 104, 104, 104, 104, 104, - // 552 - 559 - 104, 104, 104, 104, 104, 104, 104, 104, - // 560 - 567 - 104, 104, 104, 104, 104, 104, 104, 104, - // 568 - 575 - 104, 104, 104, 104, 104, 104, 104, 104, - // 576 - 583 - 4107, 4107, 4107, 4107, 4107, 4107, 4107, 4107, - // 584 - 591 - 4107, 4107, 4107, 4107, 4107, 4107, 4107, 4107, - // 592 - 599 - 4107, 4107, 4107, 4107, 4107, 4107, 4107, 4107, - // 600 - 607 - 4107, 4107, 4107, 4107, 4107, 4107, 4107, 4107, - // 608 - 615 - 266, 266, 266, 266, 266, 266, 266, 266, - // 616 - 623 - 266, 266, 266, 266, 266, 266, 266, 266, - // 624 - 631 - 266, 266, 266, 266, 266, 266, 266, 266, - // 632 - 639 - 266, 266, 266, 266, 266, 266, 266, 266, - // 640 - 647 - 298, 298, 298, 298, 298, 298, 298, 298, - // 648 - 655 - 298, 298, 298, 298, 298, 298, 298, 298, - // 656 - 663 - 298, 298, 298, 298, 298, 298, 298, 298, - // 664 - 671 - 298, 298, 298, 298, 298, 298, 298, 298, - // 672 - 679 - 524, 524, 524, 524, 524, 524, 524, 524, - // 680 - 687 - 524, 524, 524, 524, 524, 524, 524, 524, - // 688 - 695 - 556, 556, 556, 556, 556, 556, 556, 556, - // 696 - 703 - 556, 556, 556, 556, 556, 556, 556, 556, - // 704 - 711 - 136, 136, 136, 136, 136, 136, 136, 136, - // 712 - 719 - 136, 136, 136, 136, 136, 136, 136, 136, - // 720 - 727 - 136, 136, 136, 136, 136, 136, 136, 136, - // 728 - 735 - 136, 136, 136, 136, 136, 136, 136, 136, - // 736 - 743 - 136, 136, 136, 136, 136, 136, 136, 136, - // 744 - 751 - 136, 136, 136, 136, 136, 136, 136, 136, - // 752 - 759 - 136, 136, 136, 136, 136, 136, 136, 136, - // 760 - 767 - 136, 136, 136, 136, 136, 136, 136, 136, - // 768 - 775 - 168, 168, 168, 168, 168, 168, 168, 168, - // 776 - 783 - 168, 168, 168, 168, 168, 168, 168, 168, - // 784 
- 791 - 168, 168, 168, 168, 168, 168, 168, 168, - // 792 - 799 - 168, 168, 168, 168, 168, 168, 168, 168, - // 800 - 807 - 168, 168, 168, 168, 168, 168, 168, 168, - // 808 - 815 - 168, 168, 168, 168, 168, 168, 168, 168, - // 816 - 823 - 168, 168, 168, 168, 168, 168, 168, 168, - // 824 - 831 - 168, 168, 168, 168, 168, 168, 168, 168, - // 832 - 839 - 460, 460, 460, 460, 460, 460, 460, 460, - // 840 - 847 - 460, 460, 460, 460, 460, 460, 460, 460, - // 848 - 855 - 492, 492, 492, 492, 492, 492, 492, 492, - // 856 - 863 - 492, 492, 492, 492, 492, 492, 492, 492, - // 864 - 871 - 2059, 2059, 2059, 2059, 2059, 2059, 2059, 2059, - // 872 - 879 - 2059, 2059, 2059, 2059, 2059, 2059, 2059, 2059, - // 880 - 887 - 2059, 2059, 2059, 2059, 2059, 2059, 2059, 2059, - // 888 - 895 - 2059, 2059, 2059, 2059, 2059, 2059, 2059, 2059, - // 896 - 903 - 200, 200, 200, 200, 200, 200, 200, 200, - // 904 - 911 - 200, 200, 200, 200, 200, 200, 200, 200, - // 912 - 919 - 200, 200, 200, 200, 200, 200, 200, 200, - // 920 - 927 - 200, 200, 200, 200, 200, 200, 200, 200, - // 928 - 935 - 200, 200, 200, 200, 200, 200, 200, 200, - // 936 - 943 - 200, 200, 200, 200, 200, 200, 200, 200, - // 944 - 951 - 200, 200, 200, 200, 200, 200, 200, 200, - // 952 - 959 - 200, 200, 200, 200, 200, 200, 200, 200, - // 960 - 967 - 232, 232, 232, 232, 232, 232, 232, 232, - // 968 - 975 - 232, 232, 232, 232, 232, 232, 232, 232, - // 976 - 983 - 232, 232, 232, 232, 232, 232, 232, 232, - // 984 - 991 - 232, 232, 232, 232, 232, 232, 232, 232, - // 992 - 999 - 232, 232, 232, 232, 232, 232, 232, 232, - // 1000 - 1007 - 232, 232, 232, 232, 232, 232, 232, 232, - // 1008 - 1015 - 232, 232, 232, 232, 232, 232, 232, 232, - // 1016 - 1023 - 232, 232, 232, 232, 232, 232, 232, 232, }; - - // Additional make up codes for both White and Black runs - private static final short[] ADDITIONAL_MAKEUP = { 28679, 28679, 31752, (short) 32777, - (short) 33801, (short) 34825, (short) 35849, (short) 36873, (short) 29703, - (short) 29703, (short) 
30727, (short) 30727, (short) 37897, (short) 38921, - (short) 39945, (short) 40969 }; - - // Initial black run look up table, uses the first 4 bits of a code - private static final short[] INIT_BLACK = { - // 0 - 7 - 3226, 6412, 200, 168, 38, 38, 134, 134, - // 8 - 15 - 100, 100, 100, 100, 68, 68, 68, 68 }; - - // - private static final short[] TWO_BIT_BLACK = { 292, 260, 226, 226 }; // 0 - 3 - - // Main black run table, using the last 9 bits of possible 13 bit code - private static final short[] BLACK = { - // 0 - 7 - 62, 62, 30, 30, 0, 0, 0, 0, - // 8 - 15 - 0, 0, 0, 0, 0, 0, 0, 0, - // 16 - 23 - 0, 0, 0, 0, 0, 0, 0, 0, - // 24 - 31 - 0, 0, 0, 0, 0, 0, 0, 0, - // 32 - 39 - 3225, 3225, 3225, 3225, 3225, 3225, 3225, 3225, - // 40 - 47 - 3225, 3225, 3225, 3225, 3225, 3225, 3225, 3225, - // 48 - 55 - 3225, 3225, 3225, 3225, 3225, 3225, 3225, 3225, - // 56 - 63 - 3225, 3225, 3225, 3225, 3225, 3225, 3225, 3225, - // 64 - 71 - 588, 588, 588, 588, 588, 588, 588, 588, - // 72 - 79 - 1680, 1680, 20499, 22547, 24595, 26643, 1776, 1776, - // 80 - 87 - 1808, 1808, -24557, -22509, -20461, -18413, 1904, 1904, - // 88 - 95 - 1936, 1936, -16365, -14317, 782, 782, 782, 782, - // 96 - 103 - 814, 814, 814, 814, -12269, -10221, 10257, 10257, - // 104 - 111 - 12305, 12305, 14353, 14353, 16403, 18451, 1712, 1712, - // 112 - 119 - 1744, 1744, 28691, 30739, -32749, -30701, -28653, -26605, - // 120 - 127 - 2061, 2061, 2061, 2061, 2061, 2061, 2061, 2061, - // 128 - 135 - 424, 424, 424, 424, 424, 424, 424, 424, - // 136 - 143 - 424, 424, 424, 424, 424, 424, 424, 424, - // 144 - 151 - 424, 424, 424, 424, 424, 424, 424, 424, - // 152 - 159 - 424, 424, 424, 424, 424, 424, 424, 424, - // 160 - 167 - 750, 750, 750, 750, 1616, 1616, 1648, 1648, - // 168 - 175 - 1424, 1424, 1456, 1456, 1488, 1488, 1520, 1520, - // 176 - 183 - 1840, 1840, 1872, 1872, 1968, 1968, 8209, 8209, - // 184 - 191 - 524, 524, 524, 524, 524, 524, 524, 524, - // 192 - 199 - 556, 556, 556, 556, 556, 556, 556, 556, - // 200 - 
207 - 1552, 1552, 1584, 1584, 2000, 2000, 2032, 2032, - // 208 - 215 - 976, 976, 1008, 1008, 1040, 1040, 1072, 1072, - // 216 - 223 - 1296, 1296, 1328, 1328, 718, 718, 718, 718, - // 224 - 231 - 456, 456, 456, 456, 456, 456, 456, 456, - // 232 - 239 - 456, 456, 456, 456, 456, 456, 456, 456, - // 240 - 247 - 456, 456, 456, 456, 456, 456, 456, 456, - // 248 - 255 - 456, 456, 456, 456, 456, 456, 456, 456, - // 256 - 263 - 326, 326, 326, 326, 326, 326, 326, 326, - // 264 - 271 - 326, 326, 326, 326, 326, 326, 326, 326, - // 272 - 279 - 326, 326, 326, 326, 326, 326, 326, 326, - // 280 - 287 - 326, 326, 326, 326, 326, 326, 326, 326, - // 288 - 295 - 326, 326, 326, 326, 326, 326, 326, 326, - // 296 - 303 - 326, 326, 326, 326, 326, 326, 326, 326, - // 304 - 311 - 326, 326, 326, 326, 326, 326, 326, 326, - // 312 - 319 - 326, 326, 326, 326, 326, 326, 326, 326, - // 320 - 327 - 358, 358, 358, 358, 358, 358, 358, 358, - // 328 - 335 - 358, 358, 358, 358, 358, 358, 358, 358, - // 336 - 343 - 358, 358, 358, 358, 358, 358, 358, 358, - // 344 - 351 - 358, 358, 358, 358, 358, 358, 358, 358, - // 352 - 359 - 358, 358, 358, 358, 358, 358, 358, 358, - // 360 - 367 - 358, 358, 358, 358, 358, 358, 358, 358, - // 368 - 375 - 358, 358, 358, 358, 358, 358, 358, 358, - // 376 - 383 - 358, 358, 358, 358, 358, 358, 358, 358, - // 384 - 391 - 490, 490, 490, 490, 490, 490, 490, 490, - // 392 - 399 - 490, 490, 490, 490, 490, 490, 490, 490, - // 400 - 407 - 4113, 4113, 6161, 6161, 848, 848, 880, 880, - // 408 - 415 - 912, 912, 944, 944, 622, 622, 622, 622, - // 416 - 423 - 654, 654, 654, 654, 1104, 1104, 1136, 1136, - // 424 - 431 - 1168, 1168, 1200, 1200, 1232, 1232, 1264, 1264, - // 432 - 439 - 686, 686, 686, 686, 1360, 1360, 1392, 1392, - // 440 - 447 - 12, 12, 12, 12, 12, 12, 12, 12, - // 448 - 455 - 390, 390, 390, 390, 390, 390, 390, 390, - // 456 - 463 - 390, 390, 390, 390, 390, 390, 390, 390, - // 464 - 471 - 390, 390, 390, 390, 390, 390, 390, 390, - // 472 - 479 - 390, 390, 390, 390, 390, 
390, 390, 390, - // 480 - 487 - 390, 390, 390, 390, 390, 390, 390, 390, - // 488 - 495 - 390, 390, 390, 390, 390, 390, 390, 390, - // 496 - 503 - 390, 390, 390, 390, 390, 390, 390, 390, - // 504 - 511 - 390, 390, 390, 390, 390, 390, 390, 390, }; - - private static final byte[] TWO_DCODES = { - // 0 - 7 - 80, 88, 23, 71, 30, 30, 62, 62, - // 8 - 15 - 4, 4, 4, 4, 4, 4, 4, 4, - // 16 - 23 - 11, 11, 11, 11, 11, 11, 11, 11, - // 24 - 31 - 11, 11, 11, 11, 11, 11, 11, 11, - // 32 - 39 - 35, 35, 35, 35, 35, 35, 35, 35, - // 40 - 47 - 35, 35, 35, 35, 35, 35, 35, 35, - // 48 - 55 - 51, 51, 51, 51, 51, 51, 51, 51, - // 56 - 63 - 51, 51, 51, 51, 51, 51, 51, 51, - // 64 - 71 - 41, 41, 41, 41, 41, 41, 41, 41, - // 72 - 79 - 41, 41, 41, 41, 41, 41, 41, 41, - // 80 - 87 - 41, 41, 41, 41, 41, 41, 41, 41, - // 88 - 95 - 41, 41, 41, 41, 41, 41, 41, 41, - // 96 - 103 - 41, 41, 41, 41, 41, 41, 41, 41, - // 104 - 111 - 41, 41, 41, 41, 41, 41, 41, 41, - // 112 - 119 - 41, 41, 41, 41, 41, 41, 41, 41, - // 120 - 127 - 41, 41, 41, 41, 41, 41, 41, 41, }; - - /** - * @param fillOrderValue The fill order of the compressed data bytes. 
- * @param width The width of the image in pixels - * @param height The height of the image in pixels - */ - public TIFFFaxDecoder(int fillOrderValue, int width, int height) - { - fillOrder = fillOrderValue; - w = width; - h = height; - - bitPointer = 0; - bytePointer = 0; - prevChangingElems = new int[width + 1]; - currChangingElems = new int[width + 1]; - } - - // One-dimensional decoding methods - - public void decode1D(byte[] buffer, byte[] compData, int startX, int height) throws IOException - { - this.data = compData; - - int lineOffset = 0; - int scanlineStride = (w + 7) / 8; - - bitPointer = 0; - bytePointer = 0; - - for (int i = 0; i < height; i++) - { - decodeNextScanline(buffer, lineOffset, startX); - lineOffset += scanlineStride; - } - } - - public void decodeNextScanline(byte[] buffer, int lineOffset, int bitOffset) throws IOException - { - int bits = 0, code = 0, isT = 0; - int current, entry, twoBits; - boolean isWhite = true; - - // Initialize starting of the changing elements array - changingElemSize = 0; - - // While scanline not complete - while (bitOffset < w) - { - while (isWhite) - { - // White run - current = nextNBits(10); - entry = WHITE[current]; - - // Get the 3 fields from the entry - isT = entry & 0x0001; - bits = (entry >>> 1) & 0x0f; - - if (bits == 12) - { // Additional Make up code - // Get the next 2 bits - twoBits = nextLesserThan8Bits(2); - // Consolidate the 2 new bits and last 2 bits into 4 bits - current = ((current << 2) & 0x000c) | twoBits; - entry = ADDITIONAL_MAKEUP[current]; - bits = (entry >>> 1) & 0x07; // 3 bits 0000 0111 - code = (entry >>> 4) & 0x0fff; // 12 bits - bitOffset += code; // Skip white run - - updatePointer(4 - bits); - } - else if (bits == 0) - { // ERROR - throw new IOException("TIFFFaxDecoder: Invalid code encountered."); - } - else if (bits == 15) - { // EOL - throw new IOException("TIFFFaxDecoder: EOL encountered in white run."); - } - else - { - // 11 bits - 0000 0111 1111 1111 = 0x07ff - code = 
(entry >>> 5) & 0x07ff; - bitOffset += code; - - updatePointer(10 - bits); - if (isT == 0) - { - isWhite = false; - currChangingElems[changingElemSize++] = bitOffset; - } - } - } - - // Check whether this run completed one width, if so - // advance to next byte boundary for compression = 2. - if (bitOffset == w) - { - if (compression == 2) - { - advancePointer(); - } - break; - } - - while (!isWhite) - { - // Black run - current = nextLesserThan8Bits(4); - entry = INIT_BLACK[current]; - - // Get the 3 fields from the entry - isT = entry & 0x0001; - bits = (entry >>> 1) & 0x000f; - code = (entry >>> 5) & 0x07ff; - - if (code == 100) - { - current = nextNBits(9); - entry = BLACK[current]; - - // Get the 3 fields from the entry - isT = entry & 0x0001; - bits = (entry >>> 1) & 0x000f; - code = (entry >>> 5) & 0x07ff; - - if (bits == 12) - { - // Additional makeup codes - updatePointer(5); - current = nextLesserThan8Bits(4); - entry = ADDITIONAL_MAKEUP[current]; - bits = (entry >>> 1) & 0x07; // 3 bits 0000 0111 - code = (entry >>> 4) & 0x0fff; // 12 bits - - setToBlack(buffer, lineOffset, bitOffset, code); - bitOffset += code; - - updatePointer(4 - bits); - } - else if (bits == 15) - { - // EOL code - throw new IOException("TIFFFaxDecoder: EOL encountered in black run."); - } - else - { - setToBlack(buffer, lineOffset, bitOffset, code); - bitOffset += code; - - updatePointer(9 - bits); - if (isT == 0) - { - isWhite = true; - currChangingElems[changingElemSize++] = bitOffset; - } - } - } - else if (code == 200) - { - // Is a Terminating code - current = nextLesserThan8Bits(2); - entry = TWO_BIT_BLACK[current]; - code = (entry >>> 5) & 0x07ff; - bits = (entry >>> 1) & 0x0f; - - setToBlack(buffer, lineOffset, bitOffset, code); - bitOffset += code; - - updatePointer(2 - bits); - isWhite = true; - currChangingElems[changingElemSize++] = bitOffset; - } - else - { - // Is a Terminating code - setToBlack(buffer, lineOffset, bitOffset, code); - bitOffset += code; - - 
updatePointer(4 - bits); - isWhite = true; - currChangingElems[changingElemSize++] = bitOffset; - } - } - - // Check whether this run completed one width - if (bitOffset == w) - { - if (compression == 2) - { - advancePointer(); - } - break; - } - } - - currChangingElems[changingElemSize++] = bitOffset; - } - - // Two-dimensional decoding methods - - public void decode2D(byte[] buffer, byte[] compData, int startX, int height, long tiffT4Options) - throws IOException - { - this.data = compData; - compression = 3; - - bitPointer = 0; - bytePointer = 0; - - int scanlineStride = (w + 7) / 8; - - int a0, a1, b1, b2; - int[] b = new int[2]; - int entry, code, bits; - boolean isWhite; - int currIndex = 0; - int[] temp; - - // fillBits - dealt with this in readEOL - // 1D/2D encoding - dealt with this in readEOL - - // uncompressedMode - haven't dealt with this yet. - - oneD = (int) (tiffT4Options & 0x01); - uncompressedMode = (int) ((tiffT4Options & 0x02) >> 1); - fillBits = (int) ((tiffT4Options & 0x04) >> 2); - - // The data must start with an EOL code - if (readEOL() != 1) - { - throw new IOException("TIFFFaxDecoder: First scanline must be 1D encoded."); - } - - int lineOffset = 0; - int bitOffset; - - // Then the 1D encoded scanline data will occur, changing elements - // array gets set. - decodeNextScanline(buffer, lineOffset, startX); - lineOffset += scanlineStride; - - for (int lines = 1; lines < height; lines++) - { - - // Every line must begin with an EOL followed by a bit which - // indicates whether the following scanline is 1D or 2D encoded. - if (readEOL() == 0) - { - // 2D encoded scanline follows - - // Initialize previous scanlines changing elements, and - // initialize current scanline's changing elements array - temp = prevChangingElems; - prevChangingElems = currChangingElems; - currChangingElems = temp; - currIndex = 0; - - // a0 has to be set just before the start of this scanline. 
- a0 = -1; - isWhite = true; - bitOffset = startX; - - lastChangingElement = 0; - - while (bitOffset < w) - { - // Get the next changing element - getNextChangingElement(a0, isWhite, b); - - b1 = b[0]; - b2 = b[1]; - - // Get the next seven bits - entry = nextLesserThan8Bits(7); - - // Run these through the 2DCodes table - entry = (TWO_DCODES[entry] & 0xff); - - // Get the code and the number of bits used up - code = (entry & 0x78) >>> 3; - bits = entry & 0x07; - - if (code == 0) - { - if (!isWhite) - { - setToBlack(buffer, lineOffset, bitOffset, b2 - bitOffset); - } - bitOffset = a0 = b2; - - // Set pointer to consume the correct number of bits. - updatePointer(7 - bits); - } - else if (code == 1) - { - // Horizontal - updatePointer(7 - bits); - - // identify the next 2 codes. - int number; - if (isWhite) - { - number = decodeWhiteCodeWord(); - bitOffset += number; - currChangingElems[currIndex++] = bitOffset; - - number = decodeBlackCodeWord(); - setToBlack(buffer, lineOffset, bitOffset, number); - bitOffset += number; - currChangingElems[currIndex++] = bitOffset; - } - else - { - number = decodeBlackCodeWord(); - setToBlack(buffer, lineOffset, bitOffset, number); - bitOffset += number; - currChangingElems[currIndex++] = bitOffset; - - number = decodeWhiteCodeWord(); - bitOffset += number; - currChangingElems[currIndex++] = bitOffset; - } - - a0 = bitOffset; - } - else if (code <= 8) - { - // Vertical - a1 = b1 + (code - 5); - - currChangingElems[currIndex++] = a1; - - // We write the current color till a1 - 1 pos, - // since a1 is where the next color starts - if (!isWhite) - { - setToBlack(buffer, lineOffset, bitOffset, a1 - bitOffset); - } - bitOffset = a0 = a1; - isWhite = !isWhite; - - updatePointer(7 - bits); - } - else - { - throw new IOException( - "TIFFFaxDecoder: Invalid code encountered while decoding 2D group 3 compressed data."); - } - } - - // Add the changing element beyond the current scanline for the - // other color too - 
currChangingElems[currIndex++] = bitOffset; - changingElemSize = currIndex; - } - else - { - // 1D encoded scanline follows - decodeNextScanline(buffer, lineOffset, startX); - } - - lineOffset += scanlineStride; - } - } - - public synchronized void decodeT6(byte[] buffer, byte[] compData, int startX, int height, - long tiffT6Options, boolean encodedByteAlign) throws IOException - { - this.data = compData; - compression = 4; - - bitPointer = 0; - bytePointer = 0; - - int scanlineStride = (w + 7) / 8; - - int a0, a1, b1, b2; - int entry, code, bits; - boolean isWhite; - int currIndex; - int[] temp; - - // Return values from getNextChangingElement - int[] b = new int[2]; - - // uncompressedMode - have written some code for this, but this - // has not been tested due to lack of test images using this optional - - uncompressedMode = (int) ((tiffT6Options & 0x02) >> 1); - - // Local cached reference - int[] cce = currChangingElems; - - // Assume invisible preceding row of all white pixels and insert - // both black and white changing elements beyond the end of this - // imaginary scanline. - changingElemSize = 0; - cce[changingElemSize++] = w; - cce[changingElemSize++] = w; - - int lineOffset = 0; - int bitOffset; - - for (int lines = 0; lines < height; lines++) - { - if (encodedByteAlign && bitPointer != 0) - { - bitPointer = 0; - bytePointer++; - } - // a0 has to be set just before the start of the scanline. - a0 = -1; - isWhite = true; - - // Assign the changing elements of the previous scanline to - // prevChangingElems and start putting this new scanline's - // changing elements into the currChangingElems. 
- temp = prevChangingElems; - prevChangingElems = currChangingElems; - cce = currChangingElems = temp; - currIndex = 0; - - // Start decoding the scanline at startX in the raster - bitOffset = startX; - - // Reset search start position for getNextChangingElement - lastChangingElement = 0; - - // Till one whole scanline is decoded - while (bitOffset < w) - { - // Get the next changing element - getNextChangingElement(a0, isWhite, b); - b1 = b[0]; - b2 = b[1]; - - // Get the next seven bits - entry = nextLesserThan8Bits(7); - // Run these through the 2DCodes table - entry = (TWO_DCODES[entry] & 0xff); - - // Get the code and the number of bits used up - code = (entry & 0x78) >>> 3; - bits = entry & 0x07; - - if (code == 0) - { // Pass - // We always assume WhiteIsZero format for fax. - if (!isWhite) - { - setToBlack(buffer, lineOffset, bitOffset, b2 - bitOffset); - } - bitOffset = a0 = b2; - - // Set pointer to only consume the correct number of bits. - updatePointer(7 - bits); - } - else if (code == 1) - { // Horizontal - // Set pointer to only consume the correct number of bits. - updatePointer(7 - bits); - - // identify the next 2 alternating color codes. 
- int number; - if (isWhite) - { - // Following are white and black runs - number = decodeWhiteCodeWord(); - bitOffset += number; - cce[currIndex++] = bitOffset; - - number = decodeBlackCodeWord(); - setToBlack(buffer, lineOffset, bitOffset, number); - bitOffset += number; - cce[currIndex++] = bitOffset; - } - else - { - // First a black run and then a white run follows - number = decodeBlackCodeWord(); - setToBlack(buffer, lineOffset, bitOffset, number); - bitOffset += number; - cce[currIndex++] = bitOffset; - - number = decodeWhiteCodeWord(); - bitOffset += number; - cce[currIndex++] = bitOffset; - } - - a0 = bitOffset; - } - else if (code <= 8) - { // Vertical - a1 = b1 + (code - 5); - cce[currIndex++] = a1; - - // We write the current color till a1 - 1 pos, - // since a1 is where the next color starts - if (!isWhite) - { - setToBlack(buffer, lineOffset, bitOffset, a1 - bitOffset); - } - bitOffset = a0 = a1; - isWhite = !isWhite; - - updatePointer(7 - bits); - } - else if (code == 11) - { - if (nextLesserThan8Bits(3) != 7) - { - throw new IOException( - "TIFFFaxDecoder: Invalid code encountered while decoding 2D group 4 compressed data."); - } - - int zeros = 0; - boolean exit = false; - - while (!exit) - { - while (nextLesserThan8Bits(1) != 1) - { - zeros++; - } - - if (zeros > 5) - { - // Exit code - - // Zeros before exit code - zeros = zeros - 6; - - if (!isWhite && (zeros > 0)) - { - cce[currIndex++] = bitOffset; - } - - // Zeros before the exit code - bitOffset += zeros; - if (zeros > 0) - { - // Some zeros have been written - isWhite = true; - } - - // Read in the bit which specifies the color of - // the following run - if (nextLesserThan8Bits(1) == 0) - { - if (!isWhite) - { - cce[currIndex++] = bitOffset; - } - isWhite = true; - } - else - { - if (isWhite) - { - cce[currIndex++] = bitOffset; - } - isWhite = false; - } - - exit = true; - } - - if (zeros == 5) - { - if (!isWhite) - { - cce[currIndex++] = bitOffset; - } - bitOffset += zeros; - - // Last 
thing written was white - isWhite = true; - } - else - { - bitOffset += zeros; - - cce[currIndex++] = bitOffset; - setToBlack(buffer, lineOffset, bitOffset, 1); - ++bitOffset; - - // Last thing written was black - isWhite = false; - } - - } - } - else - { - throw new IOException( - "TIFFFaxDecoder: Invalid code encountered while decoding 2D group 4 compressed data."); - } - } - - // workaround for PDFBOX-1916, it is not clear whether the - // code in the class is to blame or if the PDF is corrupt - if (cce.length == currIndex) - { - break; - } - - // Add the changing element beyond the current scanline for the - // other color too - cce[currIndex++] = bitOffset; - - // Number of changing elements in this scanline. - changingElemSize = currIndex; - - lineOffset += scanlineStride; - } - } - - private void setToBlack(byte[] buffer, int lineOffset, int bitOffset, int numBits) - { - int bitNum = 8 * lineOffset + bitOffset; - int lastBit = bitNum + numBits; - - int byteNum = bitNum >> 3; - - // Handle bits in first byte - int shift = bitNum & 0x7; - if (shift > 0) - { - int maskVal = 1 << (7 - shift); - byte val = buffer[byteNum]; - while (maskVal > 0 && bitNum < lastBit) - { - val |= maskVal; - maskVal >>= 1; - ++bitNum; - } - buffer[byteNum] = val; - } - - // Fill in 8 bits at a time - byteNum = bitNum >> 3; - while (bitNum < lastBit - 7) - { - buffer[byteNum++] = (byte) 255; - bitNum += 8; - } - - // Fill in remaining bits - while (bitNum < lastBit) - { - byteNum = bitNum >> 3; - buffer[byteNum] |= 1 << (7 - (bitNum & 0x7)); - ++bitNum; - } - } - - // Returns run length - private int decodeWhiteCodeWord() throws IOException - { - int current, entry, bits, isT, twoBits, code = -1; - int runLength = 0; - boolean isWhite = true; - - while (isWhite) - { - current = nextNBits(10); - entry = WHITE[current]; - - // Get the 3 fields from the entry - isT = entry & 0x0001; - bits = (entry >>> 1) & 0x0f; - - if (bits == 12) - { // Additional Make up code - // Get the next 2 bits 
- twoBits = nextLesserThan8Bits(2); - // Consolidate the 2 new bits and last 2 bits into 4 bits - current = ((current << 2) & 0x000c) | twoBits; - entry = ADDITIONAL_MAKEUP[current]; - bits = (entry >>> 1) & 0x07; // 3 bits 0000 0111 - code = (entry >>> 4) & 0x0fff; // 12 bits - runLength += code; - updatePointer(4 - bits); - } - else if (bits == 0) - { // ERROR - throw new IOException("TIFFFaxDecoder: Invalid code encountered."); - } - else if (bits == 15) - { // EOL - throw new IOException("TIFFFaxDecoder: EOL encountered in white run."); - } - else - { - // 11 bits - 0000 0111 1111 1111 = 0x07ff - code = (entry >>> 5) & 0x07ff; - runLength += code; - updatePointer(10 - bits); - if (isT == 0) - { - isWhite = false; - } - } - } - - return runLength; - } - - // Returns run length - private int decodeBlackCodeWord() throws IOException - { - int current, entry, bits, isT, code = -1; - int runLength = 0; - boolean isWhite = false; - - while (!isWhite) - { - current = nextLesserThan8Bits(4); - entry = INIT_BLACK[current]; - - // Get the 3 fields from the entry - isT = entry & 0x0001; - bits = (entry >>> 1) & 0x000f; - code = (entry >>> 5) & 0x07ff; - - if (code == 100) - { - current = nextNBits(9); - entry = BLACK[current]; - - // Get the 3 fields from the entry - isT = entry & 0x0001; - bits = (entry >>> 1) & 0x000f; - code = (entry >>> 5) & 0x07ff; - - if (bits == 12) - { - // Additional makeup codes - updatePointer(5); - current = nextLesserThan8Bits(4); - entry = ADDITIONAL_MAKEUP[current]; - bits = (entry >>> 1) & 0x07; // 3 bits 0000 0111 - code = (entry >>> 4) & 0x0fff; // 12 bits - runLength += code; - - updatePointer(4 - bits); - } - else if (bits == 15) - { - // EOL code - throw new IOException("TIFFFaxDecoder: EOL encountered in black run."); - } - else - { - runLength += code; - updatePointer(9 - bits); - if (isT == 0) - { - isWhite = true; - } - } - } - else if (code == 200) - { - // Is a Terminating code - current = nextLesserThan8Bits(2); - entry = 
TWO_BIT_BLACK[current]; - code = (entry >>> 5) & 0x07ff; - runLength += code; - bits = (entry >>> 1) & 0x0f; - updatePointer(2 - bits); - isWhite = true; - } - else - { - // Is a Terminating code - runLength += code; - updatePointer(4 - bits); - isWhite = true; - } - } - - return runLength; - } - - private int readEOL() throws IOException - { - if (fillBits == 0) - { - if (nextNBits(12) != 1) - { - throw new IOException("TIFFFaxDecoder: Scanline must begin with EOL."); - } - } - else if (fillBits == 1) - { - - // First EOL code word xxxx 0000 0000 0001 will occur - // As many fill bits will be present as required to make - // the EOL code of 12 bits end on a byte boundary. - - int bitsLeft = 8 - bitPointer; - - if (nextNBits(bitsLeft) != 0) - { - throw new IOException("TIFFFaxDecoder: All fill bits preceding EOL code must be 0."); - } - - // If the number of bitsLeft is less than 8, then to have a 12 - // bit EOL sequence, two more bytes are certainly going to be - // required. The first of them has to be all zeros, so ensure - // that. - if (bitsLeft < 4) - { - if (nextNBits(8) != 0) - { - throw new IOException( - "TIFFFaxDecoder: All fill bits preceding EOL code must be 0."); - } - } - - // There might be a random number of fill bytes with 0s, so - // loop till the EOL of 0000 0001 is found, as long as all - // the bytes preceding it are 0's. - int n; - while ((n = nextNBits(8)) != 1) - { - - // If not all zeros - if (n != 0) - { - throw new IOException( - "TIFFFaxDecoder: All fill bits preceding EOL code must be 0."); - } - } - } - - // If one dimensional encoding mode, then always return 1 - if (oneD == 0) - { - return 1; - } - else - { - // Otherwise for 2D encoding mode, - // The next one bit signifies 1D/2D encoding of next line. 
- return nextLesserThan8Bits(1); - } - } - - private void getNextChangingElement(int a0, boolean isWhite, int[] ret) - { - // Local copies of instance variables - int[] pce = this.prevChangingElems; - int ces = this.changingElemSize; - - // If the previous match was at an odd element, we still - // have to search the preceeding element. - // int start = lastChangingElement & ~0x1; - int start = lastChangingElement > 0 ? lastChangingElement - 1 : 0; - if (isWhite) - { - start &= ~0x1; // Search even numbered elements - } - else - { - start |= 0x1; // Search odd numbered elements - } - - int i = start; - for (; i < ces; i += 2) - { - int temp = pce[i]; - if (temp > a0) - { - lastChangingElement = i; - ret[0] = temp; - break; - } - } - - if (i + 1 < ces) - { - ret[1] = pce[i + 1]; - } - } - - private int nextNBits(int bitsToGet) throws IOException - { - byte b, next, next2next; - int l = data.length - 1; - int bp = this.bytePointer; - - if (fillOrder == 1) - { - b = data[bp]; - - if (bp == l) - { - next = 0x00; - next2next = 0x00; - } - else if ((bp + 1) == l) - { - next = data[bp + 1]; - next2next = 0x00; - } - else - { - next = data[bp + 1]; - next2next = data[bp + 2]; - } - } - else if (fillOrder == 2) - { - b = FLIP_TABLE[data[bp] & 0xff]; - - if (bp == l) - { - next = 0x00; - next2next = 0x00; - } - else if ((bp + 1) == l) - { - next = FLIP_TABLE[data[bp + 1] & 0xff]; - next2next = 0x00; - } - else - { - next = FLIP_TABLE[data[bp + 1] & 0xff]; - next2next = FLIP_TABLE[data[bp + 2] & 0xff]; - } - } - else - { - throw new IOException("TIFFFaxDecoder: TIFF_FILL_ORDER tag must be either 1 or 2."); - } - - int bitsLeft = 8 - bitPointer; - int bitsFromNextByte = bitsToGet - bitsLeft; - int bitsFromNext2NextByte = 0; - if (bitsFromNextByte > 8) - { - bitsFromNext2NextByte = bitsFromNextByte - 8; - bitsFromNextByte = 8; - } - - bytePointer++; - - int i1 = (b & TABLE1[bitsLeft]) << (bitsToGet - bitsLeft); - int i2 = (next & TABLE2[bitsFromNextByte]) >>> (8 - 
bitsFromNextByte); - - int i3 = 0; - if (bitsFromNext2NextByte != 0) - { - i2 <<= bitsFromNext2NextByte; - i3 = (next2next & TABLE2[bitsFromNext2NextByte]) >>> (8 - bitsFromNext2NextByte); - i2 |= i3; - bytePointer++; - bitPointer = bitsFromNext2NextByte; - } - else - { - if (bitsFromNextByte == 8) - { - bitPointer = 0; - bytePointer++; - } - else - { - bitPointer = bitsFromNextByte; - } - } - - int i = i1 | i2; - return i; - } - - private int nextLesserThan8Bits(int bitsToGet) throws IOException - { - byte b, next; - int l = data.length - 1; - int bp = this.bytePointer; - - if (fillOrder == 1) - { - b = data[bp]; - if (bp == l) - { - next = 0x00; - } - else - { - next = data[bp + 1]; - } - } - else if (fillOrder == 2) - { - b = FLIP_TABLE[data[bp] & 0xff]; - if (bp == l) - { - next = 0x00; - } - else - { - next = FLIP_TABLE[data[bp + 1] & 0xff]; - } - } - else - { - throw new IOException("TIFFFaxDecoder: TIFF_FILL_ORDER tag must be either 1 or 2."); - } - - int bitsLeft = 8 - bitPointer; - int bitsFromNextByte = bitsToGet - bitsLeft; - - int shift = bitsLeft - bitsToGet; - int i1, i2; - if (shift >= 0) - { - i1 = (b & TABLE1[bitsLeft]) >>> shift; - bitPointer += bitsToGet; - if (bitPointer == 8) - { - bitPointer = 0; - bytePointer++; - } - } - else - { - i1 = (b & TABLE1[bitsLeft]) << (-shift); - i2 = (next & TABLE2[bitsFromNextByte]) >>> (8 - bitsFromNextByte); - - i1 |= i2; - bytePointer++; - bitPointer = bitsFromNextByte; - } - - return i1; - } - - // Move pointer backwards by given amount of bits - private void updatePointer(int bitsToMoveBack) - { - int i = bitPointer - bitsToMoveBack; - - if (i < 0) - { - bytePointer--; - bitPointer = 8 + i; - } - else - { - bitPointer = i; - } - } - - // Move to the next byte boundary - private boolean advancePointer() - { - if (bitPointer != 0) - { - bytePointer++; - bitPointer = 0; - } - - return true; - } -} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/package.html 
b/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/package.html deleted file mode 100644 index cd5971ae3e9..00000000000 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/ccitt/package.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - -This package contains CCITT encoders and decoders. -This refers to the ITU T.4 (Group 3 Fax) and T.6 (Group 4 Fax) specifications. - - diff --git a/pdfbox/src/main/java/org/apache/pdfbox/filter/package.html b/pdfbox/src/main/java/org/apache/pdfbox/filter/package.html index 5576f9200ab..1d1f771a2b2 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/filter/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/filter/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/io/IOUtils.java b/pdfbox/src/main/java/org/apache/pdfbox/io/IOUtils.java index 576031bea91..85223e66239 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/io/IOUtils.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/io/IOUtils.java @@ -25,6 +25,8 @@ import java.io.InputStream; import java.io.OutputStream; +import org.apache.commons.logging.Log; + /** * This class contains various I/O-related methods. */ @@ -115,4 +117,33 @@ public static void closeQuietly(Closeable closeable) // ignore } } + + /** + * Try to close an IO resource and log and return if there was an exception. + * + *

An exception is only returned if the IOException passed in is null. + * + * @param closeable to be closed + * @param logger the logger to be used so that logging appears under that log instance + * @param resourceName the name to appear in the log output + * @param initialException if set, this exception will be returned even where there is another + * exception while closing the IO resource * @return the IOException if there was any but only + * if initialException is null + */ + public static IOException closeAndLogException(Closeable closeable, Log logger, String resourceName, IOException initialException) + { + try + { + closeable.close(); + } + catch (IOException ioe) + { + logger.warn("Error closing " + resourceName, ioe); + if (initialException == null) + { + return ioe; + } + } + return initialException; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java b/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java index f2b3c207a7a..951a154b466 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBuffer.java @@ -16,6 +16,7 @@ */ package org.apache.pdfbox.io; +import java.io.EOFException; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -243,7 +244,7 @@ public int read(byte[] b, int offset, int length) throws IOException checkClosed(); if (pointer >= size) { - return 0; + return -1; } int bytesRead = readRemainingBytes(b, offset, length); while (bytesRead < length && available() > 0) @@ -257,12 +258,8 @@ public int read(byte[] b, int offset, int length) throws IOException return bytesRead; } - private int readRemainingBytes(byte[] b, int offset, int length) throws IOException + private int readRemainingBytes(byte[] b, int offset, int length) { - if (pointer >= size) - { - return 0; - } int maxLength = (int) Math.min(length, size-pointer); int remainingBytes = chunkSize - currentBufferPointer; // 
no more bytes left @@ -430,7 +427,7 @@ private void nextBuffer() throws IOException /** * Ensure that the RandomAccessBuffer is not closed - * @throws IOException + * @throws IOException if the RandomAccessBuffer is already closed */ private void checkClosed() throws IOException { @@ -500,13 +497,18 @@ public void rewind(int bytes) throws IOException @Override public byte[] readFully(int length) throws IOException { - byte[] b = new byte[length]; - int bytesRead = read(b); - while (bytesRead < length) + byte[] bytes = new byte[length]; + int bytesRead = 0; + do { - bytesRead += read(b, bytesRead, length - bytesRead); - } - return b; + int count = read(bytes, bytesRead, length - bytesRead); + if (count < 0) + { + throw new EOFException(); + } + bytesRead += count; + } while (bytesRead < length); + return bytes; } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java b/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java index 4957d9ca5ed..68cf9adbe95 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java @@ -16,6 +16,7 @@ */ package org.apache.pdfbox.io; +import java.io.EOFException; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -336,13 +337,18 @@ public void rewind(int bytes) throws IOException @Override public byte[] readFully(int length) throws IOException { - byte[] b = new byte[length]; - int bytesRead = read(b); - while(bytesRead < length) + byte[] bytes = new byte[length]; + int bytesRead = 0; + do { - bytesRead += read(b, bytesRead, length-bytesRead); - } - return b; + int count = read(bytes, bytesRead, length - bytesRead); + if (count < 0) + { + throw new EOFException(); + } + bytesRead += count; + } while (bytesRead < length); + return bytes; } @Override diff --git 
a/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessInputStream.java b/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessInputStream.java index 2d962f41117..cfae6c6c16a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessInputStream.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessInputStream.java @@ -19,6 +19,9 @@ import java.io.InputStream; import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + /** * An InputStream which reads from a RandomAccessRead. * @@ -27,6 +30,8 @@ */ public class RandomAccessInputStream extends InputStream { + private static final Log LOG = LogFactory.getLog(RandomAccessInputStream.class); + private final RandomAccessRead input; private long position; @@ -68,7 +73,17 @@ public int read() throws IOException return -1; } int b = input.read(); - position += 1; + if (b != -1) + { + position += 1; + } + else + { + // should never happen due to prior isEOF() check + // unless there is an unsynchronized concurrent access + LOG.error("read() returns -1, assumed position: " + + position + ", actual position: " + input.getPosition()); + } return b; } @@ -81,7 +96,17 @@ public int read(byte[] b, int off, int len) throws IOException return -1; } int n = input.read(b, off, len); - position += n; + if (n != -1) + { + position += n; + } + else + { + // should never happen due to prior isEOF() check + // unless there is an unsynchronized concurrent access + LOG.error("read() returns -1, assumed position: " + + position + ", actual position: " + input.getPosition()); + } return n; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java b/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java index f30c9dc6a3a..6fc88564f9c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java @@ -193,7 +193,7 @@ int getNewPage() throws IOException * if no new pages could 
be added because we reached the maximum of * {@link Integer#MAX_VALUE} pages. * - *

If scratch file uage is allowed and scratch file does not exist already + *

If scratch file usage is allowed and scratch file does not exist already * it will be created.

* *

Only to be called under synchronization on {@link #freePages}.

@@ -234,16 +234,37 @@ private void enlarge() throws IOException if (expectedFileLen != fileLen) { - throw new IOException("Expected scratch file size of " + expectedFileLen + " but found " + fileLen); + throw new IOException("Expected scratch file size of " + expectedFileLen + + " but found " + fileLen + " in file " + file); } - // enlarge if we do not overflow + // enlarge if we do not int overflow if (pageCount + ENLARGE_PAGE_COUNT > pageCount) { + if (LOG.isDebugEnabled()) + { + LOG.debug("file: " + file); + LOG.debug("fileLen before: " + fileLen + ", raf length: " + raf.length() + + ", file length: " + file.length()); + } fileLen += ENLARGE_PAGE_COUNT * PAGE_SIZE; - + raf.setLength(fileLen); - + if (LOG.isDebugEnabled()) + { + LOG.debug("fileLen after1: " + fileLen + ", raf length: " + raf.length() + + ", file length: " + file.length()); + } + if (fileLen != raf.length()) + { + // PDFBOX-4601 possible AWS lambda bug that setLength() doesn't throw + // if not enough space + long origFilePointer = raf.getFilePointer(); + raf.seek(fileLen - 1); + raf.write(0); + raf.seek(origFilePointer); + LOG.debug("fileLen after2: " + fileLen + ", raf length: " + raf.length() + ", file length: " + file.length()); + } freePages.set(pageCount, pageCount + ENLARGE_PAGE_COUNT); } } @@ -408,6 +429,7 @@ public RandomAccess createBuffer() throws IOException * data read from provided input stream (input stream is copied to buffer). * The buffer data pointer is reset to point to first byte. * + * @param input The input stream that is to be copied into the buffer. * @return A new buffer containing data read from input stream. * * @throws IOException If an error occurred. 
@@ -417,7 +439,7 @@ public RandomAccess createBuffer(InputStream input) throws IOException ScratchFileBuffer buf = new ScratchFileBuffer(this); byte[] byteBuffer = new byte[8192]; - int bytesRead = 0; + int bytesRead; while ((bytesRead = input.read(byteBuffer)) > -1) { buf.write(byteBuffer, 0, bytesRead); @@ -487,15 +509,9 @@ public void close() throws IOException } } - if (file != null) + if (file != null && !file.delete() && file.exists() && ioexc == null) { - if (!file.delete()) - { - if (file.exists() && (ioexc == null)) - { - ioexc = new IOException("Error deleting scratch file: " + file.getAbsolutePath()); - } - } + ioexc = new IOException("Error deleting scratch file: " + file.getAbsolutePath()); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java b/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java index 1bee5421a4b..c43b80f2421 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java @@ -324,6 +324,10 @@ public void seek(long seekToPosition) throws IOException } int newPagePosition = (int) (seekToPosition / pageSize); + if (seekToPosition % pageSize == 0 && seekToPosition == size) + { + newPagePosition--; // PDFBOX-4756: Prevent seeking a non-yet-existent page... 
+ } currentPage = pageHandler.readPage(pageIndexes[newPagePosition]); currentPagePositionInPageIndexes = newPagePosition; @@ -368,22 +372,20 @@ public void rewind(int bytes) throws IOException * {@inheritDoc} */ @Override - public byte[] readFully(int len) throws IOException + public byte[] readFully(int length) throws IOException { - byte[] b = new byte[len]; - - int n = 0; + byte[] bytes = new byte[length]; + int bytesRead = 0; do { - int count = read(b, n, len - n); + int count = read(bytes, bytesRead, length - bytesRead); if (count < 0) { throw new EOFException(); } - n += count; - } while (n < len); - - return b; + bytesRead += count; + } while (bytesRead < length); + return bytes; } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/io/package.html b/pdfbox/src/main/java/org/apache/pdfbox/io/package.html index 5ac24bd748e..7baf9b09af5 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/io/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/io/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/multipdf/LayerUtility.java b/pdfbox/src/main/java/org/apache/pdfbox/multipdf/LayerUtility.java index 8ed5b7d67c4..0d8d09e0db0 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/multipdf/LayerUtility.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/multipdf/LayerUtility.java @@ -20,8 +20,11 @@ import java.io.IOException; import java.io.OutputStream; import java.util.Arrays; +import java.util.HashSet; import java.util.Map; import java.util.Set; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.cos.COSArray; @@ -43,12 +46,14 @@ import org.apache.pdfbox.util.Matrix; /** - * This class allows to import pages as Form XObjects into a PDF file and use them to create - * layers (optional content groups). 
- * + * This class allows to import pages as Form XObjects into a document and use them to create layers + * (optional content groups). It should be used only on loaded documents, not on generated documents + * because these can contain unfinished parts, e.g. font subsetting information. */ public class LayerUtility { + private static final Log LOG = LogFactory.getLog(LayerUtility.class); + private static final boolean DEBUG = true; private final PDDocument targetDoc; @@ -123,6 +128,10 @@ else if( contents instanceof COSArray ) /** * Imports a page from some PDF file as a Form XObject so it can be placed on another page * in the target document. + *

+ * You may want to call {@link #wrapInSaveRestore(PDPage) wrapInSaveRestore(PDPage)} before invoking the Form XObject to + * make sure that the graphics state is reset. + * * @param sourceDoc the source PDF document that contains the page to be copied * @param pageNumber the page number of the page to be copied * @return a Form XObject containing the original page's content @@ -134,12 +143,16 @@ public PDFormXObject importPageAsForm(PDDocument sourceDoc, int pageNumber) thro return importPageAsForm(sourceDoc, page); } - private static final Set PAGE_TO_FORM_FILTER = new java.util.HashSet( - Arrays.asList(new String[] {"Group", "LastModified", "Metadata"})); + private static final Set PAGE_TO_FORM_FILTER = + new HashSet(Arrays.asList("Group", "LastModified", "Metadata")); /** * Imports a page from some PDF file as a Form XObject so it can be placed on another page * in the target document. + *

+ * You may want to call {@link #wrapInSaveRestore(PDPage) wrapInSaveRestore(PDPage)} before invoking the Form XObject to + * make sure that the graphics state is reset. + * * @param sourceDoc the source PDF document that contains the page to be copied * @param page the page in the source PDF document to be copied * @return a Form XObject containing the original page's content @@ -147,6 +160,8 @@ public PDFormXObject importPageAsForm(PDDocument sourceDoc, int pageNumber) thro */ public PDFormXObject importPageAsForm(PDDocument sourceDoc, PDPage page) throws IOException { + importOcProperties(sourceDoc); + PDStream newStream = new PDStream(targetDoc, page.getContents(), COSName.FLATE_DECODE); PDFormXObject form = new PDFormXObject(newStream); @@ -187,6 +202,7 @@ public PDFormXObject importPageAsForm(PDDocument sourceDoc, PDPage page) throws at.scale(viewBox.getWidth() / viewBox.getHeight(), viewBox.getHeight() / viewBox.getWidth()); at.translate(viewBox.getHeight(), 0); at.rotate(-Math.PI * 1.5); + break; default: //no additional transformations necessary } @@ -212,9 +228,15 @@ public PDFormXObject importPageAsForm(PDDocument sourceDoc, PDPage page) throws * The form is enveloped in a marked content section to indicate that it's part of an * optional content group (OCG), here used as a layer. This optional group is returned and * can be enabled and disabled through methods on {@link PDOptionalContentProperties}. + *

+ * You may want to call {@link #wrapInSaveRestore(PDPage) wrapInSaveRestore(PDPage)} before calling this method to make + * sure that the graphics state is reset. + * * @param targetPage the target page * @param form the form to place - * @param transform the transformation matrix that controls the placement + * @param transform the transformation matrix that controls the placement of your form. You'll + * need this if your page has a crop box different than the media box, or if these have negative + * coordinates, or if you want to scale or adjust your form. * @param layerName the name for the layer/OCG to produce * @return the optional content group that was generated for the form usage * @throws IOException if an I/O error occurs @@ -235,6 +257,14 @@ public PDOptionalContentGroup appendFormAsLayer(PDPage targetPage, throw new IllegalArgumentException("Optional group (layer) already exists: " + layerName); } + PDRectangle cropBox = targetPage.getCropBox(); + if ((cropBox.getLowerLeftX() < 0 || cropBox.getLowerLeftY() < 0) && transform.isIdentity()) + { + // PDFBOX-4044 + LOG.warn("Negative cropBox " + cropBox + + " and identity transform may make your form invisible"); + } + PDOptionalContentGroup layer = new PDOptionalContentGroup(layerName); ocprops.addGroup(layer); @@ -269,4 +299,34 @@ else if (!inclusive && filter.contains(key.getName())) cloner.cloneForNewDocument(entry.getValue())); } } + + /** + * Imports OCProperties from source document to target document so hidden layers can still be + * hidden after import. + * + * @param srcDoc The source PDF document that contains the /OCProperties to be copied. + * @throws IOException If an I/O error occurs. 
+ */ + private void importOcProperties(PDDocument srcDoc) throws IOException + { + PDDocumentCatalog srcCatalog = srcDoc.getDocumentCatalog(); + PDOptionalContentProperties srcOCProperties = srcCatalog.getOCProperties(); + if (srcOCProperties == null) + { + return; + } + + PDDocumentCatalog dstCatalog = targetDoc.getDocumentCatalog(); + PDOptionalContentProperties dstOCProperties = dstCatalog.getOCProperties(); + + if (dstOCProperties == null) + { + dstCatalog.setOCProperties(new PDOptionalContentProperties( + (COSDictionary) cloner.cloneForNewDocument(srcOCProperties))); + } + else + { + cloner.cloneMerge(srcOCProperties, dstOCProperties); + } + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Overlay.java b/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Overlay.java index 08d50787188..d742db75f84 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Overlay.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Overlay.java @@ -17,6 +17,7 @@ package org.apache.pdfbox.multipdf; import java.awt.geom.AffineTransform; +import java.io.Closeable; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -24,16 +25,20 @@ import java.math.BigDecimal; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageTree; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; @@ -44,15 +49,15 @@ * Based on code contributed by Balazs Jerk. 
* */ -public class Overlay +public class Overlay implements Closeable { /** - * Possible location of the overlayed pages: foreground or background. + * Possible location of the overlaid pages: foreground or background. */ public enum Position { FOREGROUND, BACKGROUND - }; + } private LayoutPage defaultOverlayPage; private LayoutPage firstPageOverlayPage; @@ -60,7 +65,7 @@ public enum Position private LayoutPage oddPageOverlayPage; private LayoutPage evenPageOverlayPage; - private final Map specificPageOverlay = new HashMap(); + private final Set openDocuments = new HashSet(); private Map specificPageOverlayPage = new HashMap(); private Position position = Position.BACKGROUND; @@ -86,38 +91,77 @@ public enum Position private String evenPageOverlayFilename = null; private PDDocument evenPageOverlay = null; - private int numberOfOverlayPages = 0; private boolean useAllOverlayPages = false; /** - * This will add overlays to a documents. - * - * @param specificPageOverlayFile map of overlay files for specific pages - * - * @return the resulting pdf, which has to be saved and closed be the caller - * + * This will add overlays to a document. + * + * @param specificPageOverlayFile Optional map of overlay files for specific pages. The page + * numbers are 1-based. The map must be empty (but not null) if no specific mappings are used. + * + * @return The modified input PDF document, which has to be saved and closed by the caller. If + * the input document was passed by {@link #setInputPDF(PDDocument) setInputPDF(PDDocument)} + * then it is that object that is returned. 
+ * * @throws IOException if something went wrong */ - public PDDocument overlay(Map specificPageOverlayFile) - throws IOException + public PDDocument overlay(Map specificPageOverlayFile) throws IOException { + Map loadedDocuments = new HashMap(); + Map layouts = new HashMap(); loadPDFs(); for (Map.Entry e : specificPageOverlayFile.entrySet()) { - PDDocument doc = loadPDF(e.getValue()); - specificPageOverlay.put(e.getKey(), doc); - specificPageOverlayPage.put(e.getKey(), getLayoutPage(doc)); + PDDocument doc = loadedDocuments.get(e.getValue()); + if (doc == null) + { + doc = loadPDF(e.getValue()); + loadedDocuments.put(e.getValue(), doc); + layouts.put(doc, getLayoutPage(doc)); + } + openDocuments.add(doc); + specificPageOverlayPage.put(e.getKey(), layouts.get(doc)); } processPages(inputPDFDocument); return inputPDFDocument; } /** - * Close all input pdfs which were used for the overlay. - * + * This will add overlays documents to a document. If you created the overlay documents with + * subsetted fonts, you need to save them first so that the subsetting gets done. + * + * @param specificPageOverlayDocuments Optional map of overlay documents for specific pages. The + * page numbers are 1-based. The map must be empty (but not null) if no specific mappings are + * used. + * + * @return The modified input PDF document, which has to be saved and closed by the caller. If + * the input document was passed by {@link #setInputPDF(PDDocument) setInputPDF(PDDocument)} + * then it is that object that is returned. 
+ * * @throws IOException if something went wrong */ + public PDDocument overlayDocuments(Map specificPageOverlayDocuments) throws IOException + { + loadPDFs(); + for (Map.Entry e : specificPageOverlayDocuments.entrySet()) + { + PDDocument doc = e.getValue(); + if (doc != null) + { + specificPageOverlayPage.put(e.getKey(), getLayoutPage(doc)); + } + } + processPages(inputPDFDocument); + return inputPDFDocument; + } + + /** + * Close all input documents which were used for the overlay and opened by this class. + * + * @throws IOException if something went wrong + */ + @Override public void close() throws IOException { if (defaultOverlay != null) @@ -144,15 +188,12 @@ public void close() throws IOException { evenPageOverlay.close(); } - if (specificPageOverlay != null) + for (PDDocument doc : openDocuments) { - for (Map.Entry e : specificPageOverlay.entrySet()) - { - e.getValue().close(); - } - specificPageOverlay.clear(); - specificPageOverlayPage.clear(); + doc.close(); } + openDocuments.clear(); + specificPageOverlayPage.clear(); } private void loadPDFs() throws IOException @@ -233,12 +274,14 @@ private static final class LayoutPage private final PDRectangle overlayMediaBox; private final COSStream overlayContentStream; private final COSDictionary overlayResources; + private final int overlayRotation; - private LayoutPage(PDRectangle mediaBox, COSStream contentStream, COSDictionary resources) + private LayoutPage(PDRectangle mediaBox, COSStream contentStream, COSDictionary resources, int rotation) { overlayMediaBox = mediaBox; overlayContentStream = contentStream; overlayResources = resources; + overlayRotation = rotation; } } @@ -251,54 +294,55 @@ private LayoutPage getLayoutPage(PDDocument doc) throws IOException { resources = new PDResources(); } - return new LayoutPage(page.getMediaBox(), createContentStream(contents), - resources.getCOSObject()); + return new LayoutPage(page.getMediaBox(), createCombinedContentStream(contents), + resources.getCOSObject(), 
page.getRotation()); } private Map getLayoutPages(PDDocument doc) throws IOException { - int numberOfPages = doc.getNumberOfPages(); - Map layoutPages = new HashMap(numberOfPages); - for (int i=0;i layoutPages = new HashMap(); + for (PDPage page : doc.getPages()) { - PDPage page = doc.getPage(i); COSBase contents = page.getCOSObject().getDictionaryObject(COSName.CONTENTS); PDResources resources = page.getResources(); if (resources == null) { resources = new PDResources(); } - layoutPages.put(i,new LayoutPage(page.getMediaBox(), createContentStream(contents), - resources.getCOSObject())); + layoutPages.put(i, new LayoutPage(page.getMediaBox(), createCombinedContentStream(contents), + resources.getCOSObject(), page.getRotation())); + i++; } return layoutPages; } - private COSStream createContentStream(COSBase contents) throws IOException + private COSStream createCombinedContentStream(COSBase contents) throws IOException { List contentStreams = createContentStreamList(contents); // concatenate streams - COSStream concatStream = new COSStream(); + COSStream concatStream = inputPDFDocument.getDocument().createCOSStream(); OutputStream out = concatStream.createOutputStream(COSName.FLATE_DECODE); for (COSStream contentStream : contentStreams) { InputStream in = contentStream.createInputStream(); - byte[] buf = new byte[2048]; - int n; - while ((n = in.read(buf)) > 0) - { - out.write(buf, 0, n); - } + IOUtils.copy(in, out); out.flush(); + in.close(); } out.close(); return concatStream; } + // get the content streams as a list private List createContentStreamList(COSBase contents) throws IOException { List contentStreams = new ArrayList(); - if (contents instanceof COSStream) + if (contents == null) + { + return contentStreams; + } + else if (contents instanceof COSStream) { contentStreams.add((COSStream) contents); } @@ -315,45 +359,58 @@ else if (contents instanceof COSObject) } else { - throw new IOException("Contents are unknown type:" + contents.getClass().getName()); 
+ throw new IOException("Unknown content type: " + contents.getClass().getName()); } return contentStreams; } private void processPages(PDDocument document) throws IOException { - int pageCount = 0; - for (PDPage page : document.getPages()) + int pageCounter = 0; + PDPageTree pageTree = document.getPages(); + int numberOfPages = pageTree.getCount(); + for (PDPage page : pageTree) { + pageCounter++; + LayoutPage layoutPage = getLayoutPage(pageCounter, numberOfPages); + if (layoutPage == null) + { + continue; + } COSDictionary pageDictionary = page.getCOSObject(); - COSBase contents = pageDictionary.getDictionaryObject(COSName.CONTENTS); - COSArray contentArray = new COSArray(); + COSBase originalContent = pageDictionary.getDictionaryObject(COSName.CONTENTS); + COSArray newContentArray = new COSArray(); switch (position) { - case FOREGROUND: - // save state - contentArray.add(createStream("q\n")); - addOriginalContent(contents, contentArray); - // restore state - contentArray.add(createStream("Q\n")); - // overlay content - overlayPage(contentArray, page, pageCount + 1, document.getNumberOfPages()); - break; - case BACKGROUND: - // overlay content - overlayPage(contentArray, page, pageCount + 1, document.getNumberOfPages()); - addOriginalContent(contents, contentArray); - break; - default: - throw new IOException("Unknown type of position:" + position); + case FOREGROUND: + // save state + newContentArray.add(createStream("q\n")); + addOriginalContent(originalContent, newContentArray); + // restore state + newContentArray.add(createStream("Q\n")); + // overlay content last + overlayPage(page, layoutPage, newContentArray); + break; + case BACKGROUND: + // overlay content first + overlayPage(page, layoutPage, newContentArray); + + addOriginalContent(originalContent, newContentArray); + break; + default: + throw new IOException("Unknown type of position:" + position); } - pageDictionary.setItem(COSName.CONTENTS, contentArray); - pageCount++; + 
pageDictionary.setItem(COSName.CONTENTS, newContentArray); } } private void addOriginalContent(COSBase contents, COSArray contentArray) throws IOException { + if (contents == null) + { + return; + } + if (contents instanceof COSStream) { contentArray.add(contents); @@ -364,12 +421,24 @@ else if (contents instanceof COSArray) } else { - throw new IOException("Unknown content type:" + contents.getClass().getName()); + throw new IOException("Unknown content type: " + contents.getClass().getName()); } } - private void overlayPage(COSArray array, PDPage page, int pageNumber, int numberOfPages) + private void overlayPage(PDPage page, LayoutPage layoutPage, COSArray array) throws IOException + { + PDResources resources = page.getResources(); + if (resources == null) + { + resources = new PDResources(); + page.setResources(resources); + } + COSName xObjectId = createOverlayXObject(page, layoutPage); + array.add(createOverlayStream(page, layoutPage, xObjectId)); + } + + private LayoutPage getLayoutPage(int pageNumber, int numberOfPages) { LayoutPage layoutPage = null; if (!useAllOverlayPages && specificPageOverlayPage.containsKey(pageNumber)) @@ -401,27 +470,34 @@ else if (useAllOverlayPages) int usePageNum = (pageNumber -1 ) % numberOfOverlayPages; layoutPage = specificPageOverlayPage.get(usePageNum); } - if (layoutPage != null) - { - PDResources resources = page.getResources(); - if (resources == null) - { - resources = new PDResources(); - page.setResources(resources); - } - COSName xObjectId = createOverlayXObject(page, layoutPage, - layoutPage.overlayContentStream); - array.add(createOverlayStream(page, layoutPage, xObjectId)); - } + return layoutPage; } - private COSName createOverlayXObject(PDPage page, LayoutPage layoutPage, COSStream contentStream) + private COSName createOverlayXObject(PDPage page, LayoutPage layoutPage) { - PDFormXObject xobjForm = new PDFormXObject(contentStream); + PDFormXObject xobjForm = new PDFormXObject(layoutPage.overlayContentStream); 
xobjForm.setResources(new PDResources(layoutPage.overlayResources)); xobjForm.setFormType(1); - xobjForm.setBBox( layoutPage.overlayMediaBox.createRetranslatedRectangle()); - xobjForm.setMatrix(new AffineTransform()); + xobjForm.setBBox(layoutPage.overlayMediaBox.createRetranslatedRectangle()); + AffineTransform at = new AffineTransform(); + switch (layoutPage.overlayRotation) + { + case 90: + at.translate(0, layoutPage.overlayMediaBox.getWidth()); + at.rotate(Math.toRadians(-90)); + break; + case 180: + at.translate(layoutPage.overlayMediaBox.getWidth(), layoutPage.overlayMediaBox.getHeight()); + at.rotate(Math.toRadians(-180)); + break; + case 270: + at.translate(layoutPage.overlayMediaBox.getHeight(), 0); + at.rotate(Math.toRadians(-270)); + break; + default: + break; + } + xobjForm.setMatrix(at); PDResources resources = page.getResources(); return resources.add(xobjForm, "OL"); } @@ -430,20 +506,55 @@ private COSStream createOverlayStream(PDPage page, LayoutPage layoutPage, COSNam throws IOException { // create a new content stream that executes the XObject content - PDRectangle pageMediaBox = page.getMediaBox(); - float hShift = (pageMediaBox.getWidth() - layoutPage.overlayMediaBox.getWidth()) / 2.0f; - float vShift = (pageMediaBox.getHeight() - layoutPage.overlayMediaBox.getHeight()) / 2.0f; StringBuilder overlayStream = new StringBuilder(); - overlayStream.append("q\nq 1 0 0 1 "); - overlayStream.append(float2String(hShift)); - overlayStream.append(" "); - overlayStream.append(float2String(vShift) ); - overlayStream.append(" cm /"); + overlayStream.append("q\nq\n"); + PDRectangle overlayMediaBox = new PDRectangle(layoutPage.overlayMediaBox.getCOSArray()); + if (layoutPage.overlayRotation == 90 || layoutPage.overlayRotation == 270) + { + overlayMediaBox.setLowerLeftX(layoutPage.overlayMediaBox.getLowerLeftY()); + overlayMediaBox.setLowerLeftY(layoutPage.overlayMediaBox.getLowerLeftX()); + 
overlayMediaBox.setUpperRightX(layoutPage.overlayMediaBox.getUpperRightY()); + overlayMediaBox.setUpperRightY(layoutPage.overlayMediaBox.getUpperRightX()); + } + AffineTransform at = calculateAffineTransform(page, overlayMediaBox); + double[] flatmatrix = new double[6]; + at.getMatrix(flatmatrix); + for (double v : flatmatrix) + { + overlayStream.append(float2String((float) v)); + overlayStream.append(" "); + } + overlayStream.append(" cm\n"); + + // if debugging, insert + // 0 0 overlayMediaBox.getHeight() overlayMediaBox.getWidth() re\ns\n + // into the content stream + + overlayStream.append(" /"); overlayStream.append(xObjectId.getName()); overlayStream.append(" Do Q\nQ\n"); return createStream(overlayStream.toString()); } + /** + * Calculate the transform to be used when positioning the overlay. The default implementation + * centers on the destination. Override this method to do your own, e.g. move to a corner, or + * rotate. + * + * @param page The page that will get the overlay. + * @param overlayMediaBox The overlay media box. + * @return The affine transform to be used. + */ + protected AffineTransform calculateAffineTransform(PDPage page, PDRectangle overlayMediaBox) + { + AffineTransform at = new AffineTransform(); + PDRectangle pageMediaBox = page.getMediaBox(); + float hShift = (pageMediaBox.getWidth() - overlayMediaBox.getWidth()) / 2.0f; + float vShift = (pageMediaBox.getHeight() - overlayMediaBox.getHeight()) / 2.0f; + at.translate(hShift, vShift); + return at; + } + private String float2String(float floatValue) { // use a BigDecimal as intermediate state to avoid @@ -463,8 +574,9 @@ private String float2String(float floatValue) private COSStream createStream(String content) throws IOException { - COSStream stream = new COSStream(); - OutputStream out = stream.createOutputStream(COSName.FLATE_DECODE); + COSStream stream = inputPDFDocument.getDocument().createCOSStream(); + OutputStream out = stream.createOutputStream( + content.length() > 20 ? 
COSName.FLATE_DECODE : null); out.write(content.getBytes("ISO-8859-1")); out.close(); return stream; @@ -481,9 +593,11 @@ public void setOverlayPosition(Position overlayPosition) } /** - * Sets the file to be overlayed. - * - * @param inputFile the file to be overlayed + * Sets the file to be overlaid. + * + * @param inputFile the file to be overlaid. The {@link PDDocument} object gathered from + * opening this file will be returned by + * {@link #overlay(java.util.Map) overlay(Map<Integer, String>)}. */ public void setInputFile(String inputFile) { @@ -491,9 +605,10 @@ public void setInputFile(String inputFile) } /** - * Sets the PDF to be overlayed. - * - * @param inputPDF the PDF to be overlayed + * Sets the PDF to be overlaid. + * + * @param inputPDF the PDF to be overlaid. This will be the object that is returned by + * {@link #overlay(java.util.Map) overlay(Map<Integer, String>)}. */ public void setInputPDF(PDDocument inputPDF) { @@ -521,7 +636,8 @@ public void setDefaultOverlayFile(String defaultOverlayFile) } /** - * Sets the default overlay PDF. + * Sets the default overlay PDF. If you created the overlay document with + * subsetted fonts, you need to save it first so that the subsetting gets done. * * @param defaultOverlayPDF the default overlay PDF */ @@ -551,7 +667,8 @@ public void setFirstPageOverlayFile(String firstPageOverlayFile) } /** - * Sets the first page overlay PDF. + * Sets the first page overlay PDF. If you created the overlay document with + * subsetted fonts, you need to save it first so that the subsetting gets done. * * @param firstPageOverlayPDF the first page overlay PDF */ @@ -571,7 +688,8 @@ public void setLastPageOverlayFile(String lastPageOverlayFile) } /** - * Sets the last page overlay PDF. + * Sets the last page overlay PDF. If you created the overlay document with + * subsetted fonts, you need to save it first so that the subsetting gets done. 
* * @param lastPageOverlayPDF the last page overlay PDF */ @@ -591,9 +709,11 @@ public void setAllPagesOverlayFile(String allPagesOverlayFile) } /** - * Sets the all pages overlay PDF. + * Sets the all pages overlay PDF. If you created the overlay document with + * subsetted fonts, you need to save it first so that the subsetting gets done. * - * @param allPagesOverlayPDF the all pages overlay PDF + * @param allPagesOverlayPDF the all pages overlay PDF. This should not be a PDDocument that you + * created on the fly, it should be saved first, if it contains any fonts that are subset. */ public void setAllPagesOverlayPDF(PDDocument allPagesOverlayPDF) { @@ -611,7 +731,8 @@ public void setOddPageOverlayFile(String oddPageOverlayFile) } /** - * Sets the odd page overlay PDF. + * Sets the odd page overlay PDF. If you created the overlay document with + * subsetted fonts, you need to save it first so that the subsetting gets done. * * @param oddPageOverlayPDF the odd page overlay PDF */ @@ -631,7 +752,8 @@ public void setEvenPageOverlayFile(String evenPageOverlayFile) } /** - * Sets the even page overlay PDF. + * Sets the even page overlay PDF. If you created the overlay document with + * subsetted fonts, you need to save it first so that the subsetting gets done. 
* * @param evenPageOverlayPDF the even page overlay PDF */ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFCloneUtility.java b/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFCloneUtility.java index f7d53050dfc..0798ab42a6f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFCloneUtility.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFCloneUtility.java @@ -20,8 +20,10 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; @@ -34,19 +36,22 @@ /** * Utility class used to clone PDF objects. It keeps track of objects it has already cloned. - * + * Although this class is public, it is for PDFBox internal use and should not be used outside, + * except by very experienced users. The "public" modifier will be removed in 3.0. The class should + * not be used on documents that are being generated because these can contain unfinished parts, + * e.g. font subsetting information. */ -class PDFCloneUtility +public class PDFCloneUtility { - private final PDDocument destination; private final Map clonedVersion = new HashMap(); + private final Set clonedValues = new HashSet(); /** * Creates a new instance for the given target document. * @param dest the destination PDF document that will receive the clones */ - PDFCloneUtility(PDDocument dest) + public PDFCloneUtility(PDDocument dest) { this.destination = dest; } @@ -77,8 +82,14 @@ public COSBase cloneForNewDocument( Object base ) throws IOException if( retval != null ) { //we are done, it has already been converted. 
+ return retval; + } + if (base instanceof COSBase && clonedValues.contains(base)) + { + // Don't clone a clone + return (COSBase) base; } - else if( base instanceof List) + if (base instanceof List) { COSArray array = new COSArray(); List list = (List) base; @@ -91,13 +102,11 @@ else if( base instanceof List) else if( base instanceof COSObjectable && !(base instanceof COSBase) ) { retval = cloneForNewDocument( ((COSObjectable)base).getCOSObject() ); - clonedVersion.put( base, retval ); } else if( base instanceof COSObject ) { COSObject object = (COSObject)base; retval = cloneForNewDocument( object.getObject() ); - clonedVersion.put( base, retval ); } else if( base instanceof COSArray ) { @@ -108,7 +117,6 @@ else if( base instanceof COSArray ) newArray.add( cloneForNewDocument( array.get( i ) ) ); } retval = newArray; - clonedVersion.put( base, retval ); } else if( base instanceof COSStream ) { @@ -143,13 +151,13 @@ else if( base instanceof COSDictionary ) retval = (COSBase)base; } clonedVersion.put( base, retval ); + clonedValues.add(retval); return retval; } - /** * Merges two objects of the same type by deep-cloning its members. - *
+ *
* Base and target must be instances of the same class. * @param base the base object to be cloned * @param target the merge target @@ -167,10 +175,10 @@ public void cloneMerge( final COSObjectable base, COSObjectable target) throws I return; //we are done, it has already been converted. // ### Is that correct for cloneMerge??? } - else if (!(base instanceof COSBase)) + //TODO what when clone-merging a clone? Does it ever happen? + if (!(base instanceof COSBase)) { cloneMerge(base.getCOSObject(), target.getCOSObject()); - clonedVersion.put(base, retval); } else if( base instanceof COSObject ) { @@ -178,20 +186,25 @@ else if( base instanceof COSObject ) { cloneMerge(((COSObject) base).getObject(),((COSObject) target).getObject() ); } - else if(target instanceof COSDictionary) + else if (target instanceof COSDictionary || target instanceof COSArray) { cloneMerge(((COSObject) base).getObject(), target); } - clonedVersion.put( base, retval ); } else if( base instanceof COSArray ) { - COSArray array = (COSArray)base; - for( int i=0; i entry : dic.entrySet() ) + if (target instanceof COSObject) { - COSName key = entry.getKey(); - COSBase value = entry.getValue(); - if (((COSDictionary)target).getItem(key) != null) - { - cloneMerge(value, ((COSDictionary)target).getItem(key)); - } - else + cloneMerge(base, ((COSObject) target).getObject()); + } + else + { + COSDictionary dic = (COSDictionary) base; + clonedVersion.put(base, retval); + for (Map.Entry entry : dic.entrySet()) { - ((COSDictionary)target).setItem( key, cloneForNewDocument(value)); + COSName key = entry.getKey(); + COSBase value = entry.getValue(); + if (((COSDictionary) target).getItem(key) != null) + { + cloneMerge(value, ((COSDictionary) target).getItem(key)); + } + else + { + ((COSDictionary) target).setItem(key, cloneForNewDocument(value)); + } } } } @@ -233,6 +253,6 @@ else if( base instanceof COSDictionary ) retval = (COSBase)base; } clonedVersion.put( base, retval ); + clonedValues.add(retval); } - } 
diff --git a/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java b/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java index e984c1af267..60519afd3d6 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java @@ -17,16 +17,22 @@ package org.apache.pdfbox.multipdf; import java.io.File; -import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; -import java.util.Iterator; +import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; @@ -34,8 +40,9 @@ import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; -import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.MemoryUsageSetting; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; @@ -44,16 +51,30 @@ import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.PDStructureElementNameTreeNode; import org.apache.pdfbox.pdmodel.PageMode; +import org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.pdmodel.common.PDDestinationOrAction; +import org.apache.pdfbox.pdmodel.common.PDMetadata; +import org.apache.pdfbox.pdmodel.common.PDNameTreeNode; import 
org.apache.pdfbox.pdmodel.common.PDNumberTreeNode; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDMarkInfo; +import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDParentTreeValue; +import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureElement; import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot; +import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent; +import org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDDestination; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem; import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; import org.apache.pdfbox.pdmodel.interactive.form.PDField; +import org.apache.pdfbox.pdmodel.interactive.form.PDNonTerminalField; +import org.apache.pdfbox.pdmodel.interactive.viewerpreferences.PDViewerPreferences; /** * This class will take a list of pdf documents and merge them, saving the @@ -63,21 +84,109 @@ */ public class PDFMergerUtility { - private static final String STRUCTURETYPE_DOCUMENT = "Document"; + /** + * Log instance. 
+ */ + private static final Log LOG = LogFactory.getLog(PDFMergerUtility.class); - private final List sources; - private final List fileInputStreams; + private final List sources; private String destinationFileName; private OutputStream destinationStream; private boolean ignoreAcroFormErrors = false; + private PDDocumentInformation destinationDocumentInformation = null; + private PDMetadata destinationMetadata = null; + + private DocumentMergeMode documentMergeMode = DocumentMergeMode.PDFBOX_LEGACY_MODE; + private AcroFormMergeMode acroFormMergeMode = AcroFormMergeMode.PDFBOX_LEGACY_MODE; + + /** + * The mode to use when merging documents: + * + *
    + *
  • {@link DocumentMergeMode#OPTIMIZE_RESOURCES_MODE} Optimizes resource handling such as + * closing documents early. Not all document elements are merged compared to + * the PDFBOX_LEGACY_MODE. Currently supported are: + *
      + *
    • Page content and resources + *
    + *
  • {@link DocumentMergeMode#PDFBOX_LEGACY_MODE} Keeps all files open until the + * merge has been completed. This is currently necessary to merge documents + * containing a Structure Tree.
    This is the standard mode for PDFBox 2.0. + *
+ */ + public enum DocumentMergeMode + { + OPTIMIZE_RESOURCES_MODE, + PDFBOX_LEGACY_MODE + } + + /** + * The mode to use when merging AcroForm between documents: + * + *
    + *
  • {@link AcroFormMergeMode#JOIN_FORM_FIELDS_MODE} fields with the same fully qualified name + * will be merged into one with the widget annotations of the merged fields + * becoming part of the same field.
    + * Although the API is finalized processing of different form field types is still in + * development. Currently only (nested) text fields do work with intermediate nodes + * being existent. + *
  • {@link AcroFormMergeMode#PDFBOX_LEGACY_MODE} fields with the same fully qualified name + * will be renamed and treated as independent. This mode was used in versions + * of PDFBox up to 2.x. + *
+ */ + public enum AcroFormMergeMode + { + JOIN_FORM_FIELDS_MODE, + PDFBOX_LEGACY_MODE + } /** * Instantiate a new PDFMergerUtility. */ public PDFMergerUtility() { - sources = new ArrayList(); - fileInputStreams = new ArrayList(); + sources = new ArrayList(); + } + + /** + * Get the merge mode to be used for merging AcroForms between documents + * + * {@link AcroFormMergeMode} + */ + public AcroFormMergeMode getAcroFormMergeMode() + { + return acroFormMergeMode; + } + + /** + * Set the merge mode to be used for merging AcroForms between documents + * + * {@link AcroFormMergeMode} + */ + public void setAcroFormMergeMode(AcroFormMergeMode theAcroFormMergeMode) + { + this.acroFormMergeMode = theAcroFormMergeMode; + } + + /** + * Set the merge mode to be used for merging documents + * + * {@link DocumentMergeMode} + */ + public void setDocumentMergeMode(DocumentMergeMode theDocumentMergeMode) + { + this.documentMergeMode = theDocumentMergeMode; + } + + /** + * Get the merge mode to be used for merging documents + * + * {@link DocumentMergeMode} + */ + public DocumentMergeMode getDocumentMergeMode() + { + return documentMergeMode; } /** @@ -120,6 +229,50 @@ public void setDestinationStream(OutputStream destStream) destinationStream = destStream; } + /** + * Get the destination document information that is to be set in {@link #mergeDocuments(org.apache.pdfbox.io.MemoryUsageSetting) + * }. The default is null, which means that it is ignored. + * + * @return The destination document information. + */ + public PDDocumentInformation getDestinationDocumentInformation() + { + return destinationDocumentInformation; + } + + /** + * Set the destination document information that is to be set in {@link #mergeDocuments(org.apache.pdfbox.io.MemoryUsageSetting) + * }. The default is null, which means that it is ignored. + * + * @param info The destination document information. 
+ */ + public void setDestinationDocumentInformation(PDDocumentInformation info) + { + destinationDocumentInformation = info; + } + + /** + * Set the destination metadata that is to be set in {@link #mergeDocuments(org.apache.pdfbox.io.MemoryUsageSetting) + * }. The default is null, which means that it is ignored. + * + * @return The destination metadata. + */ + public PDMetadata getDestinationMetadata() + { + return destinationMetadata; + } + + /** + * Set the destination metadata that is to be set in {@link #mergeDocuments(org.apache.pdfbox.io.MemoryUsageSetting) + * }. The default is null, which means that it is ignored. + * + * @param meta The destination metadata. + */ + public void setDestinationMetadata(PDMetadata meta) + { + destinationMetadata = meta; + } + /** * Add a source file to the list of files to merge. * @@ -141,9 +294,7 @@ public void addSource(String source) throws FileNotFoundException */ public void addSource(File source) throws FileNotFoundException { - FileInputStream stream = new FileInputStream(source); - sources.add(stream); - fileInputStreams.add(stream); + sources.add(source); } /** @@ -189,29 +340,135 @@ public void mergeDocuments() throws IOException * @throws IOException If there is an error saving the document. 
*/ public void mergeDocuments(MemoryUsageSetting memUsageSetting) throws IOException + { + if (documentMergeMode == DocumentMergeMode.PDFBOX_LEGACY_MODE) + { + legacyMergeDocuments(memUsageSetting); + } + else if (documentMergeMode == DocumentMergeMode.OPTIMIZE_RESOURCES_MODE) + { + optimizedMergeDocuments(memUsageSetting); + } + } + + private void optimizedMergeDocuments(MemoryUsageSetting memUsageSetting) throws IOException + { + PDDocument destination = null; + try + { + destination = new PDDocument(memUsageSetting); + PDFCloneUtility cloner = new PDFCloneUtility(destination); + + for (Object sourceObject : sources) + { + PDDocument sourceDoc = null; + try + { + if (sourceObject instanceof File) + { + sourceDoc = PDDocument.load((File) sourceObject, memUsageSetting); + } + else + { + sourceDoc = PDDocument.load((InputStream) sourceObject, memUsageSetting); + } + + for (PDPage page : sourceDoc.getPages()) + { + PDPage newPage = new PDPage((COSDictionary) cloner.cloneForNewDocument(page.getCOSObject())); + newPage.setCropBox(page.getCropBox()); + newPage.setMediaBox(page.getMediaBox()); + newPage.setRotation(page.getRotation()); + PDResources resources = page.getResources(); + if (resources != null) + { + // this is smart enough to just create references for resources that are used on multiple pages + newPage.setResources(new PDResources((COSDictionary) cloner.cloneForNewDocument(resources))); + } + else + { + newPage.setResources(new PDResources()); + } + destination.addPage(newPage); + } + } + finally + { + IOUtils.closeQuietly(sourceDoc); + } + } + + if (destinationStream == null) + { + destination.save(destinationFileName); + } + else + { + destination.save(destinationStream); + } + } + finally + { + IOUtils.closeQuietly(destination); + } + } + + /** + * Merge the list of source documents, saving the result in the destination + * file. 
+ * + * @param memUsageSetting defines how memory is used for buffering PDF streams; + * in case of null unrestricted main memory is used + * + * @throws IOException If there is an error saving the document. + */ + private void legacyMergeDocuments(MemoryUsageSetting memUsageSetting) throws IOException { PDDocument destination = null; - InputStream sourceFile; - PDDocument source; - if (sources != null && sources.size() > 0) + if (sources.size() > 0) { - List tobeclosed = new ArrayList(); + // Make sure that: + // - first Exception is kept + // - destination is closed + // - all PDDocuments are closed + // - all FileInputStreams are closed + // - there's a way to see which errors occurred + + List tobeclosed = new ArrayList(sources.size()); try { MemoryUsageSetting partitionedMemSetting = memUsageSetting != null ? memUsageSetting.getPartitionedCopy(sources.size()+1) : MemoryUsageSetting.setupMainMemoryOnly(); - Iterator sit = sources.iterator(); destination = new PDDocument(partitionedMemSetting); - while (sit.hasNext()) + for (Object sourceObject : sources) + { + PDDocument sourceDoc = null; + if (sourceObject instanceof File) + { + sourceDoc = PDDocument.load((File) sourceObject, partitionedMemSetting); + } + else + { + sourceDoc = PDDocument.load((InputStream) sourceObject, + partitionedMemSetting); + } + tobeclosed.add(sourceDoc); + appendDocument(destination, sourceDoc); + } + + // optionally set meta data + if (destinationDocumentInformation != null) + { + destination.setDocumentInformation(destinationDocumentInformation); + } + if (destinationMetadata != null) { - sourceFile = sit.next(); - source = PDDocument.load(sourceFile, partitionedMemSetting); - tobeclosed.add(source); - appendDocument(destination, source); + destination.getDocumentCatalog().setMetadata(destinationMetadata); } + if (destinationStream == null) { destination.save(destinationFileName); @@ -225,15 +482,12 @@ public void mergeDocuments(MemoryUsageSetting memUsageSetting) throws IOExceptio 
{ if (destination != null) { - destination.close(); + IOUtils.closeAndLogException(destination, LOG, "PDDocument", null); } + for (PDDocument doc : tobeclosed) { - doc.close(); - } - for (FileInputStream stream : fileInputStreams) - { - stream.close(); + IOUtils.closeAndLogException(doc, LOG, "PDDocument", null); } } } @@ -259,19 +513,15 @@ public void appendDocument(PDDocument destination, PDDocument source) throws IOE throw new IOException("Error: destination PDF is closed."); } - PDDocumentCatalog destCatalog = destination.getDocumentCatalog(); PDDocumentCatalog srcCatalog = source.getDocumentCatalog(); - if (isDynamicXfa(srcCatalog.getAcroForm())) { throw new IOException("Error: can't merge source document containing dynamic XFA form content."); - } - + } + PDDocumentInformation destInfo = destination.getDocumentInformation(); PDDocumentInformation srcInfo = source.getDocumentInformation(); - destInfo.getCOSObject().mergeInto(srcInfo.getCOSObject()); - - + mergeInto(srcInfo.getCOSObject(), destInfo.getCOSObject(), Collections.emptySet()); // use the highest version number for the resulting pdf float destVersion = destination.getVersion(); @@ -282,41 +532,47 @@ public void appendDocument(PDDocument destination, PDDocument source) throws IOE destination.setVersion(srcVersion); } + int pageIndexOpenActionDest = -1; + PDDocumentCatalog destCatalog = destination.getDocumentCatalog(); if (destCatalog.getOpenAction() == null) { - destCatalog.setOpenAction(srcCatalog.getOpenAction()); - } - - PDFCloneUtility cloner = new PDFCloneUtility(destination); - - try - { - PDAcroForm destAcroForm = destCatalog.getAcroForm(); - PDAcroForm srcAcroForm = srcCatalog.getAcroForm(); - - if (destAcroForm == null && srcAcroForm != null) + // PDFBOX-3972: get local dest page index, it must be reassigned after the page cloning + PDDestinationOrAction openAction = null; + try { - destCatalog.getCOSObject().setItem(COSName.ACRO_FORM, - 
cloner.cloneForNewDocument(srcAcroForm.getCOSObject())); - + openAction = srcCatalog.getOpenAction(); } - else + catch (IOException ex) { - if (srcAcroForm != null) - { - mergeAcroForm(cloner, destAcroForm, srcAcroForm); - } + // PDFBOX-4223 + LOG.error("Invalid OpenAction ignored", ex); } - } - catch (IOException e) - { - // if we are not ignoring exceptions, we'll re-throw this - if (!ignoreAcroFormErrors) + PDDestination openActionDestination = null; + if (openAction instanceof PDActionGoTo) { - throw new IOException(e); + openActionDestination = ((PDActionGoTo) openAction).getDestination(); } + else if (openAction instanceof PDDestination) + { + openActionDestination = (PDDestination) openAction; + } + // note that it can also be something else, e.g. PDActionJavaScript, then do nothing. + + if (openActionDestination instanceof PDPageDestination) + { + PDPage page = ((PDPageDestination) openActionDestination).getPage(); + if (page != null) + { + pageIndexOpenActionDest = srcCatalog.getPages().indexOf(page); + } + } + + destCatalog.setOpenAction(openAction); } + PDFCloneUtility cloner = new PDFCloneUtility(destination); + mergeAcroForm(cloner, destCatalog, srcCatalog); + COSArray destThreads = (COSArray) destCatalog.getCOSObject().getDictionaryObject(COSName.THREADS); COSArray srcThreads = (COSArray) cloner.cloneForNewDocument(destCatalog.getCOSObject().getDictionaryObject( COSName.THREADS)); @@ -343,10 +599,17 @@ public void appendDocument(PDDocument destination, PDDocument source) throws IOE } } - PDDocumentNameDestinationDictionary destDests = destCatalog.getDests(); + if (destNames != null && destNames.getCOSObject().containsKey(COSName.ID_TREE)) + { + // found in 001031.pdf from PDFBOX-4417 and doesn't belong there + destNames.getCOSObject().removeItem(COSName.ID_TREE); + LOG.warn("Removed /IDTree from /Names dictionary, doesn't belong there"); + } + PDDocumentNameDestinationDictionary srcDests = srcCatalog.getDests(); if (srcDests != null) { + 
PDDocumentNameDestinationDictionary destDests = destCatalog.getDests(); if (destDests == null) { destCatalog.getCOSObject().setItem(COSName.DESTS, cloner.cloneForNewDocument(srcDests)); @@ -357,17 +620,23 @@ public void appendDocument(PDDocument destination, PDDocument source) throws IOE } } - PDDocumentOutline destOutline = destCatalog.getDocumentOutline(); PDDocumentOutline srcOutline = srcCatalog.getDocumentOutline(); if (srcOutline != null) { - if (destOutline == null) + PDDocumentOutline destOutline = destCatalog.getDocumentOutline(); + if (destOutline == null || destOutline.getFirstChild() == null) { PDDocumentOutline cloned = new PDDocumentOutline((COSDictionary) cloner.cloneForNewDocument(srcOutline)); destCatalog.setDocumentOutline(cloned); } else { + // search last sibling for dest, because /Last entry is sometimes wrong + PDOutlineItem destLastOutlineItem = destOutline.getFirstChild(); + while (destLastOutlineItem.getNextSibling() != null) + { + destLastOutlineItem = destLastOutlineItem.getNextSibling(); + } for (PDOutlineItem item : srcOutline.children()) { // get each child, clone its dictionary, remove siblings info, @@ -376,26 +645,25 @@ public void appendDocument(PDDocument destination, PDDocument source) throws IOE clonedDict.removeItem(COSName.PREV); clonedDict.removeItem(COSName.NEXT); PDOutlineItem clonedItem = new PDOutlineItem(clonedDict); - destOutline.addLast(clonedItem); + destLastOutlineItem.insertSiblingAfter(clonedItem); + destLastOutlineItem = destLastOutlineItem.getNextSibling(); } } } PageMode destPageMode = destCatalog.getPageMode(); - PageMode srcPageMode = srcCatalog.getPageMode(); if (destPageMode == null) { + PageMode srcPageMode = srcCatalog.getPageMode(); destCatalog.setPageMode(srcPageMode); } - COSDictionary destLabels = (COSDictionary) destCatalog.getCOSObject().getDictionaryObject( - COSName.PAGE_LABELS); - COSDictionary srcLabels = (COSDictionary) srcCatalog.getCOSObject() - .getDictionaryObject(COSName.PAGE_LABELS); + 
COSDictionary srcLabels = srcCatalog.getCOSObject().getCOSDictionary(COSName.PAGE_LABELS); if (srcLabels != null) { int destPageCount = destination.getNumberOfPages(); COSArray destNums; + COSDictionary destLabels = destCatalog.getCOSObject().getCOSDictionary(COSName.PAGE_LABELS); if (destLabels == null) { destLabels = new COSDictionary(); @@ -410,9 +678,21 @@ public void appendDocument(PDDocument destination, PDDocument source) throws IOE COSArray srcNums = (COSArray) srcLabels.getDictionaryObject(COSName.NUMS); if (srcNums != null) { + int startSize = destNums.size(); for (int i = 0; i < srcNums.size(); i += 2) { - COSNumber labelIndex = (COSNumber) srcNums.getObject(i); + COSBase base = srcNums.getObject(i); + if (!(base instanceof COSNumber)) + { + LOG.error("page labels ignored, index " + i + " should be a number, but is " + base); + // remove what we added + while (destNums.size() > startSize) + { + destNums.remove(startSize); + } + break; + } + COSNumber labelIndex = (COSNumber) base; long labelIndexValue = labelIndex.intValue(); destNums.add(COSInteger.get(labelIndexValue + destPageCount)); destNums.add(cloner.cloneForNewDocument(srcNums.getObject(i + 1))); @@ -420,77 +700,125 @@ public void appendDocument(PDDocument destination, PDDocument source) throws IOE } } - COSStream destMetadata = (COSStream) destCatalog.getCOSObject().getDictionaryObject(COSName.METADATA); - COSStream srcMetadata = (COSStream) srcCatalog.getCOSObject().getDictionaryObject(COSName.METADATA); + COSStream destMetadata = destCatalog.getCOSObject().getCOSStream(COSName.METADATA); + COSStream srcMetadata = srcCatalog.getCOSObject().getCOSStream(COSName.METADATA); if (destMetadata == null && srcMetadata != null) { - PDStream newStream = new PDStream(destination, srcMetadata.createInputStream(), (COSName) null); - newStream.getCOSObject().mergeInto(srcMetadata); - destCatalog.getCOSObject().setItem(COSName.METADATA, newStream); + try + { + PDStream newStream = new PDStream(destination, 
srcMetadata.createInputStream(), (COSName) null); + mergeInto(srcMetadata, newStream.getCOSObject(), + new HashSet(Arrays.asList(COSName.FILTER, COSName.LENGTH))); + destCatalog.getCOSObject().setItem(COSName.METADATA, newStream); + } + catch (IOException ex) + { + // PDFBOX-4227 cleartext XMP stream with /Flate + LOG.error("Metadata skipped because it could not be read", ex); + } + } + + COSDictionary destOCP = destCatalog.getCOSObject().getCOSDictionary(COSName.OCPROPERTIES); + COSDictionary srcOCP = srcCatalog.getCOSObject().getCOSDictionary(COSName.OCPROPERTIES); + if (destOCP == null && srcOCP != null) + { + destCatalog.getCOSObject().setItem(COSName.OCPROPERTIES, cloner.cloneForNewDocument(srcOCP)); + } + else if (destOCP != null && srcOCP != null) + { + cloner.cloneMerge(srcOCP, destOCP); } + + mergeOutputIntents(cloner, srcCatalog, destCatalog); - // merge logical structure hierarchy if logical structure information is available in both source pdf and - // destination pdf + // merge logical structure hierarchy boolean mergeStructTree = false; int destParentTreeNextKey = -1; - COSDictionary destParentTreeDict = null; - COSDictionary srcParentTreeDict; - COSArray destNumbersArray = null; - COSArray srcNumbersArray = null; - PDMarkInfo destMark = destCatalog.getMarkInfo(); - PDStructureTreeRoot destStructTree = destCatalog.getStructureTreeRoot(); - PDMarkInfo srcMark = srcCatalog.getMarkInfo(); + Map srcNumberTreeAsMap = null; + Map destNumberTreeAsMap = null; PDStructureTreeRoot srcStructTree = srcCatalog.getStructureTreeRoot(); + PDStructureTreeRoot destStructTree = destCatalog.getStructureTreeRoot(); + if (destStructTree == null && srcStructTree != null) + { + // create a dummy structure tree in the destination, so that the source + // tree is cloned. 
(We can't just copy the tree reference due to PDFBOX-3999) + destStructTree = new PDStructureTreeRoot(); + destCatalog.setStructureTreeRoot(destStructTree); + destStructTree.setParentTree(new PDNumberTreeNode(PDParentTreeValue.class)); + // PDFBOX-4429: remove bogus StructParent(s) + for (PDPage page : destCatalog.getPages()) + { + page.getCOSObject().removeItem(COSName.STRUCT_PARENTS); + for (PDAnnotation ann : page.getAnnotations()) + { + ann.getCOSObject().removeItem(COSName.STRUCT_PARENT); + } + } + } if (destStructTree != null) { PDNumberTreeNode destParentTree = destStructTree.getParentTree(); destParentTreeNextKey = destStructTree.getParentTreeNextKey(); if (destParentTree != null) { - destParentTreeDict = destParentTree.getCOSObject(); - destNumbersArray = (COSArray) destParentTreeDict.getDictionaryObject(COSName.NUMS); - if (destNumbersArray != null) + destNumberTreeAsMap = getNumberTreeAsMap(destParentTree); + if (destParentTreeNextKey < 0) { - if (destParentTreeNextKey < 0) + if (destNumberTreeAsMap.isEmpty()) { - destParentTreeNextKey = destNumbersArray.size() / 2; + destParentTreeNextKey = 0; } - if (destParentTreeNextKey > 0 && srcStructTree != null) + else { - PDNumberTreeNode srcParentTree = srcStructTree.getParentTree(); - if (srcParentTree != null) + destParentTreeNextKey = Collections.max(destNumberTreeAsMap.keySet()) + 1; + } + } + if (destParentTreeNextKey >= 0 && srcStructTree != null) + { + PDNumberTreeNode srcParentTree = srcStructTree.getParentTree(); + if (srcParentTree != null) + { + srcNumberTreeAsMap = getNumberTreeAsMap(srcParentTree); + if (!srcNumberTreeAsMap.isEmpty()) { - srcParentTreeDict = srcParentTree.getCOSObject(); - srcNumbersArray = (COSArray) srcParentTreeDict.getDictionaryObject(COSName.NUMS); - if (srcNumbersArray != null) - { - mergeStructTree = true; - } + mergeStructTree = true; } } } } - if (destMark != null && destMark.isMarked() && !mergeStructTree) - { - destMark.setMarked(false); - } - if (!mergeStructTree) - { - 
destCatalog.setStructureTreeRoot(null); - } } Map objMapping = new HashMap(); + int pageIndex = 0; for (PDPage page : srcCatalog.getPages()) { PDPage newPage = new PDPage((COSDictionary) cloner.cloneForNewDocument(page.getCOSObject())); + if (!mergeStructTree) + { + // PDFBOX-4429: remove bogus StructParent(s) + newPage.getCOSObject().removeItem(COSName.STRUCT_PARENTS); + for (PDAnnotation ann : newPage.getAnnotations()) + { + ann.getCOSObject().removeItem(COSName.STRUCT_PARENT); + } + } newPage.setCropBox(page.getCropBox()); newPage.setMediaBox(page.getMediaBox()); newPage.setRotation(page.getRotation()); - // this is smart enough to just create references for resources that are used on multiple pages - newPage.setResources(new PDResources((COSDictionary) cloner.cloneForNewDocument(page.getResources()))); + PDResources resources = page.getResources(); + if (resources != null) + { + // this is smart enough to just create references for resources that are used on multiple pages + newPage.setResources(new PDResources((COSDictionary) cloner.cloneForNewDocument(resources))); + } + else + { + newPage.setResources(new PDResources()); + } if (mergeStructTree) { + // add the value of the destination ParentTreeNextKey to every source element + // StructParent(s) value so that these don't overlap with the existing values updateStructParentEntries(newPage, destParentTreeNextKey); objMapping.put(page.getCOSObject(), newPage.getCOSObject()); List oldAnnots = page.getAnnotations(); @@ -502,80 +830,642 @@ public void appendDocument(PDDocument destination, PDDocument source) throws IOE // TODO update mapping for XObjects } destination.addPage(newPage); + + if (pageIndex == pageIndexOpenActionDest) + { + // PDFBOX-3972: reassign the page. 
+ // The openAction is either a PDActionGoTo or a PDPageDestination + PDDestinationOrAction openAction = destCatalog.getOpenAction(); + PDPageDestination pageDestination; + if (openAction instanceof PDActionGoTo) + { + pageDestination = (PDPageDestination) ((PDActionGoTo) openAction).getDestination(); + } + else + { + pageDestination = (PDPageDestination) openAction; + } + pageDestination.setPage(newPage); + } + ++pageIndex; } if (mergeStructTree) { - updatePageReferences(srcNumbersArray, objMapping); - for (int i = 0; i < srcNumbersArray.size() / 2; i++) + updatePageReferences(cloner, srcNumberTreeAsMap, objMapping); + int maxSrcKey = -1; + for (Map.Entry entry : srcNumberTreeAsMap.entrySet()) { - destNumbersArray.add(COSInteger.get(destParentTreeNextKey + i)); - destNumbersArray.add(srcNumbersArray.getObject(i * 2 + 1)); + int srcKey = entry.getKey(); + maxSrcKey = Math.max(srcKey, maxSrcKey); + destNumberTreeAsMap.put(destParentTreeNextKey + srcKey, cloner.cloneForNewDocument(entry.getValue())); } - destParentTreeNextKey += srcNumbersArray.size() / 2; - destParentTreeDict.setItem(COSName.NUMS, destNumbersArray); - PDNumberTreeNode newParentTreeNode = new PDNumberTreeNode(destParentTreeDict, COSBase.class); + destParentTreeNextKey += maxSrcKey + 1; + PDNumberTreeNode newParentTreeNode = new PDNumberTreeNode(PDParentTreeValue.class); + + // Note that all elements are stored flatly. This could become a problem for large files + // when these are opened in a viewer that uses the tagging information. 
+ // If this happens, then ​PDNumberTreeNode should be improved with a convenience method that + // stores the map into a B+Tree, see https://en.wikipedia.org/wiki/B+_tree + newParentTreeNode.setNumbers(destNumberTreeAsMap); + destStructTree.setParentTree(newParentTreeNode); destStructTree.setParentTreeNextKey(destParentTreeNextKey); - COSDictionary kDictLevel0 = new COSDictionary(); - COSArray newKArray = new COSArray(); - COSArray destKArray = destStructTree.getKArray(); - COSArray srcKArray = srcStructTree.getKArray(); - if (destKArray != null && srcKArray != null) - { - updateParentEntry(destKArray, kDictLevel0); - newKArray.addAll(destKArray); - if (mergeStructTree) - { - updateParentEntry(srcKArray, kDictLevel0); - } - newKArray.addAll(srcKArray); - } - kDictLevel0.setItem(COSName.K, newKArray); - kDictLevel0.setItem(COSName.P, destStructTree); - kDictLevel0.setItem(COSName.S, new COSString(STRUCTURETYPE_DOCUMENT)); - destStructTree.setK(kDictLevel0); + mergeKEntries(cloner, srcStructTree, destStructTree); + mergeRoleMap(srcStructTree, destStructTree); + mergeIDTree(cloner, srcStructTree, destStructTree); + mergeMarkInfo(destCatalog, srcCatalog); + mergeLanguage(destCatalog, srcCatalog); + mergeViewerPreferences(destCatalog, srcCatalog); } } - private int nextFieldNum = 1; - - /** - * Merge the contents of the source form into the destination form for the - * destination file. - * - * @param cloner the object cloner for the destination document - * @param destAcroForm the destination form - * @param srcAcroForm the source form - * @throws IOException If an error occurs while adding the field. 
- */ - private void mergeAcroForm(PDFCloneUtility cloner, PDAcroForm destAcroForm, PDAcroForm srcAcroForm) - throws IOException + private void mergeViewerPreferences(PDDocumentCatalog destCatalog, PDDocumentCatalog srcCatalog) { - - List srcFields = srcAcroForm.getFields(); - - if (srcFields != null) + PDViewerPreferences srcViewerPreferences = srcCatalog.getViewerPreferences(); + if (srcViewerPreferences == null) + { + return; + } + PDViewerPreferences destViewerPreferences = destCatalog.getViewerPreferences(); + if (destViewerPreferences == null) { - // if a form is merged multiple times using PDFBox the newly generated - // fields starting with dummyFieldName may already exist. We need to determine the last unique - // number used and increment that. - final String prefix = "dummyFieldName"; - final int prefixLength = prefix.length(); + destViewerPreferences = new PDViewerPreferences(new COSDictionary()); + destCatalog.setViewerPreferences(destViewerPreferences); + } + mergeInto(srcViewerPreferences.getCOSObject(), destViewerPreferences.getCOSObject(), + Collections.emptySet()); - for (PDField destField : destAcroForm.getFieldTree()) - { - String fieldName = destField.getPartialName(); - if (fieldName.startsWith(prefix)) - { - nextFieldNum = Math.max(nextFieldNum, Integer.parseInt(fieldName.substring(prefixLength, fieldName.length()))+1); - } - } - - COSArray destFields = (COSArray) destAcroForm.getCOSObject().getItem(COSName.FIELDS); - for (PDField srcField : srcAcroForm.getFieldTree()) - { - COSDictionary dstField = (COSDictionary) cloner.cloneForNewDocument(srcField.getCOSObject()); + // check the booleans - set to true if one is set and true + if (srcViewerPreferences.hideToolbar() || destViewerPreferences.hideToolbar()) + { + destViewerPreferences.setHideToolbar(true); + } + if (srcViewerPreferences.hideMenubar() || destViewerPreferences.hideMenubar()) + { + destViewerPreferences.setHideMenubar(true); + } + if (srcViewerPreferences.hideWindowUI() || 
destViewerPreferences.hideWindowUI()) + { + destViewerPreferences.setHideWindowUI(true); + } + if (srcViewerPreferences.fitWindow() || destViewerPreferences.fitWindow()) + { + destViewerPreferences.setFitWindow(true); + } + if (srcViewerPreferences.centerWindow() || destViewerPreferences.centerWindow()) + { + destViewerPreferences.setCenterWindow(true); + } + if (srcViewerPreferences.displayDocTitle() || destViewerPreferences.displayDocTitle()) + { + destViewerPreferences.setDisplayDocTitle(true); + } + } + + private void mergeLanguage(PDDocumentCatalog destCatalog, PDDocumentCatalog srcCatalog) + { + if (destCatalog.getLanguage() == null) + { + String srcLanguage = srcCatalog.getLanguage(); + if (srcLanguage != null) + { + destCatalog.setLanguage(srcLanguage); + } + } + } + + private void mergeMarkInfo(PDDocumentCatalog destCatalog, PDDocumentCatalog srcCatalog) + { + PDMarkInfo destMark = destCatalog.getMarkInfo(); + PDMarkInfo srcMark = srcCatalog.getMarkInfo(); + if (destMark == null) + { + destMark = new PDMarkInfo(); + } + if (srcMark == null) + { + srcMark = new PDMarkInfo(); + } + destMark.setMarked(true); + destMark.setSuspect(srcMark.isSuspect() || destMark.isSuspect()); + destMark.setSuspect(srcMark.usesUserProperties() || destMark.usesUserProperties()); + destCatalog.setMarkInfo(destMark); + } + + private void mergeKEntries(PDFCloneUtility cloner, + PDStructureTreeRoot srcStructTree, + PDStructureTreeRoot destStructTree) throws IOException + { + COSArray dstKArray = new COSArray(); + if (destStructTree.getK() != null) + { + COSBase base = destStructTree.getK(); + if (base instanceof COSArray) + { + dstKArray.addAll((COSArray) base); + } + else if (base instanceof COSDictionary) + { + dstKArray.add(base); + } + } + + COSArray srcKArray = new COSArray(); + if (srcStructTree.getK() != null) + { + COSBase base = cloner.cloneForNewDocument(srcStructTree.getK()); + if (base instanceof COSArray) + { + srcKArray.addAll((COSArray) base); + } + else if (base 
instanceof COSDictionary) + { + srcKArray.add(base); + } + } + + if (srcKArray.size() == 0) + { + return; + } + + if (dstKArray.size() == 1 && dstKArray.getObject(0) instanceof COSDictionary) + { + // Only one element in the destination. If it is a /Document and its children + // are /Document or /Part, then we can insert there + COSDictionary topKDict = (COSDictionary) dstKArray.getObject(0); + if (COSName.DOCUMENT.equals(topKDict.getCOSName(COSName.S))) + { + COSArray kLevelOneArray = topKDict.getCOSArray(COSName.K); + if (kLevelOneArray != null) + { + boolean onlyDocuments = hasOnlyDocumentsOrParts(kLevelOneArray); + if (onlyDocuments) + { + // insert src elements at level 1 + kLevelOneArray.addAll(srcKArray); + updateParentEntry(kLevelOneArray, topKDict, COSName.PART); + return; + } + } + } + } + + if (dstKArray.size() == 0) + { + updateParentEntry(srcKArray, destStructTree.getCOSObject(), null); + destStructTree.setK(srcKArray); + return; + } + + // whatever this is, merge this under a new /Document element + dstKArray.addAll(srcKArray); + COSDictionary kLevelZeroDict = new COSDictionary(); + // If it is all Document, then make it all Part + COSName newStructureType = hasOnlyDocumentsOrParts(dstKArray) ? 
COSName.PART : null; + updateParentEntry(dstKArray, kLevelZeroDict, newStructureType); + kLevelZeroDict.setItem(COSName.K, dstKArray); + kLevelZeroDict.setItem(COSName.P, destStructTree); + kLevelZeroDict.setItem(COSName.S, COSName.DOCUMENT); + destStructTree.setK(kLevelZeroDict); + } + + private boolean hasOnlyDocumentsOrParts(COSArray kLevelOneArray) + { + for (int i = 0; i < kLevelOneArray.size(); ++i) + { + COSBase base = kLevelOneArray.getObject(i); + if (!(base instanceof COSDictionary)) + { + return false; + } + COSDictionary dict = (COSDictionary) base; + if (!COSName.DOCUMENT.equals(dict.getCOSName(COSName.S)) && + !COSName.PART.equals(dict.getCOSName(COSName.S))) + { + return false; + } + } + return true; + } + + /** + * Update the P reference to the new parent dictionary. + * + * @param kArray the kids array + * @param newParent the new parent + * @param newStructureType the new structure type in /S or null so it doesn't get replaced + */ + private void updateParentEntry(COSArray kArray, COSDictionary newParent, COSName newStructureType) + { + for (int i = 0; i < kArray.size(); i++) + { + COSBase subEntry = kArray.getObject(i); + if (subEntry instanceof COSDictionary) + { + COSDictionary dictEntry = (COSDictionary) subEntry; + dictEntry.setItem(COSName.P, newParent); + if (newStructureType != null) + { + dictEntry.setItem(COSName.S, newStructureType); + } + } + } + } + + private void mergeIDTree(PDFCloneUtility cloner, + PDStructureTreeRoot srcStructTree, + PDStructureTreeRoot destStructTree) throws IOException + { + PDNameTreeNode srcIDTree = srcStructTree.getIDTree(); + if (srcIDTree == null) + { + return; + } + PDNameTreeNode destIDTree = destStructTree.getIDTree(); + if (destIDTree == null) + { + destIDTree = new PDStructureElementNameTreeNode(); + } + Map srcNames = getIDTreeAsMap(srcIDTree); + Map destNames = getIDTreeAsMap(destIDTree); + for (Map.Entry entry : srcNames.entrySet()) + { + if (destNames.containsKey(entry.getKey())) + { + 
LOG.warn("key " + entry.getKey() + " already exists in destination IDTree"); + } + else + { + destNames.put(entry.getKey(), + new PDStructureElement((COSDictionary) cloner.cloneForNewDocument(entry.getValue().getCOSObject()))); + } + } + destIDTree = new PDStructureElementNameTreeNode(); + destIDTree.setNames(destNames); + destStructTree.setIDTree(destIDTree); + // Note that all elements are stored flatly. This could become a problem for large files + // when these are opened in a viewer that uses the tagging information. + // If this happens, then PDNameTreeNode should be improved with a convenience method that + // stores the map into a B+Tree, see https://en.wikipedia.org/wiki/B+_tree + } + + // PDNameTreeNode.getNames() only brings one level, this is why we need this + // might be made public at a later time, or integrated into PDNameTreeNode with template. + static Map getIDTreeAsMap(PDNameTreeNode idTree) + throws IOException + { + Map names = idTree.getNames(); + if (names == null) + { + names = new LinkedHashMap(); + } + else + { + // must copy because the map is read only + names = new LinkedHashMap(names); + } + List> kids = idTree.getKids(); + if (kids != null) + { + for (PDNameTreeNode kid : kids) + { + names.putAll(getIDTreeAsMap(kid)); + } + } + return names; + } + + // PDNumberTreeNode.getNumbers() only brings one level, this is why we need this + // might be made public at a later time, or integrated into PDNumberTreeNode. 
+ static Map getNumberTreeAsMap(PDNumberTreeNode tree) + throws IOException + { + Map numbers = tree.getNumbers(); + if (numbers == null) + { + numbers = new LinkedHashMap(); + } + else + { + // must copy because the map is read only + numbers = new LinkedHashMap(numbers); + } + List kids = tree.getKids(); + if (kids != null) + { + for (PDNumberTreeNode kid : kids) + { + numbers.putAll(getNumberTreeAsMap(kid)); + } + } + return numbers; + } + + private void mergeRoleMap(PDStructureTreeRoot srcStructTree, PDStructureTreeRoot destStructTree) + { + COSDictionary srcDict = srcStructTree.getCOSObject().getCOSDictionary(COSName.ROLE_MAP); + COSDictionary destDict = destStructTree.getCOSObject().getCOSDictionary(COSName.ROLE_MAP); + if (srcDict == null) + { + return; + } + if (destDict == null) + { + destStructTree.getCOSObject().setItem(COSName.ROLE_MAP, srcDict); // clone not needed + return; + } + for (Map.Entry entry : srcDict.entrySet()) + { + COSBase destValue = destDict.getDictionaryObject(entry.getKey()); + if (destValue != null && destValue.equals(entry.getValue())) + { + // already exists, but identical + continue; + } + if (destDict.containsKey(entry.getKey())) + { + LOG.warn("key " + entry.getKey() + " already exists in destination RoleMap"); + } + else + { + destDict.setItem(entry.getKey(), entry.getValue()); + } + } + } + + // copy outputIntents to destination, but avoid duplicate OutputConditionIdentifier, + // except when it is missing or is named "Custom". + private void mergeOutputIntents(PDFCloneUtility cloner, + PDDocumentCatalog srcCatalog, PDDocumentCatalog destCatalog) throws IOException + { + List srcOutputIntents = srcCatalog.getOutputIntents(); + List dstOutputIntents = destCatalog.getOutputIntents(); + for (PDOutputIntent srcOI : srcOutputIntents) + { + String srcOCI = srcOI.getOutputConditionIdentifier(); + if (srcOCI != null && !"Custom".equals(srcOCI)) + { + // is that identifier already there? 
+ boolean skip = false; + for (PDOutputIntent dstOI : dstOutputIntents) + { + if (dstOI.getOutputConditionIdentifier().equals(srcOCI)) + { + skip = true; + break; + } + } + if (skip) + { + continue; + } + } + destCatalog.addOutputIntent(new PDOutputIntent((COSDictionary) cloner.cloneForNewDocument(srcOI))); + dstOutputIntents.add(srcOI); + } + } + + /** + * Merge the contents of the source form into the destination form for the + * destination file. + * + * @param cloner the object cloner for the destination document + * @param destAcroForm the destination form + * @param srcAcroForm the source form + * @throws IOException If an error occurs while adding the field. + */ + private void mergeAcroForm(PDFCloneUtility cloner, PDDocumentCatalog destCatalog, + PDDocumentCatalog srcCatalog ) throws IOException + { + try + { + PDAcroForm destAcroForm = destCatalog.getAcroForm(); + PDAcroForm srcAcroForm = srcCatalog.getAcroForm(); + + if (destAcroForm == null && srcAcroForm != null) + { + destCatalog.getCOSObject().setItem(COSName.ACRO_FORM, + cloner.cloneForNewDocument(srcAcroForm.getCOSObject())); + + } + else + { + if (srcAcroForm != null) + { + if (acroFormMergeMode == AcroFormMergeMode.PDFBOX_LEGACY_MODE) + { + acroFormLegacyMode(cloner, destAcroForm, srcAcroForm); + } + else if (acroFormMergeMode == AcroFormMergeMode.JOIN_FORM_FIELDS_MODE) + { + acroFormJoinFieldsMode(cloner, destAcroForm, srcAcroForm); + } + } + } + } + catch (IOException e) + { + // if we are not ignoring exceptions, we'll re-throw this + if (!ignoreAcroFormErrors) + { + throw new IOException(e); + } + } + } + + /* + * Merge the contents of the source form into the destination form for the + * destination file. + * + * @param cloner the object cloner for the destination document + * @param destAcroForm the destination form + * @param srcAcroForm the source form + * @throws IOException If an error occurs while adding the field. 
+ */ + private void acroFormJoinFieldsMode(PDFCloneUtility cloner, PDAcroForm destAcroForm, PDAcroForm srcAcroForm) + throws IOException + { + List srcFields = srcAcroForm.getFields(); + COSArray destFields; + + if (srcFields != null && !srcFields.isEmpty()) + { + // get the destinations root fields. Could be that the entry doesn't exist + // or is of wrong type + COSBase base = destAcroForm.getCOSObject().getItem(COSName.FIELDS); + if (base instanceof COSArray) + { + destFields = (COSArray) base; + } + else + { + destFields = new COSArray(); + } + + for (PDField srcField : srcAcroForm.getFieldTree()) + { + // if the form already has a field with this name then we need to rename this field + // to prevent merge conflicts. + PDField destinationField = destAcroForm.getField(srcField.getFullyQualifiedName()); + if (destinationField == null) + { + // field doesn't exist - can safely add it + COSDictionary importedField = (COSDictionary) cloner.cloneForNewDocument(srcField.getCOSObject()); + destFields.add(importedField); + } + else + { + mergeFields(cloner, destinationField, srcField); + } + } + destAcroForm.getCOSObject().setItem(COSName.FIELDS,destFields); + } + } + + private void mergeFields(PDFCloneUtility cloner, PDField destField, PDField srcField) + { + if (destField instanceof PDNonTerminalField && srcField instanceof PDNonTerminalField) + { + LOG.info("Skipping non terminal field " + srcField.getFullyQualifiedName()); + return; + } + + if (destField.getFieldType() == "Tx" && destField.getFieldType() == "Tx") + { + // if the field already has multiple widgets we can add to the array + if (destField.getCOSObject().containsKey(COSName.KIDS)) + { + COSArray widgets = destField.getCOSObject().getCOSArray(COSName.KIDS); + for (PDAnnotationWidget srcWidget : srcField.getWidgets()) + { + try + { + widgets.add(cloner.cloneForNewDocument(srcWidget.getCOSObject())); + } + catch (IOException ioe) + { + LOG.warn("Unable to clone widget for source field " + 
srcField.getFullyQualifiedName()); + } + + } + } + else + { + COSArray widgets = new COSArray(); + try + { + COSDictionary widgetAsCOS = (COSDictionary) cloner.cloneForNewDocument(destField.getWidgets().get(0)); + cleanupWidgetCOSDictionary(widgetAsCOS, true); + widgetAsCOS.setItem(COSName.PARENT, destField); + widgets.add(widgetAsCOS); + for (PDAnnotationWidget srcWidget : srcField.getWidgets()) + { + try + { + widgetAsCOS = (COSDictionary) cloner.cloneForNewDocument(srcWidget.getCOSObject()); + cleanupWidgetCOSDictionary(widgetAsCOS, false); + widgetAsCOS.setItem(COSName.PARENT, destField); + widgets.add(widgetAsCOS); + } + catch (IOException ioe) + { + LOG.warn("Unable to clone widget for source field " + srcField.getFullyQualifiedName()); + } + + } + destField.getCOSObject().setItem(COSName.KIDS, widgets); + cleanupFieldCOSDictionary(destField.getCOSObject()); + } + catch (IOException ioe) + { + LOG.warn("Unable to clone widget for destination field " + destField.getFullyQualifiedName()); + } + } + } + else + { + LOG.info("Only merging two text fields is currently supported"); + LOG.info("Skipping merging of " + srcField.getFullyQualifiedName() + " into " + destField.getFullyQualifiedName()); + } + } + + // Remove entries from field dictionary which belong to a widget + // Needed when splitting a joint field/widget dictionary + private void cleanupFieldCOSDictionary(COSDictionary fieldCos) + { + //TODO: align that list with the PDF spec. Vurrently only based on sample forms + fieldCos.removeItem(COSName.F); + fieldCos.removeItem(COSName.MK); + fieldCos.removeItem(COSName.P); + fieldCos.removeItem(COSName.RECT); + fieldCos.removeItem(COSName.SUBTYPE); + fieldCos.removeItem(COSName.TYPE); + } + + // remove entries from widget dictionary which belong to fields + // Needed when splitting a joint field/widget dictionary + private void cleanupWidgetCOSDictionary(COSDictionary widgetCos, boolean removeDAEntry) + { + //TODO: align that list with the PDF spec. 
Vurrently only based on sample forms + // Acrobat removes the DA entry only for the first widget + if (removeDAEntry) + { + widgetCos.removeItem(COSName.DA); + } + widgetCos.removeItem(COSName.FT); + widgetCos.removeItem(COSName.T); + widgetCos.removeItem(COSName.V); + } + + /* + * Merge the contents of the source form into the destination form for the + * destination file. + * + * @param cloner the object cloner for the destination document + * @param destAcroForm the destination form + * @param srcAcroForm the source form + * @throws IOException If an error occurs while adding the field. + */ + private void acroFormLegacyMode(PDFCloneUtility cloner, PDAcroForm destAcroForm, PDAcroForm srcAcroForm) + throws IOException + { + List srcFields = srcAcroForm.getFields(); + COSArray destFields; + + if (srcFields != null && !srcFields.isEmpty()) + { + // if a form is merged multiple times using PDFBox the newly generated + // fields starting with dummyFieldName may already exist. We need to determine the last unique + // number used and increment that. + final String prefix = "dummyFieldName"; + final int prefixLength = prefix.length(); + + for (PDField destField : destAcroForm.getFieldTree()) + { + String fieldName = destField.getPartialName(); + if (fieldName.startsWith(prefix)) + { + nextFieldNum = Math.max(nextFieldNum, Integer.parseInt(fieldName.substring(prefixLength)) + 1); + } + } + + // get the destinations root fields. Could be that the entry doesn't exist + // or is of wrong type + COSBase base = destAcroForm.getCOSObject().getItem(COSName.FIELDS); + if (base instanceof COSArray) + { + destFields = (COSArray) base; + } + else + { + destFields = new COSArray(); + } + + for (PDField srcField : srcAcroForm.getFields()) + { + COSDictionary dstField = (COSDictionary) cloner.cloneForNewDocument(srcField.getCOSObject()); // if the form already has a field with this name then we need to rename this field // to prevent merge conflicts. 
if (destAcroForm.getField(srcField.getFullyQualifiedName()) != null) @@ -588,6 +1478,8 @@ private void mergeAcroForm(PDFCloneUtility cloner, PDAcroForm destAcroForm, PDAc } } + private int nextFieldNum = 1; + /** * Indicates if acroform errors are ignored or not. * @@ -609,69 +1501,103 @@ public void setIgnoreAcroFormErrors(boolean ignoreAcroFormErrorsValue) ignoreAcroFormErrors = ignoreAcroFormErrorsValue; } + /** + * Update the Pg and Obj references to the new (merged) page. + */ + private void updatePageReferences(PDFCloneUtility cloner, + Map numberTreeAsMap, + Map objMapping) throws IOException + { + for (COSObjectable obj : numberTreeAsMap.values()) + { + if (obj == null) + { + continue; + } + PDParentTreeValue val = (PDParentTreeValue) obj; + COSBase base = val.getCOSObject(); + if (base instanceof COSArray) + { + updatePageReferences(cloner, (COSArray) base, objMapping); + } + else + { + updatePageReferences(cloner, (COSDictionary) base, objMapping); + } + } + } + /** * Update the Pg and Obj references to the new (merged) page. 
* * @param parentTreeEntry * @param objMapping mapping between old and new references */ - private void updatePageReferences(COSDictionary parentTreeEntry, Map objMapping) + private void updatePageReferences(PDFCloneUtility cloner, + COSDictionary parentTreeEntry, Map objMapping) + throws IOException { - COSBase page = parentTreeEntry.getDictionaryObject(COSName.PG); - if (page instanceof COSDictionary && objMapping.containsKey(page)) + COSDictionary pageDict = parentTreeEntry.getCOSDictionary(COSName.PG); + if (objMapping.containsKey(pageDict)) { - parentTreeEntry.setItem(COSName.PG, objMapping.get(page)); + parentTreeEntry.setItem(COSName.PG, objMapping.get(pageDict)); } COSBase obj = parentTreeEntry.getDictionaryObject(COSName.OBJ); - if (obj instanceof COSDictionary && objMapping.containsKey(obj)) + if (obj instanceof COSDictionary) { - parentTreeEntry.setItem(COSName.OBJ, objMapping.get(obj)); + COSDictionary objDict = (COSDictionary) obj; + if (objMapping.containsKey(objDict)) + { + parentTreeEntry.setItem(COSName.OBJ, objMapping.get(objDict)); + } + else + { + // PDFBOX-3999: clone objects that are not in mapping to make sure that + // these don't remain attached to the source document + COSBase item = parentTreeEntry.getItem(COSName.OBJ); + if (item instanceof COSObject) + { + LOG.debug("clone potential orphan object in structure tree: " + item + + ", Type: " + objDict.getNameAsString(COSName.TYPE) + + ", Subtype: " + objDict.getNameAsString(COSName.SUBTYPE) + + ", T: " + objDict.getNameAsString(COSName.T)); + } + else + { + // don't display in full because of stack overflow + LOG.debug("clone potential orphan object in structure tree" + + ", Type: " + objDict.getNameAsString(COSName.TYPE) + + ", Subtype: " + objDict.getNameAsString(COSName.SUBTYPE) + + ", T: " + objDict.getNameAsString(COSName.T)); + } + parentTreeEntry.setItem(COSName.OBJ, cloner.cloneForNewDocument(obj)); + } } COSBase kSubEntry = parentTreeEntry.getDictionaryObject(COSName.K); if 
(kSubEntry instanceof COSArray) { - updatePageReferences((COSArray) kSubEntry, objMapping); + updatePageReferences(cloner, (COSArray) kSubEntry, objMapping); } else if (kSubEntry instanceof COSDictionary) { - updatePageReferences((COSDictionary) kSubEntry, objMapping); + updatePageReferences(cloner, (COSDictionary) kSubEntry, objMapping); } } - private void updatePageReferences(COSArray parentTreeEntry, Map objMapping) + private void updatePageReferences(PDFCloneUtility cloner, + COSArray parentTreeEntry, Map objMapping) + throws IOException { for (int i = 0; i < parentTreeEntry.size(); i++) { COSBase subEntry = parentTreeEntry.getObject(i); if (subEntry instanceof COSArray) { - updatePageReferences((COSArray) subEntry, objMapping); + updatePageReferences(cloner, (COSArray) subEntry, objMapping); } else if (subEntry instanceof COSDictionary) { - updatePageReferences((COSDictionary) subEntry, objMapping); - } - } - } - - /** - * Update the P reference to the new parent dictionary. - * - * @param kArray the kids array - * @param newParent the new parent - */ - private void updateParentEntry(COSArray kArray, COSDictionary newParent) - { - for (int i = 0; i < kArray.size(); i++) - { - COSBase subEntry = kArray.getObject(i); - if (subEntry instanceof COSDictionary) - { - COSDictionary dictEntry = (COSDictionary) subEntry; - if (dictEntry.getDictionaryObject(COSName.P) != null) - { - dictEntry.setItem(COSName.P, newParent); - } + updatePageReferences(cloner, (COSDictionary) subEntry, objMapping); } } } @@ -684,12 +1610,18 @@ private void updateParentEntry(COSArray kArray, COSDictionary newParent) */ private void updateStructParentEntries(PDPage page, int structParentOffset) throws IOException { - page.setStructParents(page.getStructParents() + structParentOffset); + if (page.getStructParents() >= 0) + { + page.setStructParents(page.getStructParents() + structParentOffset); + } List annots = page.getAnnotations(); - List newannots = new ArrayList(); + List newannots = new 
ArrayList(annots.size()); for (PDAnnotation annot : annots) { - annot.setStructParent(annot.getStructParent() + structParentOffset); + if (annot.getStructParent() >= 0) + { + annot.setStructParent(annot.getStructParent() + structParentOffset); + } newannots.add(annot); } page.setAnnotations(newannots); @@ -705,4 +1637,25 @@ private boolean isDynamicXfa(PDAcroForm acroForm) { return acroForm != null && acroForm.xfaIsDynamic(); } + + /** + * This will add all of the dictionaries keys/values to this dictionary, but + * only if they are not in an exclusion list and if they don't already + * exist. If a key already exists in this dictionary then nothing is + * changed. + * + * @param src The source dictionary to get the keys/values from. + * @param dst The destination dictionary to merge the keys/values into. + * @param exclude Names of keys that shall be skipped. + */ + private void mergeInto(COSDictionary src, COSDictionary dst, Set exclude) + { + for (Map.Entry entry : src.entrySet()) + { + if (!exclude.contains(entry.getKey()) && !dst.containsKey(entry.getKey())) + { + dst.setItem(entry.getKey(), entry.getValue()); + } + } + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PageExtractor.java b/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PageExtractor.java index 8ad64262435..f6935f57dc9 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PageExtractor.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PageExtractor.java @@ -27,12 +27,12 @@ */ public class PageExtractor { - private PDDocument sourceDocument; + private final PDDocument sourceDocument; // first page to extract is page 1 (by default) private int startPage = 1; - private int endPage = 0; + private int endPage; /** * Creates a new instance of PageExtractor @@ -52,7 +52,7 @@ public PageExtractor(PDDocument sourceDocument) */ public PageExtractor(PDDocument sourceDocument, int startPage, int endPage) { - this(sourceDocument); + this.sourceDocument = sourceDocument; 
this.startPage = startPage; this.endPage = endPage; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java b/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java index 2187d20a5b9..68667547f32 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.pdfbox.io.MemoryUsageSetting; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.interactive.action.PDAction; @@ -45,19 +46,43 @@ public class Splitter private int endPage = Integer.MAX_VALUE; private List destinationDocuments; - private int currentPageNumber = 0; + private int currentPageNumber; + + private MemoryUsageSetting memoryUsageSetting = null; + + /** + * @return the current memory setting. + */ + public MemoryUsageSetting getMemoryUsageSetting() + { + return memoryUsageSetting; + } + + /** + * Set the memory setting. + * + * @param memoryUsageSetting The memory setting. + */ + public void setMemoryUsageSetting(MemoryUsageSetting memoryUsageSetting) + { + this.memoryUsageSetting = memoryUsageSetting; + } /** * This will take a document and split into several other documents. * * @param document The document to split. * - * @return A list of all the split documents. + * @return A list of all the split documents. These should all be saved before closing any + * documents, including the source document. Any further operations should be made after + * reloading them, to avoid problems due to resource sharing. 
* * @throws IOException If there is an IOError */ public List split(PDDocument document) throws IOException { + // reset the currentPageNumber for a case if the split method will be used several times + currentPageNumber = 0; destinationDocuments = new ArrayList(); sourceDocument = document; processPages(); @@ -86,7 +111,7 @@ public void setSplitAtPage(int split) /** * This will set the start page. * - * @param start the start page + * @param start the 1-based start page * @throws IllegalArgumentException if the start page is smaller than one. */ public void setStartPage(int start) @@ -101,7 +126,7 @@ public void setStartPage(int start) /** * This will set the end page. * - * @param end the end page + * @param end the 1-based end page * @throws IllegalArgumentException if the end page is smaller than one. */ public void setEndPage(int end) @@ -120,9 +145,8 @@ public void setEndPage(int end) */ private void processPages() throws IOException { - for (int i = 0; i < sourceDocument.getNumberOfPages(); i++) + for (PDPage page : sourceDocument.getPages()) { - PDPage page = sourceDocument.getPage(i); if (currentPageNumber + 1 >= startPage && currentPageNumber + 1 <= endPage) { processPage(page); @@ -167,13 +191,13 @@ private void createNewDocumentIfNecessary() throws IOException * return isPrime(pageNumber); * } * - * @param pageNumber the page number to be checked as splitting page + * @param pageNumber the 0-based page number to be checked as splitting page * * @return true If a new document should be created. */ protected boolean splitAtPage(int pageNumber) { - return pageNumber % splitLength == 0; + return (pageNumber + 1 - Math.max(1, startPage)) % splitLength == 0; } /** @@ -184,7 +208,8 @@ protected boolean splitAtPage(int pageNumber) */ protected PDDocument createNewDocument() throws IOException { - PDDocument document = new PDDocument(); + PDDocument document = memoryUsageSetting == null ? 
+ new PDDocument() : new PDDocument(memoryUsageSetting); document.getDocument().setVersion(getSourceDocument().getVersion()); document.setDocumentInformation(getSourceDocument().getDocumentInformation()); document.getDocumentCatalog().setViewerPreferences( @@ -204,11 +229,7 @@ protected void processPage(PDPage page) throws IOException createNewDocumentIfNecessary(); PDPage imported = getDestinationDocument().importPage(page); - imported.setCropBox(page.getCropBox()); - imported.setMediaBox(page.getMediaBox()); - // only the resources of the page will be copied imported.setResources(page.getResources()); - imported.setRotation(page.getRotation()); // remove page links to avoid copying not needed resources processAnnotations(imported); } @@ -222,28 +243,25 @@ private void processAnnotations(PDPage imported) throws IOException { PDAnnotationLink link = (PDAnnotationLink)annotation; PDDestination destination = link.getDestination(); - if (destination == null && link.getAction() != null) + PDAction action = link.getAction(); + if (destination == null && action instanceof PDActionGoTo) { - PDAction action = link.getAction(); - if (action instanceof PDActionGoTo) - { - destination = ((PDActionGoTo)action).getDestination(); - } + destination = ((PDActionGoTo) action).getDestination(); } if (destination instanceof PDPageDestination) { - // TODO preserve links to pages within the splitted result + // TODO preserve links to pages within the split result ((PDPageDestination) destination).setPage(null); } } - // TODO preserve links to pages within the splitted result + // TODO preserve links to pages within the split result annotation.setPage(null); } } /** * The source PDF document. 
* - * @return the pdf to be splitted + * @return the pdf to be split */ protected final PDDocument getSourceDocument() { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java index 379e72eb54d..4512b21dd2f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java @@ -18,7 +18,9 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.util.Arrays; +import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.CharsetDecoder; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSArray; @@ -52,6 +54,8 @@ public abstract class BaseParser static final int MAX_LENGTH_LONG = Long.toString(Long.MAX_VALUE).length(); + private final CharsetDecoder utf8Decoder = Charsets.UTF_8.newDecoder(); + /** * Log instance. */ @@ -115,7 +119,7 @@ public abstract class BaseParser /** * This is the stream that will be read from. */ - protected final SequentialSource seqSource; + final SequentialSource seqSource; /** * This is the document that will be parsed. @@ -125,7 +129,7 @@ public abstract class BaseParser /** * Default constructor. 
*/ - public BaseParser(SequentialSource pdfSource) + BaseParser(SequentialSource pdfSource) { this.seqSource = pdfSource; } @@ -147,26 +151,31 @@ private static boolean isHexDigit(char ch) private COSBase parseCOSDictionaryValue() throws IOException { long numOffset = seqSource.getPosition(); - COSBase number = parseDirObject(); + COSBase value = parseDirObject(); skipSpaces(); - if (!isDigit()) + // proceed if the given object is a number and the following is a number as well + if ((!(value instanceof COSNumber) || !isDigit())) { - return number; + return value; } + // read the remaining information of the object number long genOffset = seqSource.getPosition(); COSBase generationNumber = parseDirObject(); skipSpaces(); readExpectedChar('R'); - if (!(number instanceof COSInteger)) + if (!(value instanceof COSInteger)) { - throw new IOException("expected number, actual=" + number + " at offset " + numOffset); + LOG.error("expected number, actual=" + value + " at offset " + numOffset); + return COSNull.NULL; } if (!(generationNumber instanceof COSInteger)) { - throw new IOException("expected number, actual=" + number + " at offset " + genOffset); + LOG.error("expected number, actual=" + value + " at offset " + genOffset); + return COSNull.NULL; } - COSObjectKey key = new COSObjectKey(((COSInteger) number).longValue(), + COSObjectKey key = new COSObjectKey(((COSInteger) value).longValue(), ((COSInteger) generationNumber).intValue()); + // dereference the object return getObjectFromPool(key); } @@ -183,7 +192,7 @@ private COSBase getObjectFromPool(COSObjectKey key) throws IOException /** * This will parse a PDF dictionary. * - * @return The parsed dictionary. + * @return The parsed dictionary, never null. * * @throws IOException If there is an error reading the stream. 
*/ @@ -204,7 +213,12 @@ protected COSDictionary parseCOSDictionary() throws IOException } else if (c == '/') { - parseCOSDictionaryNameValuePair(obj); + // something went wrong, most likely the dictionary is corrupted + // stop immediately and return everything read so far + if (!parseCOSDictionaryNameValuePair(obj)) + { + return obj; + } } else { @@ -268,29 +282,19 @@ private boolean readUntilEndOfCOSDictionary() throws IOException return false; } - private void parseCOSDictionaryNameValuePair(COSDictionary obj) throws IOException + private boolean parseCOSDictionaryNameValuePair(COSDictionary obj) throws IOException { COSName key = parseCOSName(); COSBase value = parseCOSDictionaryValue(); skipSpaces(); - if (((char) seqSource.peek()) == 'd') + if (value == null) { - // if the next string is 'def' then we are parsing a cmap stream - // and want to ignore it, otherwise throw an exception. - String potentialDEF = readString(); - if (!potentialDEF.equals(DEF)) - { - seqSource.unread(potentialDEF.getBytes(ISO_8859_1)); - } - else - { - skipSpaces(); - } + LOG.warn("Bad dictionary declaration at offset " + seqSource.getPosition()); + return false; } - - if (value == null) + else if (value instanceof COSInteger && !((COSInteger)value).isValid()) { - LOG.warn("Bad Dictionary Declaration " + seqSource); + LOG.warn("Skipped out of range number value at offset " + seqSource.getPosition()); } else { @@ -298,6 +302,7 @@ private void parseCOSDictionaryNameValuePair(COSDictionary obj) throws IOExcepti value.setDirect(true); obj.setItem(key, value); } + return true; } protected void skipWhiteSpaces() throws IOException @@ -335,60 +340,43 @@ else if (ASCII_LF != whitespace) } /** - * This is really a bug in the Document creators code, but it caused a crash - * in PDFBox, the first bug was in this format: - * /Title ( (5) - * /Creator which was patched in 1 place. 
- * However it missed the case where the Close Paren was escaped + * This is really a bug in the Document creators code, but it caused a crash in PDFBox, the first bug was in this + * format: /Title ( (5) /Creator which was patched in 1 place. * - * The second bug was in this format - * /Title (c:\) - * /Producer + * However it missed the case where the number of opening and closing parenthesis isn't balanced * - * This patch moves this code out of the parseCOSString method, so it can be used twice. + * The second bug was in this format /Title (c:\) /Producer * + * This patch moves this code out of the parseCOSString method, so it can be used twice. * * @param bracesParameter the number of braces currently open. * * @return the corrected value of the brace counter * @throws IOException */ - private int checkForMissingCloseParen(final int bracesParameter) throws IOException + private int checkForEndOfString(final int bracesParameter) throws IOException { int braces = bracesParameter; byte[] nextThreeBytes = new byte[3]; int amountRead = seqSource.read(nextThreeBytes); - //lets handle the special case seen in Bull River Rules and Regulations.pdf - //The dictionary looks like this - // 2 0 obj - // << - // /Type /Info - // /Creator (PaperPort http://www.scansoft.com) - // /Producer (sspdflib 1.0 http://www.scansoft.com) - // /Title ( (5) - // /Author () - // /Subject () - // - // Notice the /Title, the braces are not even but they should - // be. So lets assume that if we encounter an this scenario - // then that - // means that there is an error in the pdf and assume that - // was the end of the document. 
- // - if (amountRead == 3 && - (( nextThreeBytes[0] == ASCII_CR // Look for a carriage return - && nextThreeBytes[1] == ASCII_LF // Look for a new line - && nextThreeBytes[2] == 0x2f ) // Look for a slash / - // Add a second case without a new line - || (nextThreeBytes[0] == ASCII_CR // Look for a carriage return - && nextThreeBytes[1] == 0x2f ))) // Look for a slash / + // Check the next 3 bytes if available + // The following cases are valid indicators for the end of the string + // 1. Next line contains another COSObject: CR + LF + '/' + // 2. COSDictionary ends in the next line: CR + LF + '>' + // 3. Next line contains another COSObject: CR + '/' + // 4. COSDictionary ends in the next line: CR + '>' + if (amountRead == 3 && nextThreeBytes[0] == ASCII_CR) + { + if ( (nextThreeBytes[1] == ASCII_LF && (nextThreeBytes[2] == '/') || nextThreeBytes[2] == '>') + || nextThreeBytes[1] == '/' || nextThreeBytes[1] == '>') { braces = 0; } + } if (amountRead > 0) { - seqSource.unread(Arrays.copyOfRange(nextThreeBytes, 0, amountRead)); + seqSource.unread(nextThreeBytes, 0, amountRead); } return braces; } @@ -403,27 +391,19 @@ private int checkForMissingCloseParen(final int bracesParameter) throws IOExcept protected COSString parseCOSString() throws IOException { char nextChar = (char) seqSource.read(); - char openBrace; - char closeBrace; - if( nextChar == '(' ) - { - openBrace = '('; - closeBrace = ')'; - } - else if( nextChar == '<' ) + if (nextChar == '<') { return parseCOSHexString(); } - else + else if (nextChar != '(') { throw new IOException( "parseCOSString string should start with '(' or '<' and not '" + - nextChar + "' " + seqSource); + nextChar + "' at offset " + seqSource.getPosition()); } ByteArrayOutputStream out = new ByteArrayOutputStream(); - //This is the number of braces read - // + // This is the number of braces read int braces = 1; int c = seqSource.read(); while( braces > 0 && c != -1) @@ -431,17 +411,17 @@ else if( nextChar == '<' ) char ch = 
(char)c; int nextc = -2; // not yet read - if(ch == closeBrace) + if (ch == ')') { braces--; - braces = checkForMissingCloseParen(braces); + braces = checkForEndOfString(braces); if( braces != 0 ) { out.write(ch); } } - else if( ch == openBrace ) + else if (ch == '(') { braces++; out.write(ch); @@ -469,7 +449,7 @@ else if( ch == '\\' ) break; case ')': // PDFBox 276 /Title (c:\) - braces = checkForMissingCloseParen(braces); + braces = checkForEndOfString(braces); if( braces != 0 ) { out.write(next); @@ -501,8 +481,7 @@ else if( ch == '\\' ) case '5': case '6': case '7': - { - StringBuffer octal = new StringBuffer(); + StringBuilder octal = new StringBuilder(); octal.append( next ); c = seqSource.read(); char digit = (char)c; @@ -536,13 +515,10 @@ else if( ch == '\\' ) } out.write(character); break; - } default: - { // dropping the backslash // see 7.3.4.2 Literal Strings for further information out.write(next); - } } } else @@ -641,6 +617,7 @@ else if ( ( c == ' ' ) || ( c == '\n' ) || */ protected COSArray parseCOSArray() throws IOException { + long startPosition = seqSource.getPosition(); readExpectedChar('['); COSArray po = new COSArray(); COSBase pbo; @@ -652,10 +629,10 @@ protected COSArray parseCOSArray() throws IOException if( pbo instanceof COSObject ) { // We have to check if the expected values are there or not PDFBOX-385 - if (po.get(po.size()-1) instanceof COSInteger) + if (po.size() > 0 && po.get(po.size() - 1) instanceof COSInteger) { COSInteger genNumber = (COSInteger)po.remove( po.size() -1 ); - if (po.get(po.size()-1) instanceof COSInteger) + if (po.size() > 0 && po.get(po.size() - 1) instanceof COSInteger) { COSInteger number = (COSInteger)po.remove( po.size() -1 ); COSObjectKey key = new COSObjectKey(number.longValue(), genNumber.intValue()); @@ -679,12 +656,18 @@ protected COSArray parseCOSArray() throws IOException else { //it could be a bad object in the array which is just skipped - LOG.warn("Corrupt object reference at offset " + 
seqSource.getPosition()); - - // This could also be an "endobj" or "endstream" which means we can assume that - // the array has ended. + LOG.warn("Corrupt array element at offset " + + seqSource.getPosition() + ", start offset: " + startPosition); String isThisTheEnd = readString(); + // return immediately if a corrupt element is followed by another array + // to avoid a possible infinite recursion as most likely the whole array is corrupted + if (isThisTheEnd.isEmpty() && seqSource.peek() == '[') + { + return po; + } seqSource.unread(isThisTheEnd.getBytes(ISO_8859_1)); + // This could also be an "endobj" or "endstream" which means we can assume that + // the array has ended. if(ENDOBJ_STRING.equals(isThisTheEnd) || ENDSTREAM_STRING.equals(isThisTheEnd)) { return po; @@ -707,7 +690,8 @@ protected COSArray parseCOSArray() throws IOException protected boolean isEndOfName(int ch) { return ch == ASCII_SPACE || ch == ASCII_CR || ch == ASCII_LF || ch == 9 || ch == '>' || - ch == '<' || ch == '[' || ch =='/' || ch ==']' || ch ==')' || ch =='('; + ch == '<' || ch == '[' || ch =='/' || ch ==']' || ch ==')' || ch =='(' || + ch == 0 || ch == '\f' || ch == '%'; } /** @@ -736,7 +720,7 @@ protected COSName parseCOSName() throws IOException // valid hex digits. 
if (isHexDigit((char)ch1) && isHexDigit((char)ch2)) { - String hex = "" + (char)ch1 + (char)ch2; + String hex = Character.toString((char) ch1) + (char) ch2; try { buffer.write(Integer.parseInt(hex, 16)); @@ -775,10 +759,37 @@ else if (isEndOfName(ch)) { seqSource.unread(c); } - String string = new String(buffer.toByteArray(), Charsets.UTF_8); + + byte[] bytes = buffer.toByteArray(); + String string; + if (isValidUTF8(bytes)) + { + string = new String(buffer.toByteArray(), Charsets.UTF_8); + } + else + { + // some malformed PDFs don't use UTF-8 see PDFBOX-3347 + string = new String(buffer.toByteArray(), Charsets.WINDOWS_1252); + } return COSName.getPDFName(string); } + /** + * Returns true if a byte sequence is valid UTF-8. + */ + private boolean isValidUTF8(byte[] input) + { + try + { + utf8Decoder.decode(ByteBuffer.wrap(input)); + return true; + } + catch (CharacterCodingException e) + { + return false; + } + } + /** * This will parse a boolean object from the stream. * @@ -788,7 +799,7 @@ else if (isEndOfName(ch)) */ protected COSBoolean parseBoolean() throws IOException { - COSBoolean retval = null; + COSBoolean retval; char c = (char) seqSource.peek(); if( c == 't' ) { @@ -833,140 +844,112 @@ else if( c == 'f' ) */ protected COSBase parseDirObject() throws IOException { - COSBase retval = null; - skipSpaces(); - int nextByte = seqSource.peek(); - char c = (char)nextByte; + char c = (char)seqSource.peek(); switch(c) { case '<': - { // pull off first left bracket int leftBracket = seqSource.read(); // check for second left bracket c = (char) seqSource.peek(); seqSource.unread(leftBracket); - if(c == '<') - { - - retval = parseCOSDictionary(); - skipSpaces(); - } - else - { - retval = parseCOSString(); - } - break; - } + return c == '<' ? 
parseCOSDictionary() : parseCOSString(); case '[': - { // array - retval = parseCOSArray(); - break; - } + return parseCOSArray(); case '(': - retval = parseCOSString(); - break; + return parseCOSString(); case '/': // name - retval = parseCOSName(); - break; + return parseCOSName(); case 'n': - { // null readExpectedString(NULL); - retval = COSNull.NULL; - break; - } + return COSNull.NULL; case 't': - { String trueString = new String( seqSource.readFully(4), ISO_8859_1 ); if( trueString.equals( TRUE ) ) { - retval = COSBoolean.TRUE; + return COSBoolean.TRUE; } else { throw new IOException( "expected true actual='" + trueString + "' " + seqSource + "' at offset " + seqSource.getPosition()); } - break; - } case 'f': - { String falseString = new String( seqSource.readFully(5), ISO_8859_1 ); if( falseString.equals( FALSE ) ) { - retval = COSBoolean.FALSE; + return COSBoolean.FALSE; } else { throw new IOException( "expected false actual='" + falseString + "' " + seqSource + "' at offset " + seqSource.getPosition()); } - break; - } case 'R': seqSource.read(); - retval = new COSObject(null); - break; + return new COSObject(null); case (char)-1: return null; default: - { if( Character.isDigit(c) || c == '-' || c == '+' || c == '.') { - StringBuilder buf = new StringBuilder(); - int ic = seqSource.read(); - c = (char)ic; - while( Character.isDigit( c )|| - c == '-' || - c == '+' || - c == '.' 
|| - c == 'E' || - c == 'e' ) - { - buf.append( c ); - ic = seqSource.read(); - c = (char)ic; - } - if( ic != -1 ) - { - seqSource.unread(ic); - } - retval = COSNumber.get( buf.toString() ); + return parseCOSNumber(); } - else + // This is not suppose to happen, but we will allow for it + // so we are more compatible with POS writers that don't + // follow the spec + long startOffset = seqSource.getPosition(); + String badString = readString(); + if (badString.isEmpty()) { - //This is not suppose to happen, but we will allow for it - //so we are more compatible with POS writers that don't - //follow the spec - String badString = readString(); - if( badString == null || badString.length() == 0 ) - { - int peek = seqSource.peek(); - // we can end up in an infinite loop otherwise - throw new IOException( "Unknown dir object c='" + c + - "' cInt=" + (int)c + " peek='" + (char)peek - + "' peekInt=" + peek + " at offset " + seqSource.getPosition() ); - } + int peek = seqSource.peek(); + // we can end up in an infinite loop otherwise + throw new IOException( + "Unknown dir object c='" + c + "' cInt=" + (int) c + " peek='" + (char) peek + + "' peekInt=" + peek + " at offset " + seqSource.getPosition() + + " (start offset: " + startOffset + ")"); + } - // if it's an endstream/endobj, we want to put it back so the caller will see it - if(ENDOBJ_STRING.equals(badString) || ENDSTREAM_STRING.equals(badString)) - { - seqSource.unread(badString.getBytes(ISO_8859_1)); - } + // if it's an endstream/endobj, we want to put it back so the caller will see it + if (ENDOBJ_STRING.equals(badString) || ENDSTREAM_STRING.equals(badString)) + { + seqSource.unread(badString.getBytes(ISO_8859_1)); } + else + { + LOG.warn("Skipped unexpected dir object = '" + badString + "' at offset " + + seqSource.getPosition() + " (start offset: " + startOffset + ")"); + } + } + return null; + } + + private COSNumber parseCOSNumber() throws IOException + { + StringBuilder buf = new StringBuilder(); + int ic = 
seqSource.read(); + char c = (char) ic; + while (Character.isDigit(c) || c == '-' || c == '+' || c == '.' || c == 'E' || c == 'e') + { + buf.append(c); + ic = seqSource.read(); + c = (char) ic; } + if (ic != -1) + { + seqSource.unread(ic); } - return retval; + return COSNumber.get(buf.toString()); } /** * This will read the next string from the stream. * - * @return The string that was read from the stream. + * @return The string that was read from the stream, never null. * * @throws IOException If there is an error reading from the stream. */ @@ -1319,7 +1302,9 @@ protected int readInt() throws IOException catch( NumberFormatException e ) { seqSource.unread(intBuffer.toString().getBytes(ISO_8859_1)); - throw new IOException( "Error: Expected an integer type at offset "+ seqSource.getPosition(), e); + throw new IOException("Error: Expected an integer type at offset " + + seqSource.getPosition() + + ", instead got '" + intBuffer + "'", e); } return retval; } @@ -1353,24 +1338,17 @@ protected long readLong() throws IOException } /** - * This method is used to read a token by the {@linkplain #readInt()} method - * and the {@linkplain #readLong()} method. + * This method is used to read a token by the {@linkplain #readInt()} and the {@linkplain #readLong()} method. Valid + * delimiters are any non digit values. * * @return the token to parse as integer or long by the calling method. * @throws IOException throws by the {@link #seqSource} methods. 
*/ protected final StringBuilder readStringNumber() throws IOException { - int lastByte = 0; + int lastByte; StringBuilder buffer = new StringBuilder(); - while( (lastByte = seqSource.read() ) != ASCII_SPACE && - lastByte != ASCII_LF && - lastByte != ASCII_CR && - lastByte != 60 && //see sourceforge bug 1714707 - lastByte != '[' && // PDFBOX-1845 - lastByte != '(' && // PDFBOX-2579 - lastByte != 0 && //See sourceforge bug 853328 - lastByte != -1 ) + while ((lastByte = seqSource.read()) >= '0' && lastByte <= '9') { buffer.append( (char)lastByte ); if (buffer.length() > MAX_LENGTH_LONG) diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java index aa1882a14cd..8ed185e712e 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java @@ -17,14 +17,15 @@ package org.apache.pdfbox.pdfparser; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; +import java.security.KeyStore; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -32,7 +33,6 @@ import java.util.Queue; import java.util.Set; import java.util.TreeMap; -import java.util.Vector; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSArray; @@ -45,10 +45,16 @@ import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSObjectKey; import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccessRead; import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType; +import org.apache.pdfbox.pdmodel.encryption.AccessPermission; +import 
org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial; +import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; +import org.apache.pdfbox.pdmodel.encryption.PDEncryption; +import org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial; import org.apache.pdfbox.pdmodel.encryption.SecurityHandler; - +import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial; import static org.apache.pdfbox.util.Charsets.ISO_8859_1; @@ -85,7 +91,13 @@ public class COSParser extends BaseParser private final byte[] strmBuf = new byte[ STRMBUFLEN ]; protected final RandomAccessRead source; - + + private AccessPermission accessPermission; + private InputStream keyStoreInputStream = null; + @SuppressWarnings({"squid:S2068"}) + private String password = ""; + private String keyAlias = null; + /** * Only parse the PDF file minimally allowing access to basic information. */ @@ -112,6 +124,16 @@ public class COSParser extends BaseParser */ protected static final char[] OBJ_MARKER = new char[] { 'o', 'b', 'j' }; + /** + * trailer-marker. + */ + private static final char[] TRAILER_MARKER = new char[] { 't', 'r', 'a', 'i', 'l', 'e', 'r' }; + + /** + * ObjStream-marker. + */ + private static final char[] OBJ_STREAM = new char[] { '/', 'O', 'b', 'j', 'S', 't', 'm' }; + private long trailerOffset; /** @@ -125,12 +147,16 @@ public class COSParser extends BaseParser private boolean isLenient = true; protected boolean initialParseDone = false; + + private boolean trailerWasRebuild = false; /** * Contains all found objects of a brute force search. */ private Map bfSearchCOSObjectKeyOffsets = null; + private Long lastEOFMarker = null; private List bfSearchXRefTablesOffsets = null; private List bfSearchXRefStreamsOffsets = null; + private PDEncryption encryption = null; /** * The security handler. @@ -158,6 +184,8 @@ public class COSParser extends BaseParser /** * Default constructor. + * + * @param source input representing the pdf. 
*/ public COSParser(RandomAccessRead source) { @@ -165,6 +193,25 @@ public COSParser(RandomAccessRead source) this.source = source; } + /** + * Constructor for encrypted pdfs. + * + * @param source input representing the pdf. + * @param password password to be used for decryption. + * @param keyStore key store to be used for decryption when using public key security + * @param keyAlias alias to be used for decryption when using public key security + * + */ + public COSParser(RandomAccessRead source, String password, InputStream keyStore, + String keyAlias) + { + super(new RandomAccessSource(source)); + this.source = source; + this.password = password; + this.keyAlias = keyAlias; + keyStoreInputStream = keyStore; + } + /** * Sets how many trailing bytes of PDF file are searched for EOF marker and 'startxref' marker. If not set we use * default value {@link #DEFAULT_TRAIL_BYTECOUNT}. @@ -188,6 +235,62 @@ public void setEOFLookupRange(int byteCount) } } + /** + * Read the trailer information and provide a COSDictionary containing the trailer information. 
+ * + * @return a COSDictionary containing the trailer information + * @throws IOException if something went wrong + */ + protected COSDictionary retrieveTrailer() throws IOException + { + COSDictionary trailer = null; + boolean rebuildTrailer = false; + try + { + // parse startxref + // TODO FDF files don't have a startxref value, so that rebuildTrailer is triggered + long startXRefOffset = getStartxrefOffset(); + if (startXRefOffset > -1) + { + trailer = parseXref(startXRefOffset); + } + else + { + rebuildTrailer = isLenient(); + } + } + catch (IOException exception) + { + if (isLenient()) + { + rebuildTrailer = true; + } + else + { + throw exception; + } + } + // check if the trailer contains a Root object + if (trailer != null && trailer.getItem(COSName.ROOT) == null) + { + rebuildTrailer = isLenient(); + } + if (rebuildTrailer) + { + trailer = rebuildTrailer(); + } + else + { + // prepare decryption if necessary + prepareDecryption(); + if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty()) + { + bfSearchForObjStreams(); + } + } + return trailer; + } + /** * Parses cross reference tables. * @@ -208,46 +311,36 @@ protected COSDictionary parseXref(long startXRefOffset) throws IOException document.setStartXref(startXrefOffset); long prev = startXrefOffset; // ---- parse whole chain of xref tables/object streams using PREV reference + Set prevSet = new HashSet(); + COSDictionary trailer = null; while (prev > 0) { // seek to xref table source.seek(prev); - // skip white spaces skipSpaces(); + // save current position instead of prev due to skipped spaces + prevSet.add(source.getPosition()); // -- parse xref if (source.peek() == X) { // xref table and trailer // use existing parser to parse xref table - parseXrefTable(prev); - // parse the last trailer. 
- trailerOffset = source.getPosition(); - // PDFBOX-1739 skip extra xref entries in RegisSTAR documents - while (isLenient && source.peek() != 't') - { - if (source.getPosition() == trailerOffset) - { - // warn only the first time - LOG.warn("Expected trailer object at position " + trailerOffset - + ", keep trying"); - } - readLine(); - } - if (!parseTrailer()) + if (!parseXrefTable(prev) || !parseTrailer()) { - throw new IOException("Expected trailer object at position: " + throw new IOException("Expected trailer object at offset " + source.getPosition()); } - COSDictionary trailer = xrefTrailerResolver.getCurrentTrailer(); + trailer = xrefTrailerResolver.getCurrentTrailer(); // check for a XRef stream, it may contain some object ids of compressed objects if(trailer.containsKey(COSName.XREF_STM)) { int streamOffset = trailer.getInt(COSName.XREF_STM); // check the xref stream reference - fixedOffset = checkXRefStreamOffset(streamOffset, false); + fixedOffset = checkXRefOffset(streamOffset); if (fixedOffset > -1 && fixedOffset != streamOffset) { + LOG.warn("/XRefStm offset " + streamOffset + " is incorrect, corrected to " + fixedOffset); streamOffset = (int)fixedOffset; trailer.setInt(COSName.XREF_STM, streamOffset); } @@ -255,7 +348,21 @@ protected COSDictionary parseXref(long startXRefOffset) throws IOException { source.seek(streamOffset); skipSpaces(); - parseXrefObjStream(prev, false); + try + { + parseXrefObjStream(prev, false); + } + catch (IOException ex) + { + if (isLenient) + { + LOG.error("Failed to parse /XRefStm at offset " + streamOffset, ex); + } + else + { + throw ex; + } + } } else { @@ -269,38 +376,32 @@ protected COSDictionary parseXref(long startXRefOffset) throws IOException } } } - prev = trailer.getInt(COSName.PREV); - if (prev > 0) - { - // check the xref table reference - fixedOffset = checkXRefOffset(prev); - if (fixedOffset > -1 && fixedOffset != prev) - { - prev = fixedOffset; - trailer.setLong(COSName.PREV, prev); - } - } + prev = 
trailer.getLong(COSName.PREV); } else { // parse xref stream prev = parseXrefObjStream(prev, true); - if (prev > 0) + trailer = xrefTrailerResolver.getCurrentTrailer(); + } + if (prev > 0) + { + // check the xref table reference + fixedOffset = checkXRefOffset(prev); + if (fixedOffset > -1 && fixedOffset != prev) { - // check the xref table reference - fixedOffset = checkXRefOffset(prev); - if (fixedOffset > -1 && fixedOffset != prev) - { - prev = fixedOffset; - COSDictionary trailer = xrefTrailerResolver.getCurrentTrailer(); - trailer.setLong(COSName.PREV, prev); - } + prev = fixedOffset; + trailer.setLong(COSName.PREV, prev); } } + if (prevSet.contains(prev)) + { + throw new IOException("/Prev loop at offset " + prev); + } } // ---- build valid xrefs out of the xref chain xrefTrailerResolver.setStartxref(startXrefOffset); - COSDictionary trailer = xrefTrailerResolver.getTrailer(); + trailer = xrefTrailerResolver.getTrailer(); document.setTrailer(trailer); document.setIsXRefStream(XRefType.STREAM == xrefTrailerResolver.getXrefType()); // check the offsets of all referenced objects @@ -318,13 +419,18 @@ protected COSDictionary parseXref(long startXRefOffset) throws IOException private long parseXrefObjStream(long objByteOffset, boolean isStandalone) throws IOException { // ---- parse indirect object head - readObjectNumber(); + long objectNumber = readObjectNumber(); + + // remember the highest XRef object number to avoid it being reused in incremental saving + long currentHighestXRefObjectNumber = document.getHighestXRefObjectNumber(); + document.setHighestXRefObjectNumber(Math.max(currentHighestXRefObjectNumber, objectNumber)); + readGenerationNumber(); readExpectedString(OBJ_MARKER, true); COSDictionary dict = parseCOSDictionary(); COSStream xrefStream = parseCOSStream(dict); - parseXrefStream(xrefStream, (int) objByteOffset, isStandalone); + parseXrefStream(xrefStream, objByteOffset, isStandalone); xrefStream.close(); return dict.getLong(COSName.PREV); @@ 
-385,21 +491,14 @@ protected final long getStartxrefOffset() throws IOException } // find last startxref preceding EOF marker bufOff = lastIndexOf(STARTXREF, buf, bufOff); - long startXRefOffset = skipBytes + bufOff; - if (bufOff < 0) { - if (isLenient) - { - LOG.debug("Can't find offset for startxref"); - return -1; - } - else - { - throw new IOException("Missing 'startxref' marker."); - } + throw new IOException("Missing 'startxref' marker."); + } + else + { + return skipBytes + bufOff; } - return startXRefOffset; } /** @@ -492,7 +591,9 @@ private void addNewToList(final Queue toBeParsedList, /** * Adds newObject to toBeParsedList if it is not an COSObject or we didn't - * add this COSObject already (checked via addedObjects). + * add this COSObject already (checked via addedObjects). Simple objects are + * not added because nothing is done with them when toBeParsedList is + * processed. */ private void addNewToList(final Queue toBeParsedList, final COSBase newObject, final Set addedObjects) @@ -504,8 +605,12 @@ private void addNewToList(final Queue toBeParsedList, final COSBase new { return; } + toBeParsedList.add(newObject); + } + else if (newObject instanceof COSDictionary || newObject instanceof COSArray) + { + toBeParsedList.add(newObject); } - toBeParsedList.add(newObject); } /** @@ -544,10 +649,9 @@ protected void parseDictObjects(COSDictionary dict, COSName... 
excludeObjects) t } else if (baseObj instanceof COSArray) { - final Iterator arrIter = ((COSArray) baseObj).iterator(); - while (arrIter.hasNext()) + for (COSBase cosBase : (COSArray) baseObj) { - addNewToList(toBeParsedList, arrIter.next(), addedObjects); + addNewToList(toBeParsedList, cosBase, addedObjects); } } else if (baseObj instanceof COSObject) @@ -558,7 +662,17 @@ else if (baseObj instanceof COSObject) if (!parsedObjects.contains(objId)) { - Long fileOffset = xrefTrailerResolver.getXrefTable().get(objKey); + Long fileOffset = document.getXrefTable().get(objKey); + if (fileOffset == null && isLenient && bfSearchCOSObjectKeyOffsets != null) + { + fileOffset = bfSearchCOSObjectKeyOffsets.get(objKey); + if (fileOffset != null) + { + LOG.debug("Set missing " + fileOffset + " for object " + objKey); + document.getXrefTable().put(objKey, fileOffset); + } + } + // it is allowed that object references point to null, // thus we have to test if (fileOffset != null && fileOffset != 0) @@ -570,15 +684,39 @@ else if (baseObj instanceof COSObject) else { // negative offset means we have a compressed - // object within object stream; - // get offset of object stream - fileOffset = xrefTrailerResolver.getXrefTable().get( - new COSObjectKey((int)-fileOffset, 0)); - if ((fileOffset == null) || (fileOffset <= 0)) + // object within object stream => get offset of object stream + COSObjectKey key = new COSObjectKey((int) -fileOffset, 0); + fileOffset = document.getXrefTable().get(key); + if (fileOffset == null || fileOffset <= 0) { - throw new IOException( - "Invalid object stream xref object reference for key '" + objKey + "': " - + fileOffset); + if (isLenient && bfSearchCOSObjectKeyOffsets != null) + { + fileOffset = bfSearchCOSObjectKeyOffsets.get(key); + if (fileOffset != null) + { + LOG.debug("Set missing " + fileOffset + " for object " + + key); + document.getXrefTable().put(key, fileOffset); + } + else + { + LOG.warn("Invalid object stream xref object reference for key 
'" + + objKey + "': " + fileOffset); + continue; + } + } + else + { + String msg = + "Invalid object stream xref object reference for key '" + + objKey + "': " + fileOffset; + if (isLenient && fileOffset == null) + { + LOG.warn(msg); + continue; + } + throw new IOException(msg); + } } List stmObjects = objToBeParsed.get(fileOffset); @@ -587,6 +725,12 @@ else if (baseObj instanceof COSObject) stmObjects = new ArrayList(); objToBeParsed.put(fileOffset, stmObjects); } + // java does not have a test for immutable + else if (!(stmObjects instanceof ArrayList)) + { + throw new IOException(obj + " cannot be assigned to offset " + + fileOffset + ", this belongs to " + stmObjects.get(0)); + } stmObjects.add(obj); } } @@ -678,7 +822,18 @@ protected COSBase parseObjectDynamically(long objNr, int objGenNr, { // not previously parsed // ---- read offset or object stream object number from xref table - Long offsetOrObjstmObNr = xrefTrailerResolver.getXrefTable().get(objKey); + Long offsetOrObjstmObNr = document.getXrefTable().get(objKey); + + // maybe something is wrong with the xref table -> perform brute force search for all objects + if (offsetOrObjstmObNr == null && isLenient && bfSearchCOSObjectKeyOffsets != null) + { + offsetOrObjstmObNr = bfSearchCOSObjectKeyOffsets.get(objKey); + if (offsetOrObjstmObNr != null) + { + LOG.debug("Set missing offset " + offsetOrObjstmObNr + " for object " + objKey); + document.getXrefTable().put(objKey, offsetOrObjstmObNr); + } + } // sanity test to circumvent loops with broken documents if (requireExistingNotCompressedObj @@ -695,7 +850,7 @@ protected COSBase parseObjectDynamically(long objNr, int objGenNr, if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty()) { LOG.debug("Add all new read objects from brute force search to the xref table"); - Map xrefOffset = xrefTrailerResolver.getXrefTable(); + Map xrefOffset = document.getXrefTable(); final Set> entries = bfSearchCOSObjectKeyOffsets.entrySet(); for (Entry 
entry : entries) { @@ -745,7 +900,7 @@ private void parseFileObject(Long offsetOrObjstmObNr, final COSObjectKey objKey, { throw new IOException("XREF for " + objKey.getNumber() + ":" + objKey.getGeneration() + " points to wrong object: " + readObjNr - + ":" + readObjGen); + + ":" + readObjGen + " at offset " + offsetOrObjstmObNr); } skipSpaces(); @@ -818,7 +973,24 @@ private void parseObjectStream(int objstmObjNr) throws IOException if (objstmBaseObj instanceof COSStream) { // parse object stream - PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document); + PDFObjectStreamParser parser; + try + { + parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document); + } + catch (IOException ex) + { + if (isLenient) + { + LOG.error("object stream " + objstmObjNr + " could not be parsed due to an exception", ex); + return; + } + else + { + throw ex; + } + } + try { parser.parse(); @@ -836,15 +1008,12 @@ private void parseObjectStream(int objstmObjNr) throws IOException throw exception; } } - - // get set of object numbers referenced for this object stream - final Set refObjNrs = xrefTrailerResolver.getContainedObjectNumbers(objstmObjNr); - // register all objects which are referenced to be contained in object stream for (COSObject next : parser.getObjects()) { COSObjectKey stmObjKey = new COSObjectKey(next); - if (refObjNrs.contains(stmObjKey.getNumber())) + Long offset = xrefTrailerResolver.getXrefTable().get(stmObjKey); + if (offset != null && offset == -objstmObjNr) { COSObject stmObj = document.getObjectFromPool(stmObjKey); stmObj.setObject(next.getObject()); @@ -862,7 +1031,7 @@ private COSNumber getLength(final COSBase lengthBaseObj, final COSName streamTyp { return null; } - COSNumber retVal = null; + COSNumber retVal; // maybe length was given directly if (lengthBaseObj instanceof COSNumber) { @@ -872,7 +1041,8 @@ private COSNumber getLength(final COSBase lengthBaseObj, final COSName streamTyp else if (lengthBaseObj 
instanceof COSObject) { COSObject lengthObj = (COSObject) lengthBaseObj; - if (lengthObj.getObject() == null) + COSBase length = lengthObj.getObject(); + if (length == null) { // not read so far, keep current stream position final long curFileOffset = source.getPosition(); @@ -880,17 +1050,24 @@ else if (lengthBaseObj instanceof COSObject) parseObjectDynamically(lengthObj, isObjectStream); // reset current stream position source.seek(curFileOffset); - if (lengthObj.getObject() == null) - { - throw new IOException("Length object content was not read."); - } + length = lengthObj.getObject(); + } + if (length == null) + { + throw new IOException("Length object content was not read."); } - if (!(lengthObj.getObject() instanceof COSNumber)) + if (COSNull.NULL == length) + { + LOG.warn("Length object (" + lengthObj.getObjectNumber() + " " + + lengthObj.getGenerationNumber() + ") not found"); + return null; + } + if (!(length instanceof COSNumber)) { throw new IOException("Wrong type of referenced length object " + lengthObj - + ": " + lengthObj.getObject().getClass().getSimpleName()); + + ": " + length.getClass().getSimpleName()); } - retVal = (COSNumber) lengthObj.getObject(); + retVal = (COSNumber) length; } else { @@ -973,10 +1150,6 @@ protected COSStream parseCOSStream(COSDictionary dic) throws IOException { stream.setItem(COSName.LENGTH, streamLengthObj); } - else - { - stream.removeItem(COSName.LENGTH); - } } } String endStream = readString(); @@ -987,7 +1160,7 @@ protected COSStream parseCOSStream(COSDictionary dic) throws IOException // avoid follow-up warning about missing endobj source.rewind(ENDOBJ.length); } - else if (endStream.length() > 9 && isLenient && endStream.substring(0,9).equals(ENDSTREAM_STRING)) + else if (endStream.length() > 9 && isLenient && endStream.startsWith(ENDSTREAM_STRING)) { LOG.warn("stream ends with '" + endStream + "' instead of 'endstream' at offset " + source.getPosition()); @@ -1173,41 +1346,44 @@ private long checkXRefOffset(long 
startXRefOffset) throws IOException return startXRefOffset; } source.seek(startXRefOffset); + skipSpaces(); if (source.peek() == X && isString(XREF_TABLE)) { return startXRefOffset; } if (startXRefOffset > 0) { - long fixedOffset = checkXRefStreamOffset(startXRefOffset, true); - if (fixedOffset > -1) + if (checkXRefStreamOffset(startXRefOffset)) + { + return startXRefOffset; + } + else { - return fixedOffset; + return calculateXRefFixedOffset(startXRefOffset, false); } } - // try to find a fixed offset - return calculateXRefFixedOffset(startXRefOffset, false); + // can't find a valid offset + return -1; } /** * Check if the cross reference stream can be found at the current offset. * * @param startXRefOffset the expected start offset of the XRef stream - * @param checkOnly check only but don't repair the offset if set to true * @return the revised offset * @throws IOException if something went wrong */ - private long checkXRefStreamOffset(long startXRefOffset, boolean checkOnly) throws IOException + private boolean checkXRefStreamOffset(long startXRefOffset) throws IOException { // repair mode isn't available in non-lenient mode if (!isLenient || startXRefOffset == 0) { - return startXRefOffset; + return true; } // seek to offset-1 source.seek(startXRefOffset-1); int nextValue = source.read(); - // the first character has to be whitespace(s), and then a digit + // the first character has to be a whitespace, and then a digit if (isWhitespace(nextValue)) { skipSpaces(); @@ -1219,19 +1395,22 @@ private long checkXRefStreamOffset(long startXRefOffset, boolean checkOnly) thro readObjectNumber(); readGenerationNumber(); readExpectedString(OBJ_MARKER, true); + // check the dictionary to avoid false positives + COSDictionary dict = parseCOSDictionary(); source.seek(startXRefOffset); - return startXRefOffset; + if ("XRef".equals(dict.getNameAsString(COSName.TYPE))) + { + return true; + } } catch (IOException exception) { - // there wasn't an object of a xref stream - // try 
to repair the offset + // there wasn't an object of a xref stream source.seek(startXRefOffset); } } } - // try to find a fixed offset - return checkOnly ? -1 : calculateXRefFixedOffset(startXRefOffset, true); + return false; } /** @@ -1261,6 +1440,43 @@ private long calculateXRefFixedOffset(long objectOffset, boolean streamsOnly) th return 0; } + private boolean validateXrefOffsets(Map xrefOffset) throws IOException + { + if (xrefOffset == null) + { + return true; + } + Map correctedKeys = new HashMap(); + for (Entry objectEntry : xrefOffset.entrySet()) + { + COSObjectKey objectKey = objectEntry.getKey(); + Long objectOffset = objectEntry.getValue(); + // a negative offset number represents an object number itself + // see type 2 entry in xref stream + if (objectOffset != null && objectOffset >= 0) + { + COSObjectKey foundObjectKey = findObjectKey(objectKey, objectOffset); + if (foundObjectKey == null) + { + LOG.debug("Stop checking xref offsets as at least one (" + objectKey + + ") couldn't be dereferenced"); + return false; + } + else if (foundObjectKey != objectKey) + { + // Generation was fixed - need to update map later, after iteration + correctedKeys.put(objectKey, foundObjectKey); + } + } + } + for (Entry correctedKeyEntry : correctedKeys.entrySet()) + { + xrefOffset.put(correctedKeyEntry.getValue(), + xrefOffset.remove(correctedKeyEntry.getKey())); + } + return true; + } + /** * Check the XRef table by dereferencing all objects and fixing the offset if necessary. 
* @@ -1274,113 +1490,60 @@ private void checkXrefOffsets() throws IOException return; } Map xrefOffset = xrefTrailerResolver.getXrefTable(); - if (xrefOffset != null) + if (!validateXrefOffsets(xrefOffset)) { - boolean bruteForceSearch = false; - for (Entry objectEntry : xrefOffset.entrySet()) - { - COSObjectKey objectKey = objectEntry.getKey(); - Long objectOffset = objectEntry.getValue(); - // a negative offset number represents a object number itself - // see type 2 entry in xref stream - if (objectOffset != null && objectOffset >= 0 - && !checkObjectKeys(objectKey, objectOffset)) - { - LOG.debug("Stop checking xref offsets as at least one couldn't be dereferenced"); - bruteForceSearch = true; - break; - } - } - if (bruteForceSearch) + bfSearchForObjects(); + if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty()) { - bfSearchForObjects(); - if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty()) - { - List objStreams = new ArrayList(); - // find all object streams - for (COSObjectKey key : xrefOffset.keySet()) - { - Long offset = xrefOffset.get(key); - if (offset != null && offset < 0 ) - { - COSObjectKey objStream = new COSObjectKey(-offset, 0); - if (!objStreams.contains(objStream)) - { - objStreams.add(new COSObjectKey(-offset, 0)); - } - } - } - // remove all found object streams - for (COSObjectKey key : bfSearchCOSObjectKeyOffsets.keySet()) - { - objStreams.remove(key); - } - // remove all objects which are part of an object stream which wasn't found - for (COSObjectKey key : objStreams) - { - Set objects = xrefTrailerResolver.getContainedObjectNumbers((int)(key.getNumber())); - for (Long objNr :objects) - { - xrefOffset.remove(new COSObjectKey(objNr, 0)); - } - } - LOG.debug("Replaced read xref table with the results of a brute force search"); - xrefOffset.putAll(bfSearchCOSObjectKeyOffsets); - } + LOG.debug("Replaced read xref table with the results of a brute force search"); + xrefOffset.clear(); + 
xrefOffset.putAll(bfSearchCOSObjectKeyOffsets); } } } /** - * Check if the given object can be found at the given offset. + * Check if the given object can be found at the given offset. Returns the provided object key if everything is ok. + * If the generation number differs it will be fixed and a new object key is returned. * - * @param objectKey the object we are looking for + * @param objectKey the key of object we are looking for * @param offset the offset where to look - * @return returns true if the given object can be dereferenced at the given offset + * @return returns the found/fixed object key + * * @throws IOException if something went wrong */ - private boolean checkObjectKeys(COSObjectKey objectKey, long offset) throws IOException + private COSObjectKey findObjectKey(COSObjectKey objectKey, long offset) throws IOException { // there can't be any object at the very beginning of a pdf if (offset < MINIMUM_SEARCH_OFFSET) { - return false; + return null; } - long objectNr = objectKey.getNumber(); - int objectGen = objectKey.getGeneration(); - long originOffset = source.getPosition(); - source.seek(offset); - String objectString = createObjectString(objectNr, objectGen); try { - if (isString(objectString.getBytes(ISO_8859_1))) + source.seek(offset); + // try to read the given object/generation number + if (objectKey.getNumber() == readObjectNumber()) { - // everything is ok, return origin object key - source.seek(originOffset); - return true; + int genNumber = readGenerationNumber(); + // finally try to read the object marker + readExpectedString(OBJ_MARKER, true); + if (genNumber == objectKey.getGeneration()) + { + return objectKey; + } + else if (isLenient && genNumber > objectKey.getGeneration()) + { + return new COSObjectKey(objectKey.getNumber(), genNumber); + } } } catch (IOException exception) { // Swallow the exception, obviously there isn't any valid object number + LOG.debug("No valid object at given location " + offset + " - ignoring", 
exception); } - finally - { - source.seek(originOffset); - } - // no valid object number found - return false; - } - /** - * Create a string for the given object id. - * - * @param objectID the object id - * @param genID the generation id - * @return the generated string - */ - private String createObjectString(long objectID, int genID) - { - return Long.toString(objectID) + " " + Integer.toString(genID) + " obj"; + return null; } /** @@ -1392,17 +1555,24 @@ private void bfSearchForObjects() throws IOException { if (bfSearchCOSObjectKeyOffsets == null) { + bfSearchForLastEOFMarker(); bfSearchCOSObjectKeyOffsets = new HashMap(); long originOffset = source.getPosition(); long currentOffset = MINIMUM_SEARCH_OFFSET; - String objString = " obj"; - char[] string = objString.toCharArray(); + long lastObjectId = Long.MIN_VALUE; + int lastGenID = Integer.MIN_VALUE; + long lastObjOffset = Long.MIN_VALUE; + char[] endobjString = "ndo".toCharArray(); + char[] endobjRemainingString = "bj".toCharArray(); + boolean endOfObjFound = false; do { source.seek(currentOffset); - if (isString(string)) + int nextChar = source.read(); + currentOffset++; + if (isWhitespace(nextChar) && isString(OBJ_MARKER)) { - long tempOffset = currentOffset - 1; + long tempOffset = currentOffset - 2; source.seek(tempOffset); int genID = source.peek(); // is the next char a digit? 
@@ -1411,44 +1581,67 @@ private void bfSearchForObjects() throws IOException genID -= 48; tempOffset--; source.seek(tempOffset); - if (isSpace()) + if (isWhitespace()) { - while (tempOffset > MINIMUM_SEARCH_OFFSET && isSpace()) + while (tempOffset > MINIMUM_SEARCH_OFFSET && isWhitespace()) { source.seek(--tempOffset); } - int length = 0; + boolean objectIDFound = false; while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit()) { source.seek(--tempOffset); - length++; + objectIDFound = true; } - if (length > 0) + if (objectIDFound) { source.read(); - byte[] objIDBytes = source.readFully(length); - String objIdString = new String(objIDBytes, 0, - objIDBytes.length, ISO_8859_1); - Long objectID; - try + long objectId = readObjectNumber(); + if (lastObjOffset > 0) { - objectID = Long.valueOf(objIdString); - } - catch (NumberFormatException exception) - { - objectID = null; - } - if (objectID != null) - { - bfSearchCOSObjectKeyOffsets.put(new COSObjectKey(objectID, genID), tempOffset+1); + // add the former object ID only if there was a subsequent object ID + bfSearchCOSObjectKeyOffsets + .put(new COSObjectKey(lastObjectId, lastGenID), + lastObjOffset); } + lastObjectId = objectId; + lastGenID = genID; + lastObjOffset = tempOffset + 1; + currentOffset += OBJ_MARKER.length - 1; + endOfObjFound = false; } } } } - currentOffset++; + // check for "endo" as abbreviation for "endobj", as the pdf may be cut off + // in the middle of the keyword, see PDFBOX-3936. 
+ // We could possibly implement a more intelligent algorithm if necessary + else if (nextChar == 'e' && isString(endobjString)) + { + currentOffset += endobjString.length; + source.seek(currentOffset); + if (source.isEOF()) + { + endOfObjFound = true; + continue; + } + if (isString(endobjRemainingString)) + { + currentOffset += endobjRemainingString.length; + endOfObjFound = true; + continue; + } + } + } + while (currentOffset < lastEOFMarker && !source.isEOF()); + if ((lastEOFMarker < Long.MAX_VALUE || endOfObjFound) && lastObjOffset > 0) + { + // if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker + // the last object id has to be added here so that it can't get lost as there isn't any subsequent + // object id + bfSearchCOSObjectKeyOffsets.put(new COSObjectKey(lastObjectId, lastGenID), + lastObjOffset); } - while (!source.isEOF()); // reestablish origin position source.seek(originOffset); } @@ -1513,7 +1706,7 @@ else if (newOffsetStream > -1) private long searchNearestValue(List values, long offset) { long newValue = -1; - long currentDifference = -1; + Long currentDifference = null; int currentOffsetIndex = -1; int numberOfOffsets = values.size(); // find the nearest value @@ -1521,7 +1714,7 @@ private long searchNearestValue(List values, long offset) { long newDifference = offset - values.get(i); // find the nearest offset - if (currentDifference == -1 + if (currentDifference == null || (Math.abs(currentDifference) > Math.abs(newDifference))) { currentDifference = newDifference; @@ -1534,50 +1727,340 @@ private long searchNearestValue(List values, long offset) } return newValue; } + /** - * Brute force search for all xref entries (tables). + * Brute force search for all trailer marker. 
* * @throws IOException if something went wrong */ - private void bfSearchForXRefTables() throws IOException + private boolean bfSearchForTrailer(COSDictionary trailer) throws IOException { - if (bfSearchXRefTablesOffsets == null) + long originOffset = source.getPosition(); + source.seek(MINIMUM_SEARCH_OFFSET); + while (!source.isEOF()) { - // a pdf may contain more than one xref entry - bfSearchXRefTablesOffsets = new Vector(); - long originOffset = source.getPosition(); - source.seek(MINIMUM_SEARCH_OFFSET); - // search for xref tables - while (!source.isEOF()) + // search for trailer marker + if (isString(TRAILER_MARKER)) { - if (isString(XREF_TABLE)) + source.seek(source.getPosition() + TRAILER_MARKER.length); + try { - long newOffset = source.getPosition(); - source.seek(newOffset - 1); - // ensure that we don't read "startxref" instead of "xref" - if (isWhitespace()) + boolean rootFound = false; + boolean infoFound = false; + skipSpaces(); + COSDictionary trailerDict = parseCOSDictionary(); + COSObject rootObj = trailerDict.getCOSObject(COSName.ROOT); + if (rootObj != null) { - bfSearchXRefTablesOffsets.add(newOffset); + // check if the dictionary can be dereferenced and is the one we are looking for + COSDictionary rootDict = retrieveCOSDictionary(rootObj); + if (rootDict != null && isCatalog(rootDict)) + { + rootFound = true; + } } - source.seek(newOffset + 4); - } - source.read(); - } - source.seek(originOffset); - } - } - - /** - * Brute force search for all /XRef entries (streams). 
- * - * @throws IOException if something went wrong - */ - private void bfSearchForXRefStreams() throws IOException - { + COSObject infoObj = trailerDict.getCOSObject(COSName.INFO); + if (infoObj != null) + { + // check if the dictionary can be dereferenced and is the one we are looking for + COSDictionary infoDict = retrieveCOSDictionary(infoObj); + if (infoDict != null && isInfo(infoDict)) + { + infoFound = true; + } + } + if (rootFound && infoFound) + { + trailer.setItem(COSName.ROOT, rootObj); + trailer.setItem(COSName.INFO, infoObj); + if (trailerDict.containsKey(COSName.ENCRYPT)) + { + COSObject encObj = trailerDict.getCOSObject(COSName.ENCRYPT); + if (encObj != null) + { + // check if the dictionary can be dereferenced + // TODO check if the dictionary is an encryption dictionary? + COSDictionary encDict = retrieveCOSDictionary(encObj); + if (encDict != null) + { + trailer.setItem(COSName.ENCRYPT, encObj); + } + } + } + if (trailerDict.containsKey(COSName.ID)) + { + COSBase idObj = trailerDict.getItem(COSName.ID); + if (idObj instanceof COSArray) + { + trailer.setItem(COSName.ID, idObj); + } + } + return true; + } + } + catch (IOException exception) + { + continue; + } + } + source.read(); + } + source.seek(originOffset); + return false; + } + + /** + * Brute force search for the last EOF marker. 
+ * + * @throws IOException if something went wrong + */ + private void bfSearchForLastEOFMarker() throws IOException + { + if (lastEOFMarker == null) + { + long originOffset = source.getPosition(); + source.seek(MINIMUM_SEARCH_OFFSET); + while (!source.isEOF()) + { + // search for EOF marker + if (isString(EOF_MARKER)) + { + long tempMarker = source.getPosition(); + source.seek(tempMarker + 5); + try + { + // check if the following data is some valid pdf content + // which most likely indicates that the pdf is linearized, + // updated or just cut off somewhere in the middle + skipSpaces(); + if (!isString(XREF_TABLE)) + { + readObjectNumber(); + readGenerationNumber(); + } + } + catch (IOException exception) + { + // save the EOF marker as the following data is most likely some garbage + lastEOFMarker = tempMarker; + } + } + source.read(); + } + source.seek(originOffset); + // no EOF marker found + if (lastEOFMarker == null) + { + lastEOFMarker = Long.MAX_VALUE; + } + } + } + + /** + * Brute force search for all object streams. + * + * @throws IOException if something went wrong + */ + private void bfSearchForObjStreams() throws IOException + { + HashMap bfSearchObjStreamsOffsets = new HashMap(); + long originOffset = source.getPosition(); + source.seek(MINIMUM_SEARCH_OFFSET); + char[] string = " obj".toCharArray(); + while (!source.isEOF()) + { + // search for EOF marker + if (isString(OBJ_STREAM)) + { + long currentPosition = source.getPosition(); + // search backwards for the beginning of the object + long newOffset = -1; + boolean objFound = false; + for (int i = 1; i < 40 && !objFound; i++) + { + long currentOffset = currentPosition - (i * 10); + if (currentOffset > 0) + { + source.seek(currentOffset); + for (int j = 0; j < 10; j++) + { + if (isString(string)) + { + long tempOffset = currentOffset - 1; + source.seek(tempOffset); + int genID = source.peek(); + // is the next char a digit? 
+ if (isDigit(genID)) + { + tempOffset--; + source.seek(tempOffset); + if (isSpace()) + { + int length = 0; + source.seek(--tempOffset); + while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit()) + { + source.seek(--tempOffset); + length++; + } + if (length > 0) + { + source.read(); + newOffset = source.getPosition(); + long objNumber = readObjectNumber(); + int genNumber = readGenerationNumber(); + COSObjectKey streamObjectKey = new COSObjectKey(objNumber, + genNumber); + bfSearchObjStreamsOffsets.put(newOffset, + streamObjectKey); + } + } + } + LOG.debug("Dictionary start for object stream -> " + newOffset); + objFound = true; + break; + } + else + { + currentOffset++; + source.read(); + } + } + } + } + source.seek(currentPosition + OBJ_STREAM.length); + } + source.read(); + } + // add all found compressed objects to the brute force search result + for (Long offset : bfSearchObjStreamsOffsets.keySet()) + { + Long bfOffset = bfSearchCOSObjectKeyOffsets.get(bfSearchObjStreamsOffsets.get(offset)); + // incomplete object stream found? 
+ if (bfOffset == null) + { + LOG.warn("Skipped incomplete object stream:" + bfSearchObjStreamsOffsets.get(offset) + + " at " + offset); + continue; + } + // check if the object was overwritten + if (offset.equals(bfOffset)) + { + source.seek(offset); + long stmObjNumber = readObjectNumber(); + int stmGenNumber = readGenerationNumber(); + readExpectedString(OBJ_MARKER, true); + int nrOfObjects = 0; + COSStream stream = null; + List objectNumbers = null; + try + { + COSDictionary dict = parseCOSDictionary(); + int offsetFirstStream = dict.getInt(COSName.FIRST); + nrOfObjects = dict.getInt(COSName.N); + // skip the stream if required values are missing + if (offsetFirstStream == -1 || nrOfObjects == -1) + { + continue; + } + stream = parseCOSStream(dict); + if (securityHandler != null) + { + securityHandler.decryptStream(stream, stmObjNumber, stmGenNumber); + } + PDFObjectStreamParser strmParser = new PDFObjectStreamParser(stream, document); + objectNumbers = new ArrayList(nrOfObjects); + for (int i = 0; i < nrOfObjects; i++) + { + objectNumbers.add(strmParser.readObjectNumber()); + strmParser.readLong(); + } + } + catch (IOException exception) + { + LOG.debug( + "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset); + continue; + } + finally + { + if (stream != null) + { + stream.close(); + } + } + if (objectNumbers.size() < nrOfObjects) + { + LOG.debug( + "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset); + continue; + } + Map xrefOffset = xrefTrailerResolver.getXrefTable(); + for (Long objNumber : objectNumbers) + { + COSObjectKey objKey = new COSObjectKey(objNumber, 0); + Long existingOffset = bfSearchCOSObjectKeyOffsets.get(objKey); + if (existingOffset != null && existingOffset < 0) + { + // translate stream object key to its offset + COSObjectKey objStmKey = new COSObjectKey(Math.abs(existingOffset), 0); + existingOffset = bfSearchCOSObjectKeyOffsets.get(objStmKey); + } + if (existingOffset == null || offset > 
existingOffset) + { + bfSearchCOSObjectKeyOffsets.put(objKey, -stmObjNumber); + xrefOffset.put(objKey, -stmObjNumber); + } + } + } + } + source.seek(originOffset); + } + + /** + * Brute force search for all xref entries (tables). + * + * @throws IOException if something went wrong + */ + private void bfSearchForXRefTables() throws IOException + { + if (bfSearchXRefTablesOffsets == null) + { + // a pdf may contain more than one xref entry + bfSearchXRefTablesOffsets = new ArrayList(); + long originOffset = source.getPosition(); + source.seek(MINIMUM_SEARCH_OFFSET); + // search for xref tables + while (!source.isEOF()) + { + if (isString(XREF_TABLE)) + { + long newOffset = source.getPosition(); + source.seek(newOffset - 1); + // ensure that we don't read "startxref" instead of "xref" + if (isWhitespace()) + { + bfSearchXRefTablesOffsets.add(newOffset); + } + source.seek(newOffset + 4); + } + source.read(); + } + source.seek(originOffset); + } + } + + /** + * Brute force search for all /XRef entries (streams). + * + * @throws IOException if something went wrong + */ + private void bfSearchForXRefStreams() throws IOException + { if (bfSearchXRefStreamsOffsets == null) { // a pdf may contain more than one /XRef entry - bfSearchXRefStreamsOffsets = new Vector(); + bfSearchXRefStreamsOffsets = new ArrayList(); long originOffset = source.getPosition(); source.seek(MINIMUM_SEARCH_OFFSET); // search for XRef streams @@ -1591,7 +2074,7 @@ private void bfSearchForXRefStreams() throws IOException long newOffset = -1; long xrefOffset = source.getPosition(); boolean objFound = false; - for (int i = 1; i < 30 && !objFound; i++) + for (int i = 1; i < 40 && !objFound; i++) { long currentOffset = xrefOffset - (i * 10); if (currentOffset > 0) @@ -1607,7 +2090,6 @@ private void bfSearchForXRefStreams() throws IOException // is the next char a digit? 
if (isDigit(genID)) { - genID -= 48; tempOffset--; source.seek(tempOffset); if (isSpace()) @@ -1653,7 +2135,7 @@ private void bfSearchForXRefStreams() throws IOException /** * Rebuild the trailer dictionary if startxref can't be found. - * + * * @return the rebuild trailer dictionary * * @throws IOException if something went wrong @@ -1664,7 +2146,10 @@ protected final COSDictionary rebuildTrailer() throws IOException bfSearchForObjects(); if (bfSearchCOSObjectKeyOffsets != null) { - xrefTrailerResolver.nextXrefObj( 0, XRefType.TABLE ); + // reset trailer resolver + xrefTrailerResolver.reset(); + // use the found objects to rebuild the trailer resolver + xrefTrailerResolver.nextXrefObj(0, XRefType.TABLE); for (Entry entry : bfSearchCOSObjectKeyOffsets.entrySet()) { xrefTrailerResolver.setXRef(entry.getKey(), entry.getValue()); @@ -1672,51 +2157,255 @@ protected final COSDictionary rebuildTrailer() throws IOException xrefTrailerResolver.setStartxref(0); trailer = xrefTrailerResolver.getTrailer(); getDocument().setTrailer(trailer); - // search for the different parts of the trailer dictionary - for(Entry entry : bfSearchCOSObjectKeyOffsets.entrySet()) + boolean searchForObjStreamsDone = false; + if (!bfSearchForTrailer(trailer) && !searchForTrailerItems(trailer)) + { + // root entry wasn't found, maybe it is part of an object stream + bfSearchForObjStreams(); + searchForObjStreamsDone = true; + // search again for the root entry + searchForTrailerItems(trailer); + } + // prepare decryption if necessary + prepareDecryption(); + if (!searchForObjStreamsDone) { - Long offset = entry.getValue(); - source.seek(offset); - readObjectNumber(); - readGenerationNumber(); - readExpectedString(OBJ_MARKER, true); - try + bfSearchForObjStreams(); + } + } + trailerWasRebuild = true; + return trailer; + } + + /** + * Search for the different parts of the trailer dictionary. + * + * @param trailer + * @return true if the root was found, false if not. 
+ * @throws IOException + */ + private boolean searchForTrailerItems(COSDictionary trailer) throws IOException + { + COSObject rootObject = null; + Long rootOffset = null; + COSObject infoObject = null; + Long infoOffset = null; + for (Entry entry : bfSearchCOSObjectKeyOffsets.entrySet()) + { + COSDictionary dictionary = retrieveCOSDictionary(entry.getKey(), entry.getValue()); + if (dictionary == null) + { + continue; + } + // document catalog + if (isCatalog(dictionary)) + { + COSObject cosObject = document.getObjectFromPool(entry.getKey()); + rootObject = compareCOSObjects(cosObject, entry.getValue(), rootObject, rootOffset); + if (rootObject == cosObject) { - COSDictionary dictionary = parseCOSDictionary(); - if (dictionary != null) - { - // document catalog - if (COSName.CATALOG.equals(dictionary.getCOSName(COSName.TYPE))) - { - trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey())); - } - // info dictionary - else if (dictionary.containsKey(COSName.MOD_DATE) && - (dictionary.containsKey(COSName.TITLE) - || dictionary.containsKey(COSName.AUTHOR) - || dictionary.containsKey(COSName.SUBJECT) - || dictionary.containsKey(COSName.KEYWORDS) - || dictionary.containsKey(COSName.CREATOR) - || dictionary.containsKey(COSName.PRODUCER) - || dictionary.containsKey(COSName.CREATION_DATE))) - { - trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey())); - } - // TODO encryption dictionary - } + rootOffset = entry.getValue(); } - catch(IOException exception) + } + // info dictionary + else if (isInfo(dictionary)) + { + COSObject cosObject = document.getObjectFromPool(entry.getKey()); + infoObject = compareCOSObjects(cosObject, entry.getValue(), infoObject, infoOffset); + if (infoObject == cosObject) { - LOG.debug("Skipped object " + entry.getKey() + ", either it's corrupt or not a dictionary"); + infoOffset = entry.getValue(); } } + // encryption dictionary, if existing, is lost + // We can't run "Algorithm 2" from PDF specification because 
of missing ID } - return trailer; + if (rootObject != null) + { + trailer.setItem(COSName.ROOT, rootObject); + } + if (infoObject != null) + { + trailer.setItem(COSName.INFO, infoObject); + } + return rootObject != null; } - + + private COSObject compareCOSObjects(COSObject newObject, Long newOffset, + COSObject currentObject, Long currentOffset) + { + if (currentObject != null) + { + // check if the current object is an updated version of the previous found object + if (currentObject.getObjectNumber() == newObject.getObjectNumber()) + { + return currentObject.getGenerationNumber() < newObject.getGenerationNumber() + ? newObject + : currentObject; + } + // most likely the object with the bigger offset is the newer one + return currentOffset != null && newOffset > currentOffset ? newObject : currentObject; + } + return newObject; + } + + private COSDictionary retrieveCOSDictionary(COSObject object) throws IOException + { + COSObjectKey key = new COSObjectKey(object); + Long offset = bfSearchCOSObjectKeyOffsets.get(key); + if (offset != null) + { + long currentPosition = source.getPosition(); + COSDictionary dictionary = retrieveCOSDictionary(key, offset); + source.seek(currentPosition); + return dictionary; + } + return null; + } + + private COSDictionary retrieveCOSDictionary(COSObjectKey key, long offset) throws IOException + { + COSDictionary dictionary = null; + // handle compressed objects + if (offset < 0) + { + COSObject compressedObject = document.getObjectFromPool(key); + if (compressedObject.getObject() == null) + { + parseObjectStream((int) -offset); + } + COSBase baseObject = compressedObject.getObject(); + if (baseObject instanceof COSDictionary) + { + dictionary = (COSDictionary) baseObject; + } + } + else + { + source.seek(offset); + readObjectNumber(); + readGenerationNumber(); + readExpectedString(OBJ_MARKER, true); + if (source.peek() != '<') + { + return null; + } + try + { + dictionary = parseCOSDictionary(); + } + catch (IOException exception) + 
{ + LOG.debug("Skipped object " + key + + ", either it's corrupt or not a dictionary"); + } + } + return dictionary; + } + + /** + * Check if all entries of the pages dictionary are present. Those which can't be dereferenced are removed. + * + * @param root the root dictionary of the pdf + */ + protected void checkPages(COSDictionary root) + { + if (trailerWasRebuild && root != null) + { + // check if all page objects are dereferenced + COSBase pages = root.getDictionaryObject(COSName.PAGES); + if (pages instanceof COSDictionary) + { + checkPagesDictionary((COSDictionary) pages, new HashSet()); + } + } + } + + private int checkPagesDictionary(COSDictionary pagesDict, Set set) + { + // check for kids + COSBase kids = pagesDict.getDictionaryObject(COSName.KIDS); + int numberOfPages = 0; + if (kids instanceof COSArray) + { + COSArray kidsArray = (COSArray) kids; + List kidsList = kidsArray.toList(); + for (COSBase kid : kidsList) + { + if (!(kid instanceof COSObject) || set.contains((COSObject) kid)) + { + kidsArray.remove(kid); + continue; + } + COSObject kidObject = (COSObject) kid; + COSBase kidBaseobject = kidObject.getObject(); + // object wasn't dereferenced -> remove it + if (kidBaseobject == null || kidBaseobject.equals(COSNull.NULL)) + { + LOG.warn("Removed null object " + kid + " from pages dictionary"); + kidsArray.remove(kid); + } + else if (kidBaseobject instanceof COSDictionary) + { + COSDictionary kidDictionary = (COSDictionary) kidBaseobject; + COSName type = kidDictionary.getCOSName(COSName.TYPE); + if (COSName.PAGES.equals(type)) + { + // process nested pages dictionaries + set.add(kidObject); + numberOfPages += checkPagesDictionary(kidDictionary, set); + } + else if (COSName.PAGE.equals(type)) + { + // count pages + numberOfPages++; + } + } + } + } + // fix counter + pagesDict.setInt(COSName.COUNT, numberOfPages); + return numberOfPages; + } + + /** + * Tell if the dictionary is a PDF catalog. Override this for an FDF catalog. 
+ * + * @param dictionary + * @return true if the given dictionary is a root dictionary + */ + protected boolean isCatalog(COSDictionary dictionary) + { + return COSName.CATALOG.equals(dictionary.getCOSName(COSName.TYPE)); + } + /** - * This will parse the startxref section from the stream. - * The startxref value is ignored. + * Tell if the dictionary is an info dictionary. + * + * @param dictionary + * @return true if the given dictionary is an info dictionary + */ + private boolean isInfo(COSDictionary dictionary) + { + if (dictionary.containsKey(COSName.PARENT) || dictionary.containsKey(COSName.A) || dictionary.containsKey(COSName.DEST)) + { + return false; + } + if (!dictionary.containsKey(COSName.MOD_DATE) && !dictionary.containsKey(COSName.TITLE) + && !dictionary.containsKey(COSName.AUTHOR) + && !dictionary.containsKey(COSName.SUBJECT) + && !dictionary.containsKey(COSName.KEYWORDS) + && !dictionary.containsKey(COSName.CREATOR) + && !dictionary.containsKey(COSName.PRODUCER) + && !dictionary.containsKey(COSName.CREATION_DATE)) + { + return false; + } + return true; + } + + /** + * This will parse the startxref section from the stream. The startxref value is ignored. * * @return the startxref value or -1 on parsing error * @throws IOException If an IO error occurs. @@ -1758,10 +2447,7 @@ private boolean isString(byte[] string) throws IOException } numberOfBytes += readMore; } - if (Arrays.equals(string, bytesRead)) - { - bytesMatching = true; - } + bytesMatching = Arrays.equals(string, bytesRead); source.rewind(numberOfBytes); } return bytesMatching; @@ -1783,6 +2469,7 @@ private boolean isString(char[] string) throws IOException if (source.read() != c) { bytesMatching = false; + break; } } source.seek(originOffset); @@ -1797,6 +2484,24 @@ private boolean isString(char[] string) throws IOException */ private boolean parseTrailer() throws IOException { + // parse the last trailer. 
+ trailerOffset = source.getPosition(); + // PDFBOX-1739 skip extra xref entries in RegisSTAR documents + if (isLenient) + { + int nextCharacter = source.peek(); + while (nextCharacter != 't' && isDigit(nextCharacter)) + { + if (source.getPosition() == trailerOffset) + { + // warn only the first time + LOG.warn("Expected trailer object at offset " + trailerOffset + + ", keep trying"); + } + readLine(); + nextCharacter = source.peek(); + } + } if(source.peek() != 't') { return false; @@ -1891,7 +2596,7 @@ private boolean parseHeader(String headerMarker, String defaultVersion) throws I if ( headerStart > 0 ) { //trim off any leading characters - header = header.substring( headerStart, header.length() ); + header = header.substring(headerStart); } // This is used if there is garbage after the header on the same line @@ -1925,7 +2630,14 @@ private boolean parseHeader(String headerMarker, String defaultVersion) throws I } if (headerVersion < 0) { - throw new IOException( "Error getting header version: " + header); + if (isLenient) + { + headerVersion = 1.7f; + } + else + { + throw new IOException("Error getting header version: " + header); + } } document.setVersion(headerVersion); // rewind @@ -1969,11 +2681,36 @@ protected boolean parseXrefTable(long startByteOffset) throws IOException // Xref tables can have multiple sections. Each starts with a starting object id and a count. 
while(true) { + String currentLine = readLine(); + String[] splitString = currentLine.split("\\s"); + if (splitString.length != 2) + { + LOG.warn("Unexpected XRefTable Entry: " + currentLine); + return false; + } // first obj id - long currObjID = readObjectNumber(); - + long currObjID; + try + { + currObjID = Long.parseLong(splitString[0]); + } + catch (NumberFormatException exception) + { + LOG.warn("XRefTable: invalid ID for the first object: " + currentLine); + return false; + } + // the number of objects in the xref table - long count = readLong(); + int count = 0; + try + { + count = Integer.parseInt(splitString[1]); + } + catch (NumberFormatException exception) + { + LOG.warn("XRefTable: invalid number of objects: " + currentLine); + return false; + } skipSpaces(); for(int i = 0; i < count; i++) @@ -1987,8 +2724,8 @@ protected boolean parseXrefTable(long startByteOffset) throws IOException break; } //Ignore table contents - String currentLine = readLine(); - String[] splitString = currentLine.split("\\s"); + currentLine = readLine(); + splitString = currentLine.split("\\s"); if (splitString.length < 3) { LOG.warn("invalid xref line: " + currentLine); @@ -2000,12 +2737,16 @@ protected boolean parseXrefTable(long startByteOffset) throws IOException { try { - int currOffset = Integer.parseInt(splitString[0]); - int currGenID = Integer.parseInt(splitString[1]); - COSObjectKey objKey = new COSObjectKey(currObjID, currGenID); - xrefTrailerResolver.setXRef(objKey, currOffset); + long currOffset = Long.parseLong(splitString[0]); + // skip 0 offsets + if (currOffset > 0) + { + int currGenID = Integer.parseInt(splitString[1]); + COSObjectKey objKey = new COSObjectKey(currObjID, currGenID); + xrefTrailerResolver.setXRef(objKey, currOffset); + } } - catch(NumberFormatException e) + catch (NumberFormatException e) { throw new IOException(e); } @@ -2048,9 +2789,8 @@ private void parseXrefStream(COSStream stream, long objByteOffset, boolean isSta } /** - * This will get 
the document that was parsed. parse() must be called before this is called. - * When you are done with this document you must call close() on it to release - * resources. + * This will get the document that was parsed. The document must be parsed before this is called. When you are done + * with this document you must call close() on it to release resources. * * @return The document that was parsed. * @@ -2060,18 +2800,51 @@ public COSDocument getDocument() throws IOException { if( document == null ) { - throw new IOException( "You must call parse() before calling getDocument()" ); + throw new IOException("You must parse the document first before calling getDocument()"); } return document; } + /** + * This will get the encryption dictionary. The document must be parsed before this is called. + * + * @return The encryption dictionary of the document that was parsed. + * + * @throws IOException If there is an error getting the document. + */ + public PDEncryption getEncryption() throws IOException + { + if (document == null) + { + throw new IOException( + "You must parse the document first before calling getEncryption()"); + } + return encryption; + } + + /** + * This will get the AccessPermission. The document must be parsed before this is called. + * + * @return The access permission of document that was parsed. + * + * @throws IOException If there is an error getting the document. + */ + public AccessPermission getAccessPermission() throws IOException + { + if (document == null) + { + throw new IOException( + "You must parse the document first before calling getAccessPermission()"); + } + return accessPermission; + } + /** * Parse the values of the trailer dictionary and return the root object. * * @param trailer The trailer dictionary. * @return The parsed root object. - * @throws IOException If an IO error occurs or if the root object is - * missing in the trailer dictionary. 
+ * @throws IOException If an IO error occurs or if the root object is missing in the trailer dictionary. */ protected COSBase parseTrailerValuesDynamically(COSDictionary trailer) throws IOException { @@ -2086,12 +2859,104 @@ protected COSBase parseTrailerValuesDynamically(COSDictionary trailer) throws IO } } // parse catalog or root object - COSObject root = (COSObject) trailer.getItem(COSName.ROOT); + COSObject root = trailer.getCOSObject(COSName.ROOT); if (root == null) { throw new IOException("Missing root object specification in trailer."); } - return parseObjectDynamically(root, false); + return root.getObject(); + } + + /** + * Prepare for decryption. + * + * @throws InvalidPasswordException If the password is incorrect. + * @throws IOException if something went wrong + */ + private void prepareDecryption() throws IOException + { + if (encryption != null) + { + return; + } + COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT); + if (trailerEncryptItem == null || trailerEncryptItem instanceof COSNull) + { + return; + } + + if (trailerEncryptItem instanceof COSObject) + { + COSObject trailerEncryptObj = (COSObject) trailerEncryptItem; + parseDictionaryRecursive(trailerEncryptObj); + } + + try + { + encryption = new PDEncryption(document.getEncryptionDictionary()); + DecryptionMaterial decryptionMaterial; + if (keyStoreInputStream != null) + { + KeyStore ks = KeyStore.getInstance("PKCS12"); + ks.load(keyStoreInputStream, password.toCharArray()); + decryptionMaterial = new PublicKeyDecryptionMaterial(ks, keyAlias, password); + } + else + { + decryptionMaterial = new StandardDecryptionMaterial(password); + } + + securityHandler = encryption.getSecurityHandler(); + securityHandler.prepareForDecryption(encryption, document.getDocumentID(), + decryptionMaterial); + accessPermission = securityHandler.getCurrentAccessPermission(); + } + catch (IOException e) + { + throw e; + } + catch (Exception e) + { + throw new IOException("Error (" + 
e.getClass().getSimpleName() + + ") while creating security handler for decryption", e); + } + finally + { + if (keyStoreInputStream != null) + { + IOUtils.closeQuietly(keyStoreInputStream); + } + } + } + + /** + * Resolves all not already parsed objects of a dictionary recursively. + * + * @param dictionaryObject dictionary to be parsed + * @throws IOException if something went wrong + * + */ + private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException + { + parseObjectDynamically(dictionaryObject, true); + if (!(dictionaryObject.getObject() instanceof COSDictionary)) + { + // we can't be lenient here, this is called by prepareDecryption() + // to get the encryption directory + throw new IOException("Dictionary object expected at offset " + source.getPosition()); + } + COSDictionary dictionary = (COSDictionary) dictionaryObject.getObject(); + for (COSBase value : dictionary.getValues()) + { + if (value instanceof COSObject) + { + COSObject object = (COSObject) value; + if (object.getObject() == null) + { + parseDictionaryRecursive(object); + } + } + } } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java index 1b81bc43b1d..9db9a93a9b9 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/EndstreamOutputStream.java @@ -55,7 +55,7 @@ class EndstreamOutputStream extends BufferedOutputStream * @throws IOException */ @Override - public void write(byte[] b, int off, int len) throws IOException + public synchronized void write(byte[] b, int off, int len) throws IOException { if (pos == 0 && len > 10) { @@ -124,7 +124,7 @@ else if (b[off + len - 1] == '\n') * @throws IOException */ @Override - public void flush() throws IOException + public synchronized void flush() throws IOException { // if there is only a CR and no LF, write it if (hasCR && 
!hasLF) diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java index 4c980778b7f..98041f4efa2 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java @@ -74,7 +74,19 @@ public FDFParser(InputStream input) throws IOException init(); } - private void init() throws IOException + /** + * Tell if the dictionary is a FDF catalog. + * + * @param dictionary + * @return + */ + @Override + protected final boolean isCatalog(COSDictionary dictionary) + { + return dictionary.containsKey(COSName.FDF); + } + + private void init() { String eofLookupRangeStr = System.getProperty(SYSPROP_EOFLOOKUPRANGE); if (eofLookupRangeStr != null) @@ -102,13 +114,32 @@ private void init() throws IOException private void initialParse() throws IOException { COSDictionary trailer = null; - // parse startxref - long startXRefOffset = getStartxrefOffset(); - if (startXRefOffset > 0) + boolean rebuildTrailer = false; + try + { + // parse startxref + long startXRefOffset = getStartxrefOffset(); + if (startXRefOffset > 0) + { + trailer = parseXref(startXRefOffset); + } + else if (isLenient()) + { + rebuildTrailer = true; + } + } + catch (IOException exception) { - trailer = parseXref(startXRefOffset); + if (isLenient()) + { + rebuildTrailer = true; + } + else + { + throw exception; + } } - else + if (rebuildTrailer) { trailer = rebuildTrailer(); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/InputStreamSource.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/InputStreamSource.java index b351451cf3f..10d4db1f218 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/InputStreamSource.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/InputStreamSource.java @@ -17,6 +17,7 @@ package org.apache.pdfbox.pdfparser; +import java.io.EOFException; import java.io.IOException; import 
java.io.InputStream; import java.io.PushbackInputStream; @@ -52,16 +53,30 @@ public int read() throws IOException public int read(byte[] b) throws IOException { int n = input.read(b); - position += n; - return n; + if (n > 0) + { + position += n; + return n; + } + else + { + return -1; + } } @Override public int read(byte[] b, int offset, int length) throws IOException { int n = input.read(b, offset, length); - position += n; - return n; + if (n > 0) + { + position += n; + return n; + } + else + { + return -1; + } } @Override @@ -95,19 +110,27 @@ public void unread(byte[] bytes) throws IOException position -= bytes.length; } + @Override + public void unread(byte[] bytes, int start, int len) throws IOException + { + input.unread(bytes, start, len); + position -= len; + } + @Override public byte[] readFully(int length) throws IOException { byte[] bytes = new byte[length]; - int off = 0; - int len = length; - while (len > 0) + int bytesRead = 0; + do { - int n = this.read(bytes, off, len); - off += n; - len -= n; - position += n; - } + int count = read(bytes, bytesRead, length - bytesRead); + if (count < 0) + { + throw new EOFException(); + } + bytesRead += count; + } while (bytesRead < length); return bytes; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java index 3b56ab040fc..bceb9d430d0 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java @@ -19,11 +19,15 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDocument; +import org.apache.pdfbox.cos.COSName; import 
org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; @@ -41,7 +45,8 @@ public class PDFObjectStreamParser extends BaseParser private static final Log LOG = LogFactory.getLog(PDFObjectStreamParser.class); private List streamObjects = null; - private final COSStream stream; + private final int numberOfObjects; + private final int firstObject; /** * Constructor. @@ -53,8 +58,27 @@ public class PDFObjectStreamParser extends BaseParser public PDFObjectStreamParser(COSStream stream, COSDocument document) throws IOException { super(new InputStreamSource(stream.createInputStream())); - this.stream = stream; this.document = document; + // get mandatory number of objects + numberOfObjects = stream.getInt(COSName.N); + if (numberOfObjects == -1) + { + throw new IOException("/N entry missing in object stream"); + } + if (numberOfObjects < 0) + { + throw new IOException("Illegal /N entry in object stream: " + numberOfObjects); + } + // get mandatory stream offset of the first object + firstObject = stream.getInt(COSName.FIRST); + if (firstObject == -1) + { + throw new IOException("/First entry missing in object stream"); + } + if (firstObject < 0) + { + throw new IOException("Illegal /First entry in object stream: " + firstObject); + } } /** @@ -67,43 +91,19 @@ public void parse() throws IOException { try { - //need to first parse the header. 
- int numberOfObjects = stream.getInt( "N" ); - List objectNumbers = new ArrayList( numberOfObjects ); - streamObjects = new ArrayList( numberOfObjects ); - for( int i=0; i offsets = readOffsets(); + streamObjects = new ArrayList(offsets.size()); + for (Entry offset : offsets.entrySet()) { - object = new COSObject(cosObject); + COSBase cosObject = parseObject(offset.getKey()); + COSObject object = new COSObject(cosObject); object.setGenerationNumber(0); - if (objectCounter >= objectNumbers.size()) - { - LOG.error("/ObjStm (object stream) has more objects than /N " + numberOfObjects); - break; - } - object.setObjectNumber( objectNumbers.get( objectCounter) ); - streamObjects.add( object ); - if(LOG.isDebugEnabled()) - { - LOG.debug( "parsed=" + object ); - } - // According to the spec objects within an object stream shall not be enclosed - // by obj/endobj tags, but there are some pdfs in the wild using those tags - // skip endobject marker if present - if (!seqSource.isEOF() && seqSource.peek() == 'e') + object.setObjectNumber(offset.getValue()); + streamObjects.add(object); + if (LOG.isDebugEnabled()) { - readLine(); + LOG.debug("parsed=" + object); } - objectCounter++; } } finally @@ -121,4 +121,38 @@ public List getObjects() { return streamObjects; } + + private Map readOffsets() throws IOException + { + // according to the pdf spec the offsets shall be sorted ascending + // but we can't rely on that, so that we have to sort the offsets + // as the sequential parsers relies on it, see PDFBOX-4927 + Map objectNumbers = new TreeMap(); + long firstObjectPosition = seqSource.getPosition() + firstObject - 1; + for (int i = 0; i < numberOfObjects; i++) + { + // don't read beyond the part of the stream reserved for the object numbers + if (seqSource.getPosition() >= firstObjectPosition) + { + break; + } + long objectNumber = readObjectNumber(); + int offset = (int) readLong(); + objectNumbers.put(offset, objectNumber); + } + return objectNumbers; + } + + private 
COSBase parseObject(int offset) throws IOException + { + long currentPosition = seqSource.getPosition(); + int finalPosition = firstObject + offset; + if (finalPosition > 0 && currentPosition < finalPosition) + { + // jump to the offset of the object to be parsed + seqSource.readFully(finalPosition - (int) currentPosition); + } + return parseDirObject(); + } + } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java index 68fea3cc399..836963f016f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java @@ -18,7 +18,6 @@ import java.io.IOException; import java.io.InputStream; -import java.security.KeyStore; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -32,23 +31,12 @@ import org.apache.pdfbox.io.RandomAccessRead; import org.apache.pdfbox.io.ScratchFile; import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.encryption.AccessPermission; -import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial; -import org.apache.pdfbox.pdmodel.encryption.PDEncryption; -import org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial; -import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial; +import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; public class PDFParser extends COSParser { private static final Log LOG = LogFactory.getLog(PDFParser.class); - private String password = ""; - private InputStream keyStoreInputStream = null; - private String keyAlias = null; - - private PDEncryption encryption = null; - private AccessPermission accessPermission; - /** * Constructor. * Unrestricted main memory will be used for buffering PDF streams. 
@@ -134,15 +122,12 @@ public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore, String alias, ScratchFile scratchFile) throws IOException { - super(source); + super(source, decryptionPassword, keyStore, alias); fileLen = source.length(); - password = decryptionPassword; - keyStoreInputStream = keyStore; - keyAlias = alias; init(scratchFile); } - private void init(ScratchFile scratchFile) throws IOException + private void init(ScratchFile scratchFile) { String eofLookupRangeStr = System.getProperty(SYSPROP_EOFLOOKUPRANGE); if (eofLookupRangeStr != null) @@ -170,8 +155,8 @@ private void init(ScratchFile scratchFile) throws IOException */ public PDDocument getPDDocument() throws IOException { - PDDocument doc = new PDDocument(getDocument(), source, accessPermission); - doc.setEncryptionDictionary(encryption); + PDDocument doc = new PDDocument(getDocument(), source, getAccessPermission()); + doc.setEncryptionDictionary(getEncryption()); return doc; } @@ -180,23 +165,12 @@ public PDDocument getPDDocument() throws IOException * to all the pdf's objects. It can handle linearized pdfs, which will have an xref at the end pointing to an xref * at the beginning of the file. Last the root object is parsed. * + * @throws InvalidPasswordException If the password is incorrect. * @throws IOException If something went wrong. 
*/ protected void initialParse() throws IOException { - COSDictionary trailer = null; - // parse startxref - long startXRefOffset = getStartxrefOffset(); - if (startXRefOffset > -1) - { - trailer = parseXref(startXRefOffset); - } - else if (isLenient()) - { - trailer = rebuildTrailer(); - } - // prepare decryption if necessary - prepareDecryption(); + COSDictionary trailer = retrieveTrailer(); COSBase base = parseTrailerValuesDynamically(trailer); if (!(base instanceof COSDictionary)) @@ -209,19 +183,21 @@ else if (isLenient()) { root.setItem(COSName.TYPE, COSName.CATALOG); } - COSObject catalogObj = document.getCatalog(); - if (catalogObj != null && catalogObj.getObject() instanceof COSDictionary) + // parse all objects, starting at the root dictionary + parseDictObjects(root, (COSName[]) null); + // parse all objects of the info dictionary + COSBase infoBase = trailer.getDictionaryObject(COSName.INFO); + if (infoBase instanceof COSDictionary) { - parseDictObjects((COSDictionary) catalogObj.getObject(), (COSName[]) null); - - COSBase infoBase = trailer.getDictionaryObject(COSName.INFO); - if (infoBase instanceof COSDictionary) - { - parseDictObjects((COSDictionary) infoBase, (COSName[]) null); - } - - document.setDecrypted(); + parseDictObjects((COSDictionary) infoBase, (COSName[]) null); } + // check pages dictionaries + checkPages(root); + if (!(root.getDictionaryObject(COSName.PAGES) instanceof COSDictionary)) + { + throw new IOException("Page tree root must be a dictionary"); + } + document.setDecrypted(); initialParseDone = true; } @@ -229,6 +205,7 @@ else if (isLenient()) * This will parse the stream and populate the COSDocument object. This will close * the keystore stream when it is done parsing. * + * @throws InvalidPasswordException If the password is incorrect. * @throws IOException If there is an error reading from the stream or corrupt data * is found. 
*/ @@ -252,8 +229,6 @@ public void parse() throws IOException } finally { - IOUtils.closeQuietly(keyStoreInputStream); - if (exceptionOccurred && document != null) { IOUtils.closeQuietly(document); @@ -262,76 +237,4 @@ public void parse() throws IOException } } - /** - * Prepare for decryption. - * - * @throws IOException if something went wrong - */ - private void prepareDecryption() throws IOException - { - COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT); - if (trailerEncryptItem != null && !(trailerEncryptItem instanceof COSNull)) - { - if (trailerEncryptItem instanceof COSObject) - { - COSObject trailerEncryptObj = (COSObject) trailerEncryptItem; - parseDictionaryRecursive(trailerEncryptObj); - } - try - { - encryption = new PDEncryption(document.getEncryptionDictionary()); - DecryptionMaterial decryptionMaterial; - if (keyStoreInputStream != null) - { - KeyStore ks = KeyStore.getInstance("PKCS12"); - ks.load(keyStoreInputStream, password.toCharArray()); - - decryptionMaterial = new PublicKeyDecryptionMaterial(ks, keyAlias, password); - } - else - { - decryptionMaterial = new StandardDecryptionMaterial(password); - } - - securityHandler = encryption.getSecurityHandler(); - securityHandler.prepareForDecryption(encryption, document.getDocumentID(), - decryptionMaterial); - accessPermission = securityHandler.getCurrentAccessPermission(); - } - catch (IOException e) - { - throw e; - } - catch (Exception e) - { - throw new IOException("Error (" + e.getClass().getSimpleName() - + ") while creating security handler for decryption", e); - } - } - } - - /** - * Resolves all not already parsed objects of a dictionary recursively. 
- * - * @param dictionaryObject dictionary to be parsed - * @throws IOException if something went wrong - * - */ - private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException - { - parseObjectDynamically(dictionaryObject, true); - COSDictionary dictionary = (COSDictionary)dictionaryObject.getObject(); - for(COSBase value : dictionary.getValues()) - { - if (value instanceof COSObject) - { - COSObject object = (COSObject)value; - if (object.getObject() == null) - { - parseDictionaryRecursive(object); - } - } - } - } - } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java index 8ede5ace762..5bafa96657f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java @@ -16,24 +16,23 @@ */ package org.apache.pdfbox.pdfparser; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.contentstream.PDContentStream; import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSBoolean; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSNull; import org.apache.pdfbox.cos.COSNumber; -import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.io.RandomAccessBuffer; import org.apache.pdfbox.pdmodel.common.PDStream; /** @@ -100,12 +99,12 @@ public PDFStreamParser(PDContentStream contentStream) throws IOException */ public PDFStreamParser(byte[] bytes) throws IOException { - super(new 
InputStreamSource(new ByteArrayInputStream(bytes))); + super(new RandomAccessSource(new RandomAccessBuffer(bytes))); } /** - * This will parse the tokens in the stream. This will close the - * stream when it is finished parsing. + * This will parse all the tokens in the stream. This will close the stream when it is finished + * parsing. You can then access these with {@link #getTokens() getTokens()}. * * @throws IOException If there is an error while parsing the stream. */ @@ -119,7 +118,7 @@ public void parse() throws IOException } /** - * This will get the tokens that were parsed from the stream. + * This will get the tokens that were parsed from the stream by the {@link #parse() parse()} method. * * @return All of the tokens in the stream. */ @@ -137,19 +136,15 @@ public List getTokens() */ public Object parseNextToken() throws IOException { - Object retval; - skipSpaces(); - int nextByte = seqSource.peek(); - if( ((byte)nextByte) == -1 ) + if (seqSource.isEOF()) { return null; } - char c = (char)nextByte; + char c = (char) seqSource.peek(); switch (c) { case '<': - { // pull off first left bracket int leftBracket = seqSource.read(); @@ -161,74 +156,47 @@ public Object parseNextToken() throws IOException if (c == '<') { - retval = parseCOSDictionary(); + return parseCOSDictionary(); } else { - retval = parseCOSString(); + return parseCOSString(); } - break; - } case '[': - { // array - retval = parseCOSArray(); - break; - } + return parseCOSArray(); case '(': // string - retval = parseCOSString(); - break; + return parseCOSString(); case '/': // name - retval = parseCOSName(); - break; + return parseCOSName(); case 'n': - { // null String nullString = readString(); if( nullString.equals( "null") ) { - retval = COSNull.NULL; + return COSNull.NULL; } else { - retval = Operator.getOperator(nullString); + return Operator.getOperator(nullString); } - break; - } case 't': case 'f': - { String next = readString(); if( next.equals( "true" ) ) { - retval = 
COSBoolean.TRUE; - break; + return COSBoolean.TRUE; } else if( next.equals( "false" ) ) { - retval = COSBoolean.FALSE; - } - else - { - retval = Operator.getOperator(next); - } - break; - } - case 'R': - { - String line = readString(); - if( line.equals( "R" ) ) - { - retval = new COSObject( null ); + return COSBoolean.FALSE; } else { - retval = Operator.getOperator(line); + return Operator.getOperator(next); } - break; - } case '0': case '1': case '2': @@ -242,17 +210,26 @@ else if( next.equals( "false" ) ) case '-': case '+': case '.': - { /* We will be filling buf with the rest of the number. Only * allow 1 "." and "-" and "+" at start of number. */ - StringBuffer buf = new StringBuffer(); + StringBuilder buf = new StringBuilder(); buf.append( c ); seqSource.read(); + + // Ignore double negative (this is consistent with Adobe Reader) + if (c == '-' && seqSource.peek() == c) + { + seqSource.read(); + } boolean dotNotRead = c != '.'; - while( Character.isDigit(c = (char) seqSource.peek()) || dotNotRead && c == '.') + while( Character.isDigit(c = (char) seqSource.peek()) || dotNotRead && c == '.' 
|| c == '-') { - buf.append( c ); + if (c != '-') + { + // PDFBOX-4064: ignore "-" in the middle of a number + buf.append(c); + } seqSource.read(); if (dotNotRead && c == '.') @@ -260,37 +237,47 @@ else if( next.equals( "false" ) ) dotNotRead = false; } } - retval = COSNumber.get( buf.toString() ); - break; - } + return COSNumber.get(buf.toString()); case 'B': - { - String next = readString(); - retval = Operator.getOperator(next); - if( next.equals( "BI" ) ) + String nextOperator = readString(); + Operator beginImageOP = Operator.getOperator(nextOperator); + if (nextOperator.equals(OperatorName.BEGIN_INLINE_IMAGE)) { - Operator beginImageOP = (Operator)retval; COSDictionary imageParams = new COSDictionary(); - beginImageOP.setImageParameters( imageParams ); + beginImageOP.setImageParameters(imageParams); Object nextToken = null; - while( (nextToken = parseNextToken()) instanceof COSName ) + while ((nextToken = parseNextToken()) instanceof COSName) { Object value = parseNextToken(); - imageParams.setItem( (COSName)nextToken, (COSBase)value ); + if (!(value instanceof COSBase)) + { + LOG.warn("Unexpected token in inline image dictionary at offset " + + seqSource.getPosition()); + break; + } + imageParams.setItem((COSName) nextToken, (COSBase) value); + } + // final token will be the image data, maybe?? + if (nextToken instanceof Operator) + { + Operator imageData = (Operator) nextToken; + if (imageData.getImageData() == null + || imageData.getImageData().length == 0) + { + LOG.warn("empty inline image at stream offset " + + seqSource.getPosition()); + } + beginImageOP.setImageData(imageData.getImageData()); } - //final token will be the image data, maybe?? 
- Operator imageData = (Operator)nextToken; - beginImageOP.setImageData( imageData.getImageData() ); } - break; - } + return beginImageOP; case 'I': - { //Special case for ID operator - String id = "" + (char) seqSource.read() + (char) seqSource.read(); - if( !id.equals( "ID" ) ) + String id = Character.toString((char) seqSource.read()) + (char) seqSource.read(); + if (!id.equals(OperatorName.BEGIN_INLINE_IMAGE_DATA)) { - throw new IOException( "Error: Expected operator 'ID' actual='" + id + "'" ); + throw new IOException( "Error: Expected operator 'ID' actual='" + id + + "' at stream offset " + seqSource.getPosition()); } ByteArrayOutputStream imageData = new ByteArrayOutputStream(); if( isWhitespace() ) @@ -315,37 +302,26 @@ else if( next.equals( "false" ) ) currentByte = seqSource.read(); } // the EI operator isn't unread, as it won't be processed anyway - retval = Operator.getOperator("ID"); + Operator beginImageDataOP = Operator + .getOperator(OperatorName.BEGIN_INLINE_IMAGE_DATA); // save the image data to the operator, so that it can be accessed later - ((Operator)retval).setImageData( imageData.toByteArray() ); - break; - } + beginImageDataOP.setImageData(imageData.toByteArray()); + return beginImageDataOP; case ']': - { // some ']' around without its previous '[' // this means a PDF is somewhat corrupt but we will continue to parse. seqSource.read(); - // must be a better solution than null... 
- retval = COSNull.NULL; - break; - } + return COSNull.NULL; default: - { - //we must be an operator - String operator = readOperator(); - if( operator.trim().length() == 0 ) + // we must be an operator + String operator = readOperator().trim(); + if (operator.length() > 0) { - //we have a corrupt stream, stop reading here - retval = null; + return Operator.getOperator(operator); } - else - { - retval = Operator.getOperator(operator); - } - } } - return retval; + return null; } /** @@ -369,24 +345,35 @@ private boolean hasNoFollowingBinData(SequentialSource pdfSource) throws IOExcep for (int bIdx = 0; bIdx < readBytes; bIdx++) { final byte b = binCharTestArr[bIdx]; - if (b < 0x09 || b > 0x0a && b < 0x20 && b != 0x0d) + if (b != 0 && b < 0x09 || b > 0x0a && b < 0x20 && b != 0x0d) { // control character or > 0x7f -> we have binary data noBinData = false; break; } // find the start of a PDF operator - if (startOpIdx == -1 && !(b == 9 || b == 0x20 || b == 0x0a || b == 0x0d)) + if (startOpIdx == -1 && !(b == 0 || b == 9 || b == 0x20 || b == 0x0a || b == 0x0d)) { startOpIdx = bIdx; } else if (startOpIdx != -1 && endOpIdx == -1 && - (b == 9 || b == 0x20 || b == 0x0a || b == 0x0d)) + (b == 0 || b == 9 || b == 0x20 || b == 0x0a || b == 0x0d)) { endOpIdx = bIdx; } } - + + // PDFBOX-3742: just assuming that 1-3 non blanks is a PDF operator isn't enough + if (endOpIdx != -1 && startOpIdx != -1) + { + // usually, the operator here is Q, sometimes EMC (PDFBOX-2376), S (PDFBOX-3784). 
+ String s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx); + if (!"Q".equals(s) && !"EMC".equals(s) && !"S".equals(s)) + { + noBinData = false; + } + } + // only if not close to eof if (readBytes == MAX_BIN_CHAR_TEST_LENGTH) { @@ -400,11 +387,12 @@ else if (startOpIdx != -1 && endOpIdx == -1 && noBinData = false; } } - pdfSource.unread(Arrays.copyOfRange(binCharTestArr, 0, readBytes)); + pdfSource.unread(binCharTestArr, 0, readBytes); } if (!noBinData) { - LOG.warn("ignoring 'EI' assumed to be in the middle of inline image"); + LOG.warn("ignoring 'EI' assumed to be in the middle of inline image at stream offset " + + pdfSource.getPosition()); } return noBinData; } @@ -422,7 +410,7 @@ protected String readOperator() throws IOException //average string size is around 2 and the normal string buffer size is //about 16 so lets save some space. - StringBuffer buffer = new StringBuffer(4); + StringBuilder buffer = new StringBuilder(4); int nextChar = seqSource.peek(); while( nextChar != -1 && // EOF diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXRefStream.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXRefStream.java index 5a5f4705471..a8802d91bd9 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXRefStream.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXRefStream.java @@ -29,6 +29,7 @@ import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSObject; @@ -57,7 +58,10 @@ public class PDFXRefStream implements PDFXRef /** * Create a fresh XRef stream like for a fresh file or an incremental update. 
+ * + * @deprecated use {@link #PDFXRefStream(org.apache.pdfbox.cos.COSDocument)} */ + @Deprecated public PDFXRefStream() { this.stream = new COSStream(); @@ -65,6 +69,18 @@ public PDFXRefStream() objectNumbers = new TreeSet(); } + /** + * Create a fresh XRef stream like for a fresh file or an incremental update. + * + * @param cosDocument + */ + public PDFXRefStream(COSDocument cosDocument) + { + stream = cosDocument.createCOSStream(); + streamData = new TreeMap(); + objectNumbers = new TreeSet(); + } + /** * Returns the stream of the XRef. * @return the XRef stream @@ -77,8 +93,7 @@ public COSStream getStream() throws IOException { throw new IllegalArgumentException("size is not set in xrefstream"); } - // add one for object number 0 - stream.setLong(COSName.SIZE, streamData.size() + 1); + stream.setLong(COSName.SIZE, size); List indexEntry = getIndexEntry(); COSArray indexAsArray = new COSArray(); @@ -90,9 +105,8 @@ public COSStream getStream() throws IOException int[] wEntry = getWEntry(); COSArray wAsArray = new COSArray(); - for ( int i = 0; i < wEntry.length; i++ ) + for (int j : wEntry) { - int j = wEntry[i]; wAsArray.add(COSInteger.get(j)); } stream.setItem(COSName.W, wAsArray); @@ -111,6 +125,11 @@ public COSStream getStream() throws IOException { continue; } + // this one too, because it has already been written in COSWriter.doWriteBody() + if (COSName.ENCRYPT.equals(cosName)) + { + continue; + } COSBase dictionaryObject = this.stream.getDictionaryObject(cosName); dictionaryObject.setDirect(true); } @@ -314,7 +333,7 @@ else if (entry instanceof ObjectStreamReference) * A class representing an object stream reference. * */ - class ObjectStreamReference + static class ObjectStreamReference { long objectNumberOfObjectStream; long offset; @@ -324,7 +343,7 @@ class ObjectStreamReference * A class representing a normal reference. 
* */ - class NormalReference + static class NormalReference { int genNumber; long offset; @@ -334,7 +353,7 @@ class NormalReference * A class representing a free reference. * */ - class FreeReference + static class FreeReference { int nextGenNumber; long nextFree; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java index 016ade11c70..2aad117a2ee 100755 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java @@ -17,10 +17,9 @@ package org.apache.pdfbox.pdfparser; import java.io.IOException; - -import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; -import java.util.List; +import java.util.NoSuchElementException; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; @@ -38,9 +37,10 @@ */ public class PDFXrefStreamParser extends BaseParser { - private final COSStream stream; private final XrefTrailerResolver xrefTrailerResolver; - + private final int[] w = new int[3]; + private ObjectNumbers objectNumbers = null; + /** * Constructor. * @@ -54,136 +54,183 @@ public PDFXrefStreamParser(COSStream stream, COSDocument document, XrefTrailerRe throws IOException { super(new InputStreamSource(stream.createInputStream())); - this.stream = stream; this.document = document; this.xrefTrailerResolver = resolver; + try + { + initParserValues(stream); + } + catch (IOException exception) + { + close(); + } } - /** - * Parses through the unfiltered stream and populates the xrefTable HashMap. - * @throws IOException If there is an error while parsing the stream. 
- */ - public void parse() throws IOException + private void initParserValues(COSStream stream) throws IOException { - COSBase w = stream.getDictionaryObject(COSName.W); - if (!(w instanceof COSArray)) + COSArray wArray = stream.getCOSArray(COSName.W); + if (wArray == null) { throw new IOException("/W array is missing in Xref stream"); } - COSArray xrefFormat = (COSArray) w; - - COSArray indexArray = (COSArray)stream.getDictionaryObject(COSName.INDEX); - /* - * If Index doesn't exist, we will use the default values. - */ - if(indexArray == null) + if (wArray.size() != 3) + { + throw new IOException( + "Wrong number of values for /W array in XRef: " + Arrays.toString(w)); + } + for (int i = 0; i < 3; i++) + { + w[i] = wArray.getInt(i, 0); + } + if (w[0] < 0 || w[1] < 0 || w[2] < 0) + { + throw new IOException("Incorrect /W array in XRef: " + Arrays.toString(w)); + } + + COSArray indexArray = stream.getCOSArray(COSName.INDEX); + if (indexArray == null) { + // If /Index doesn't exist, we will use the default values. 
indexArray = new COSArray(); indexArray.add(COSInteger.ZERO); - indexArray.add(stream.getDictionaryObject(COSName.SIZE)); + indexArray.add(COSInteger.get(stream.getInt(COSName.SIZE, 0))); } + if (indexArray.size() % 2 == 1) + { + throw new IOException( + "Wrong number of values for /Index array in XRef: " + Arrays.toString(w)); + } + // create an Iterator for all object numbers using the index array + objectNumbers = new ObjectNumbers(indexArray); + } - List objNums = new ArrayList(); - - /* - * Populates objNums with all object numbers available - */ - Iterator indexIter = indexArray.iterator(); - while(indexIter.hasNext()) + private void close() throws IOException + { + if (seqSource != null) { - long objID = ((COSInteger)indexIter.next()).longValue(); - int size = ((COSInteger)indexIter.next()).intValue(); - for(int i = 0; i < size; i++) - { - objNums.add(objID + i); - } + seqSource.close(); } - Iterator objIter = objNums.iterator(); - /* - * Calculating the size of the line in bytes - */ - int w0 = xrefFormat.getInt(0); - int w1 = xrefFormat.getInt(1); - int w2 = xrefFormat.getInt(2); - int lineSize = w0 + w1 + w2; + document = null; + } - while(!seqSource.isEOF() && objIter.hasNext()) + /** + * Parses through the unfiltered stream and populates the xrefTable HashMap. + * @throws IOException If there is an error while parsing the stream. + */ + public void parse() throws IOException + { + byte[] currLine = new byte[w[0] + w[1] + w[2]]; + + while (!seqSource.isEOF() && objectNumbers.hasNext()) { - byte[] currLine = new byte[lineSize]; seqSource.read(currLine); - - int type; - if (w0 == 0) + // get the current objID + long objID = objectNumbers.next(); + // default value is 1 if w[0] == 0, otherwise parse first field + int type = w[0] == 0 ? 
1 : (int) parseValue(currLine, 0, w[0]); + // Skip free objects (type 0) and invalid types + if (type == 0) + { + continue; + } + // second field holds the offset (type 1) or the object stream number (type 2) + long offset = parseValue(currLine, w[0], w[1]); + // third field holds the generation number for type 1 entries + int genNum = type == 1 ? (int) parseValue(currLine, w[0] + w[1], w[2]) : 0; + COSObjectKey objKey = new COSObjectKey(objID, genNum); + if (type == 1) { - // "If the first element is zero, - // the type field shall not be present, and shall default to type 1" - type = 1; + xrefTrailerResolver.setXRef(objKey, offset); } else { - type = 0; - /* - * Grabs the number of bytes specified for the first column in - * the W array and stores it. - */ - for (int i = 0; i < w0; i++) + // For XRef aware parsers we have to know which objects contain object streams. We will store this + // information in normal xref mapping table but add object stream number with minus sign in order to + // distinguish from file offsets + xrefTrailerResolver.setXRef(objKey, -offset); + } + } + close(); + } + + private long parseValue(byte[] data, int start, int length) + { + long value = 0; + for (int i = 0; i < length; i++) + { + value += ((long) data[i + start] & 0x00ff) << ((length - i - 1) * 8); + } + return value; + } + + private static class ObjectNumbers implements Iterator + { + private final long[] start; + private final long[] end; + private int currentRange = 0; + private long currentEnd = 0; + private long currentNumber = 0; + private long maxValue = 0; + + private ObjectNumbers(COSArray indexArray) throws IOException + { + start = new long[indexArray.size() / 2]; + end = new long[start.length]; + int counter = 0; + Iterator indexIter = indexArray.iterator(); + while (indexIter.hasNext()) + { + COSBase base = indexIter.next(); + if (!(base instanceof COSInteger)) { - type += (currLine[i] & 0x00ff) << ((w0 - i - 1) * 8); + throw new IOException("Xref stream must have 
integer in /Index array"); } + long startValue = ((COSInteger) base).longValue(); + if (!indexIter.hasNext()) + { + break; + } + base = indexIter.next(); + if (!(base instanceof COSInteger)) + { + throw new IOException("Xref stream must have integer in /Index array"); + } + long sizeValue = ((COSInteger) base).longValue(); + start[counter] = startValue; + end[counter++] = startValue + sizeValue; } - //Need to remember the current objID - Long objID = objIter.next(); - /* - * 3 different types of entries. - */ - switch(type) + currentNumber = start[0]; + currentEnd = end[0]; + maxValue = end[counter - 1]; + } + + @Override + public boolean hasNext() + { + return currentNumber < maxValue; + } + + @Override + public Long next() + { + if (currentNumber >= maxValue) { - case 0: - /* - * Skipping free objects - */ - break; - case 1: - int offset = 0; - for(int i = 0; i < w1; i++) - { - offset += (currLine[i + w0] & 0x00ff) << ((w1 - i - 1) * 8); - } - int genNum = 0; - for(int i = 0; i < w2; i++) - { - genNum += (currLine[i + w0 + w1] & 0x00ff) << ((w2 - i - 1) * 8); - } - COSObjectKey objKey = new COSObjectKey(objID, genNum); - xrefTrailerResolver.setXRef(objKey, offset); - break; - case 2: - /* - * object stored in object stream: - * 2nd argument is object number of object stream - * 3rd argument is index of object within object stream - * - * For sequential PDFParser we do not need this information - * because - * These objects are handled by the dereferenceObjects() method - * since they're only pointing to object numbers - * - * However for XRef aware parsers we have to know which objects contain - * object streams. 
We will store this information in normal xref mapping - * table but add object stream number with minus sign in order to - * distinguish from file offsets - */ - int objstmObjNr = 0; - for(int i = 0; i < w1; i++) - { - objstmObjNr += (currLine[i + w0] & 0x00ff) << ((w1 - i - 1) * 8); - } - objKey = new COSObjectKey( objID, 0 ); - xrefTrailerResolver.setXRef( objKey, -objstmObjNr ); - break; - default: - break; + throw new NoSuchElementException(); + } + if (currentNumber < currentEnd) + { + return currentNumber++; } + currentNumber = start[++currentRange]; + currentEnd = end[currentRange]; + return currentNumber++; } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/RandomAccessSource.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/RandomAccessSource.java index c1d8cf65e62..6ef81f0e4b9 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/RandomAccessSource.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/RandomAccessSource.java @@ -79,6 +79,12 @@ public void unread(byte[] bytes) throws IOException reader.rewind(bytes.length); } + @Override + public void unread(byte[] bytes, int start, int len) throws IOException + { + reader.rewind(len); + } + @Override public byte[] readFully(int length) throws IOException { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/SequentialSource.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/SequentialSource.java index 553b227fec1..5d8367a80d1 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/SequentialSource.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/SequentialSource.java @@ -74,18 +74,28 @@ interface SequentialSource extends Closeable * Unreads a single byte. 
* * @param b byte array to push back - * @throws IOException If there is an error while seeking + * @throws IOException if there is an error while unreading */ void unread(int b) throws IOException; /** * Unreads an array of bytes. * - * @param bytes byte array to push back - * @throws IOException If there is an error while seeking + * @param bytes byte array to be unread + * @throws IOException if there is an error while unreading */ void unread(byte[] bytes) throws IOException; + /** + * Unreads a portion of an array of bytes. + * + * @param bytes byte array to be unread + * @param start start index + * @param len number of bytes to be unread + * @throws IOException if there is an error while unreading + */ + void unread(byte[] bytes, int start, int len) throws IOException; + /** * Reads a given number of bytes in its entirety. * diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java index cd804379830..3530aa7a647 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java @@ -1,334 +1,369 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.pdfparser; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Map.Entry; -import java.util.SortedSet; -import java.util.TreeSet; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSObjectKey; - -/** - * This class will collect all XRef/trailer objects and creates correct - * xref/trailer information after all objects are read using startxref - * and 'Prev' information (unused XRef/trailer objects are discarded). - * - * In case of missing startxref or wrong startxref pointer all - * XRef/trailer objects are used to create xref table / trailer dictionary - * in order they occur. - * - * For each new xref object/XRef stream method {@link #nextXrefObj(long, XRefType)} - * must be called with start byte position. All following calls to - * {@link #setXRef(COSObjectKey, long)} or {@link #setTrailer(COSDictionary)} - * will add the data for this byte position. - * - * After all objects are parsed the startxref position must be provided - * using {@link #setStartxref(long)}. This is used to build the chain of - * active xref/trailer objects used for creating document trailer and xref table. - * - * @author Timo Böhme - */ -public class XrefTrailerResolver -{ - - /** - * A class which represents a xref/trailer object. - */ - private class XrefTrailerObj - { - protected COSDictionary trailer = null; - - private XRefType xrefType; - - private final Map xrefTable = new HashMap(); - - /** - * Default constructor. - */ - private XrefTrailerObj() - { - xrefType = XRefType.TABLE; - } - } - - /** - * The XRefType of a trailer. 
- */ - public enum XRefType - { - /** - * XRef table type. - */ - TABLE, - /** - * XRef stream type. - */ - STREAM; - } - - private final Map bytePosToXrefMap = new HashMap(); - private XrefTrailerObj curXrefTrailerObj = null; - private XrefTrailerObj resolvedXrefTrailer = null; - - /** Log instance. */ - private static final Log LOG = LogFactory.getLog( XrefTrailerResolver.class ); - - /** - * Returns the first trailer if at least one exists. - * - * @return the first trailer or null - */ - public final COSDictionary getFirstTrailer() - { - if (bytePosToXrefMap.isEmpty()) - { - return null; - } - Set offsets = bytePosToXrefMap.keySet(); - SortedSet sortedOffset = new TreeSet(offsets); - return bytePosToXrefMap.get(sortedOffset.first()).trailer; - } - - /** - * Returns the last trailer if at least one exists. - * - * @return the last trailer ir null - */ - public final COSDictionary getLastTrailer() - { - if (bytePosToXrefMap.isEmpty()) - { - return null; - } - Set offsets = bytePosToXrefMap.keySet(); - SortedSet sortedOffset = new TreeSet(offsets); - return bytePosToXrefMap.get(sortedOffset.last()).trailer; - } - - /** - * Signals that a new XRef object (table or stream) starts. - * @param startBytePos the offset to start at - * @param type the type of the Xref object - */ - public void nextXrefObj( final long startBytePos, XRefType type ) - { - bytePosToXrefMap.put( startBytePos, curXrefTrailerObj = new XrefTrailerObj() ); - curXrefTrailerObj.xrefType = type; - } - - /** - * Returns the XRefTxpe of the resolved trailer. - * - * @return the XRefType or null. - */ - public XRefType getXrefType() - { - return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.xrefType; - } - - /** - * Populate XRef HashMap of current XRef object. - * Will add an Xreftable entry that maps ObjectKeys to byte offsets in the file. 
- * @param objKey The objkey, with id and gen numbers - * @param offset The byte offset in this file - */ - public void setXRef( COSObjectKey objKey, long offset ) - { - if ( curXrefTrailerObj == null ) - { - // should not happen... - LOG.warn( "Cannot add XRef entry for '" + objKey.getNumber() + "' because XRef start was not signalled." ); - return; - } - curXrefTrailerObj.xrefTable.put( objKey, offset ); - } - - /** - * Adds trailer information for current XRef object. - * - * @param trailer the current document trailer dictionary - */ - public void setTrailer( COSDictionary trailer ) - { - if ( curXrefTrailerObj == null ) - { - // should not happen... - LOG.warn( "Cannot add trailer because XRef start was not signalled." ); - return; - } - curXrefTrailerObj.trailer = trailer; - } - - /** - * Returns the trailer last set by {@link #setTrailer(COSDictionary)}. - * - * @return the current trailer. - * - */ - public COSDictionary getCurrentTrailer() - { - return curXrefTrailerObj.trailer; - } - - /** - * Sets the byte position of the first XRef - * (has to be called after very last startxref was read). - * This is used to resolve chain of active XRef/trailer. - * - * In case startxref position is not found we output a - * warning and use all XRef/trailer objects combined - * in byte position order. - * Thus for incomplete PDF documents with missing - * startxref one could call this method with parameter value -1. - * - * @param startxrefBytePosValue starting position of the first XRef - * - */ - public void setStartxref( long startxrefBytePosValue ) - { - if ( resolvedXrefTrailer != null ) - { - LOG.warn( "Method must be called only ones with last startxref value." 
); - return; - } - - resolvedXrefTrailer = new XrefTrailerObj(); - resolvedXrefTrailer.trailer = new COSDictionary(); - - XrefTrailerObj curObj = bytePosToXrefMap.get( startxrefBytePosValue ); - List xrefSeqBytePos = new ArrayList(); - - if ( curObj == null ) - { - // no XRef at given position - LOG.warn( "Did not found XRef object at specified startxref position " + startxrefBytePosValue ); - - // use all objects in byte position order (last entries overwrite previous ones) - xrefSeqBytePos.addAll( bytePosToXrefMap.keySet() ); - Collections.sort( xrefSeqBytePos ); - } - else - { - // copy xref type - resolvedXrefTrailer.xrefType = curObj.xrefType; - // found starting Xref object - // add this and follow chain defined by 'Prev' keys - xrefSeqBytePos.add( startxrefBytePosValue ); - while ( curObj.trailer != null ) - { - long prevBytePos = curObj.trailer.getLong( COSName.PREV, -1L ); - if ( prevBytePos == -1 ) - { - break; - } - - curObj = bytePosToXrefMap.get( prevBytePos ); - if ( curObj == null ) - { - LOG.warn( "Did not found XRef object pointed to by 'Prev' key at position " + prevBytePos ); - break; - } - xrefSeqBytePos.add( prevBytePos ); - - // sanity check to prevent infinite loops - if ( xrefSeqBytePos.size() >= bytePosToXrefMap.size() ) - { - break; - } - } - // have to reverse order so that later XRefs will overwrite previous ones - Collections.reverse( xrefSeqBytePos ); - } - - // merge used and sorted XRef/trailer - for ( Long bPos : xrefSeqBytePos ) - { - curObj = bytePosToXrefMap.get( bPos ); - if ( curObj.trailer != null ) - { - resolvedXrefTrailer.trailer.addAll( curObj.trailer ); - } - resolvedXrefTrailer.xrefTable.putAll( curObj.xrefTable ); - } - - } - - /** - * Gets the resolved trailer. Might return null in case - * {@link #setStartxref(long)} was not called before. - * - * @return the trailer if available - */ - public COSDictionary getTrailer() - { - return ( resolvedXrefTrailer == null ) ? 
null : resolvedXrefTrailer.trailer; - } - - /** - * Gets the resolved xref table. Might return null in case - * {@link #setStartxref(long)} was not called before. - * - * @return the xrefTable if available - */ - public Map getXrefTable() - { - return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.xrefTable; - } - - /** Returns object numbers which are referenced as contained - * in object stream with specified object number. - * - * This will scan resolved xref table for all entries having negated - * stream object number as value. - * - * @param objstmObjNr object number of object stream for which contained object numbers - * should be returned - * - * @return set of object numbers referenced for given object stream - * or null if {@link #setStartxref(long)} was not - * called before so that no resolved xref table exists - */ - public Set getContainedObjectNumbers( final int objstmObjNr ) - { - if ( resolvedXrefTrailer == null ) - { - return null; - } - final Set refObjNrs = new HashSet(); - final long cmpVal = - objstmObjNr; - - for ( Entry xrefEntry : resolvedXrefTrailer.xrefTable.entrySet() ) - { - if ( xrefEntry.getValue() == cmpVal ) - { - refObjNrs.add( xrefEntry.getKey().getNumber() ); - } - } - return refObjNrs; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdfparser; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Map.Entry; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSObjectKey; + +/** + * This class will collect all XRef/trailer objects and creates correct + * xref/trailer information after all objects are read using startxref + * and 'Prev' information (unused XRef/trailer objects are discarded). + * + * In case of missing startxref or wrong startxref pointer all + * XRef/trailer objects are used to create xref table / trailer dictionary + * in order they occur. + * + * For each new xref object/XRef stream method {@link #nextXrefObj(long, XRefType)} + * must be called with start byte position. All following calls to + * {@link #setXRef(COSObjectKey, long)} or {@link #setTrailer(COSDictionary)} + * will add the data for this byte position. + * + * After all objects are parsed the startxref position must be provided + * using {@link #setStartxref(long)}. This is used to build the chain of + * active xref/trailer objects used for creating document trailer and xref table. + * + * @author Timo Böhme + */ +public class XrefTrailerResolver +{ + + /** + * A class which represents a xref/trailer object. + */ + private static class XrefTrailerObj + { + protected COSDictionary trailer = null; + + private XRefType xrefType; + + private final Map xrefTable = new HashMap(); + + /** + * Default constructor. 
+ */ + private XrefTrailerObj() + { + xrefType = XRefType.TABLE; + } + + public void reset() + { + xrefTable.clear(); + } + } + + /** + * The XRefType of a trailer. + */ + public enum XRefType + { + /** + * XRef table type. + */ + TABLE, + /** + * XRef stream type. + */ + STREAM + } + + private final Map bytePosToXrefMap = new HashMap(); + private XrefTrailerObj curXrefTrailerObj = null; + private XrefTrailerObj resolvedXrefTrailer = null; + + /** Log instance. */ + private static final Log LOG = LogFactory.getLog( XrefTrailerResolver.class ); + + /** + * Returns the first trailer if at least one exists. + * + * @return the first trailer or null + */ + public final COSDictionary getFirstTrailer() + { + if (bytePosToXrefMap.isEmpty()) + { + return null; + } + Set offsets = bytePosToXrefMap.keySet(); + SortedSet sortedOffset = new TreeSet(offsets); + return bytePosToXrefMap.get(sortedOffset.first()).trailer; + } + + /** + * Returns the last trailer if at least one exists. + * + * @return the last trailer ir null + */ + public final COSDictionary getLastTrailer() + { + if (bytePosToXrefMap.isEmpty()) + { + return null; + } + Set offsets = bytePosToXrefMap.keySet(); + SortedSet sortedOffset = new TreeSet(offsets); + return bytePosToXrefMap.get(sortedOffset.last()).trailer; + } + + /** + * Returns the count of trailers. + * + * @return the count of trailers. + */ + public final int getTrailerCount() + { + return bytePosToXrefMap.size(); + } + + /** + * Signals that a new XRef object (table or stream) starts. + * @param startBytePos the offset to start at + * @param type the type of the Xref object + */ + public void nextXrefObj( final long startBytePos, XRefType type ) + { + curXrefTrailerObj = new XrefTrailerObj(); + bytePosToXrefMap.put(startBytePos, curXrefTrailerObj); + curXrefTrailerObj.xrefType = type; + } + + /** + * Returns the XRefTxpe of the resolved trailer. + * + * @return the XRefType or null. 
+ */ + public XRefType getXrefType() + { + return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.xrefType; + } + + /** + * Populate XRef HashMap of current XRef object. + * Will add an Xreftable entry that maps ObjectKeys to byte offsets in the file. + * @param objKey The objkey, with id and gen numbers + * @param offset The byte offset in this file + */ + public void setXRef( COSObjectKey objKey, long offset ) + { + if ( curXrefTrailerObj == null ) + { + // should not happen... + LOG.warn( "Cannot add XRef entry for '" + objKey.getNumber() + "' because XRef start was not signalled." ); + return; + } + // PDFBOX-3506 check before adding to the map, to avoid entries from the table being + // overwritten by obsolete entries in hybrid files (/XRefStm entry) + if (!curXrefTrailerObj.xrefTable.containsKey(objKey) ) + { + curXrefTrailerObj.xrefTable.put(objKey, offset); + } + } + + /** + * Adds trailer information for current XRef object. + * + * @param trailer the current document trailer dictionary + */ + public void setTrailer( COSDictionary trailer ) + { + if ( curXrefTrailerObj == null ) + { + // should not happen... + LOG.warn( "Cannot add trailer because XRef start was not signalled." ); + return; + } + curXrefTrailerObj.trailer = trailer; + } + + /** + * Returns the trailer last set by {@link #setTrailer(COSDictionary)}. + * + * @return the current trailer. + * + */ + public COSDictionary getCurrentTrailer() + { + return curXrefTrailerObj.trailer; + } + + /** + * Sets the byte position of the first XRef + * (has to be called after very last startxref was read). + * This is used to resolve chain of active XRef/trailer. + * + * In case startxref position is not found we output a + * warning and use all XRef/trailer objects combined + * in byte position order. + * Thus for incomplete PDF documents with missing + * startxref one could call this method with parameter value -1. 
+ * + * @param startxrefBytePosValue starting position of the first XRef + * + */ + public void setStartxref( long startxrefBytePosValue ) + { + if ( resolvedXrefTrailer != null ) + { + LOG.warn( "Method must be called only ones with last startxref value." ); + return; + } + + resolvedXrefTrailer = new XrefTrailerObj(); + resolvedXrefTrailer.trailer = new COSDictionary(); + + XrefTrailerObj curObj = bytePosToXrefMap.get( startxrefBytePosValue ); + List xrefSeqBytePos = new ArrayList(); + + if ( curObj == null ) + { + // no XRef at given position + LOG.warn( "Did not found XRef object at specified startxref position " + startxrefBytePosValue ); + + // use all objects in byte position order (last entries overwrite previous ones) + xrefSeqBytePos.addAll( bytePosToXrefMap.keySet() ); + Collections.sort( xrefSeqBytePos ); + } + else + { + // copy xref type + resolvedXrefTrailer.xrefType = curObj.xrefType; + // found starting Xref object + // add this and follow chain defined by 'Prev' keys + xrefSeqBytePos.add( startxrefBytePosValue ); + while ( curObj.trailer != null ) + { + long prevBytePos = curObj.trailer.getLong( COSName.PREV, -1L ); + if ( prevBytePos == -1 ) + { + break; + } + + curObj = bytePosToXrefMap.get( prevBytePos ); + if ( curObj == null ) + { + LOG.warn( "Did not found XRef object pointed to by 'Prev' key at position " + prevBytePos ); + break; + } + xrefSeqBytePos.add( prevBytePos ); + + // sanity check to prevent infinite loops + if ( xrefSeqBytePos.size() >= bytePosToXrefMap.size() ) + { + break; + } + } + // have to reverse order so that later XRefs will overwrite previous ones + Collections.reverse( xrefSeqBytePos ); + } + + // merge used and sorted XRef/trailer + for ( Long bPos : xrefSeqBytePos ) + { + curObj = bytePosToXrefMap.get( bPos ); + if ( curObj.trailer != null ) + { + resolvedXrefTrailer.trailer.addAll( curObj.trailer ); + } + resolvedXrefTrailer.xrefTable.putAll( curObj.xrefTable ); + } + + } + + /** + * Gets the resolved trailer. 
Might return null in case + * {@link #setStartxref(long)} was not called before. + * + * @return the trailer if available + */ + public COSDictionary getTrailer() + { + return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.trailer; + } + + /** + * Gets the resolved xref table. Might return null in case + * {@link #setStartxref(long)} was not called before. + * + * @return the xrefTable if available + */ + public Map getXrefTable() + { + return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.xrefTable; + } + + /** Returns object numbers which are referenced as contained + * in object stream with specified object number. + * + * This will scan resolved xref table for all entries having negated + * stream object number as value. + * + * @param objstmObjNr object number of object stream for which contained object numbers + * should be returned + * + * @return set of object numbers referenced for given object stream + * or null if {@link #setStartxref(long)} was not + * called before so that no resolved xref table exists + */ + public Set getContainedObjectNumbers( final int objstmObjNr ) + { + if ( resolvedXrefTrailer == null ) + { + return null; + } + final Set refObjNrs = new HashSet(); + final long cmpVal = - objstmObjNr; + + for ( Entry xrefEntry : resolvedXrefTrailer.xrefTable.entrySet() ) + { + if ( xrefEntry.getValue() == cmpVal ) + { + refObjNrs.add( xrefEntry.getKey().getNumber() ); + } + } + return refObjNrs; + } + + /** + * Reset all data so that it can be used to rebuild the trailer. 
+ * + */ + protected void reset() + { + for (XrefTrailerObj trailerObj : bytePosToXrefMap.values()) + { + trailerObj.reset(); + } + curXrefTrailerObj = null; + resolvedXrefTrailer = null; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/package.html index b78ef4d879c..35348cc4c2f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java index 3bc44f5328d..681ae354c82 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java @@ -49,7 +49,7 @@ public class COSStandardOutputStream extends FilterOutputStream private boolean onNewLine = false; /** - * COSOutputStream constructor comment. + * Constructor. * * @param out The underlying stream to write to. */ @@ -59,17 +59,31 @@ public COSStandardOutputStream(OutputStream out) } /** - * COSOutputStream constructor comment. + * Constructor. * * @param out The underlying stream to write to. * @param position The current position of output stream. + * @deprecated use {@link COSStandardOutputStream#COSStandardOutputStream(java.io.OutputStream, long) } */ + @Deprecated public COSStandardOutputStream(OutputStream out, int position) { super(out); this.position = position; } - + + /** + * Constructor. + * + * @param out The underlying stream to write to. + * @param position The current position of output stream. + */ + public COSStandardOutputStream(OutputStream out, long position) + { + super(out); + this.position = position; + } + /** * This will get the current position in the stream. 
* diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java index 204c4b98f6d..f5fdb503c31 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java @@ -16,7 +16,6 @@ */ package org.apache.pdfbox.pdfwriter; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.Closeable; import java.io.IOException; @@ -26,10 +25,12 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; import java.text.NumberFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Deque; +import java.util.HashMap; import java.util.HashSet; import java.util.Hashtable; import java.util.Iterator; @@ -58,13 +59,13 @@ import org.apache.pdfbox.cos.COSUpdateInfo; import org.apache.pdfbox.cos.ICOSVisitor; import org.apache.pdfbox.io.IOUtils; -import org.apache.pdfbox.io.RandomAccessBuffer; import org.apache.pdfbox.io.RandomAccessInputStream; import org.apache.pdfbox.io.RandomAccessRead; import org.apache.pdfbox.pdfparser.PDFXRefStream; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.encryption.SecurityHandler; import org.apache.pdfbox.pdmodel.fdf.FDFDocument; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.COSFilterInputStream; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface; import org.apache.pdfbox.util.Charsets; import org.apache.pdfbox.util.Hex; @@ -159,12 +160,12 @@ public class COSWriter implements ICOSVisitor, Closeable */ public static final byte[] ENDSTREAM = "endstream".getBytes(Charsets.US_ASCII); - private final NumberFormat formatXrefOffset = new DecimalFormat("0000000000"); + private final NumberFormat formatXrefOffset = new DecimalFormat("0000000000", + 
DecimalFormatSymbols.getInstance(Locale.US)); // the decimal format for the xref object generation number data - private final NumberFormat formatXrefGeneration = new DecimalFormat("00000"); - - private final NumberFormat formatDecimal = NumberFormat.getNumberInstance( Locale.US ); + private final NumberFormat formatXrefGeneration = new DecimalFormat("00000", + DecimalFormatSymbols.getInstance(Locale.US)); // the stream where we create the pdf output private OutputStream output; @@ -182,8 +183,10 @@ public class COSWriter implements ICOSVisitor, Closeable // these are used for indirect references in other objects //A hashtable is used on purpose over a hashmap //so that null entries will not get added. + @SuppressWarnings({"squid:S1149"}) private final Map objectKeys = new Hashtable(); - private final Map keyObject = new Hashtable(); + + private final Map keyObject = new HashMap(); // the list of x ref entries to be made so far private final List xRefEntries = new ArrayList(); @@ -211,69 +214,81 @@ public class COSWriter implements ICOSVisitor, Closeable // signing private boolean incrementalUpdate = false; private boolean reachedSignature = false; - private long signatureOffset, signatureLength; - private long byteRangeOffset, byteRangeLength; + private long signatureOffset; + private long signatureLength; + private long byteRangeOffset; + private long byteRangeLength; private RandomAccessRead incrementalInput; - private RandomAccessRead tempIncInput; private OutputStream incrementalOutput; private SignatureInterface signatureInterface; + private byte[] incrementPart; + private COSArray byteRangeArray; /** - * COSWriter constructor comment. + * COSWriter constructor. * - * @param os The wrapped output stream. + * @param outputStream The output stream to write the PDF. It will be closed when this object is + * closed. 
*/ - public COSWriter(OutputStream os) + public COSWriter(OutputStream outputStream) { - super(); - setOutput(os); + setOutput(outputStream); setStandardOutput(new COSStandardOutputStream(output)); - formatDecimal.setMaximumFractionDigits( 10 ); - formatDecimal.setGroupingUsed( false ); - } - - /** - * COSWriter constructor for incremental updates. - * - * @param outputStream output stream where the new PDF data will be written - * @param inputStream input stream containing source PDF data - * - * @throws IOException if something went wrong - * @deprecated Use {@link #COSWriter(OutputStream, RandomAccessRead)} instead - */ - public COSWriter(OutputStream outputStream, InputStream inputStream) throws IOException - { - super(); - tempIncInput = new RandomAccessBuffer(inputStream); - initWriter(outputStream, tempIncInput); } /** - * COSWriter constructor for incremental updates. + * COSWriter constructor for incremental updates. There must be a path of objects that have + * {@link COSUpdateInfo#isNeedToBeUpdated()} set, starting from the document catalog. For + * signatures this is taken care by PDFBox itself. * - * @param outputStream output stream where the new PDF data will be written + * @param outputStream output stream where the new PDF data will be written. It will be closed + * when this object is closed. 
* @param inputData random access read containing source PDF data - * + * * @throws IOException if something went wrong */ public COSWriter(OutputStream outputStream, RandomAccessRead inputData) throws IOException - { - super(); - initWriter(outputStream, inputData); - } - - private void initWriter(OutputStream outputStream, RandomAccessRead inputData) throws IOException { // write to buffer instead of output setOutput(new ByteArrayOutputStream()); - setStandardOutput(new COSStandardOutputStream(output, (int)inputData.length())); + setStandardOutput(new COSStandardOutputStream(output, inputData.length())); incrementalInput = inputData; incrementalOutput = outputStream; incrementalUpdate = true; + } - formatDecimal.setMaximumFractionDigits( 10 ); - formatDecimal.setGroupingUsed( false ); + /** + * Constructor for incremental updates with a list of objects to write. This allows to + * include objects even if there is no path of objects that have + * {@link COSUpdateInfo#isNeedToBeUpdated()} set so the incremental update gets smaller. Only + * dictionaries are supported; if you need to update other objects classes, then add their + * parent dictionary. + * + * @param outputStream output stream where the new PDF data will be written. It will be closed + * when this object is closed. + * @param inputData random access read containing source PDF data. + * @param objectsToWrite objects that must be part of the incremental saving. + * @throws IOException if something went wrong + */ + public COSWriter(OutputStream outputStream, RandomAccessRead inputData, + Set objectsToWrite) throws IOException + { + // Implementation notes / summary of April 2019 comments in PDFBOX-45: + // we allow only COSDictionary in objectsToWrite because other types, + // especially COSArray, are written directly. If we'd allow them with the current + // COSWriter implementation, they would be written twice, + // once directly and once indirectly as orphan. 
+ // One could improve visitFromArray and visitFromDictionary (see commit 1856891) + // to handle arrays like dictionaries so that arrays are written indirectly, + // but this produces very inefficient files. + // If there is ever a real need to update arrays, then a future implementation could + // recommit change 1856891 (also needs to move the byteRange position detection code) + // and also set isDirect in arrays to true by default, to avoid inefficient files. + // COSArray.setDirect(true) is called at some places in the current implementation for + // documentational purposes only. + this(outputStream, inputData); + this.objectsToWrite.addAll(objectsToWrite); } private void prepareIncrement(PDDocument doc) @@ -286,7 +301,7 @@ private void prepareIncrement(PDDocument doc) Map xrefTable = cosDoc.getXrefTable(); Set keySet = xrefTable.keySet(); - long highestNumber=0; + long highestNumber = doc.getDocument().getHighestXRefObjectNumber(); for ( COSObjectKey cosObjectKey : keySet ) { COSBase object = cosDoc.getObjectFromPool(cosObjectKey).getObject(); @@ -336,18 +351,10 @@ public void close() throws IOException { getStandardOutput().close(); } - if (getOutput() != null) - { - getOutput().close(); - } if (incrementalOutput != null) { incrementalOutput.close(); } - if (tempIncInput != null) - { - incrementalOutput.close(); - } } /** @@ -459,9 +466,9 @@ protected void setStartxref(long newStartxref) protected void doWriteBody(COSDocument doc) throws IOException { COSDictionary trailer = doc.getTrailer(); - COSDictionary root = (COSDictionary)trailer.getDictionaryObject( COSName.ROOT ); - COSDictionary info = (COSDictionary)trailer.getDictionaryObject( COSName.INFO ); - COSDictionary encrypt = (COSDictionary)trailer.getDictionaryObject( COSName.ENCRYPT ); + COSDictionary root = trailer.getCOSDictionary(COSName.ROOT); + COSDictionary info = trailer.getCOSDictionary(COSName.INFO); + COSDictionary encrypt = trailer.getCOSDictionary(COSName.ENCRYPT); if( root != null ) { 
addObjectToWrite( root ); @@ -471,18 +478,18 @@ protected void doWriteBody(COSDocument doc) throws IOException addObjectToWrite( info ); } - while( objectsToWrite.size() > 0 ) - { - COSBase nextObject = objectsToWrite.removeFirst(); - objectsToWriteSet.remove(nextObject); - doWriteObject( nextObject ); - } + doWriteObjects(); willEncrypt = false; if( encrypt != null ) { addObjectToWrite( encrypt ); } + doWriteObjects(); + } + + private void doWriteObjects() throws IOException + { while( objectsToWrite.size() > 0 ) { COSBase nextObject = objectsToWrite.removeFirst(); @@ -514,8 +521,7 @@ private void addObjectToWrite( COSBase object ) cosBase = keyObject.get(cosObjectKey); } if (actual != null && objectKeys.containsKey(actual) - && object instanceof COSUpdateInfo && !((COSUpdateInfo)object).isNeedToBeUpdated() - && cosBase instanceof COSUpdateInfo && !((COSUpdateInfo)cosBase).isNeedToBeUpdated() ) + && !isNeedToBeUpdated(object) && !isNeedToBeUpdated(cosBase)) { return; } @@ -528,6 +534,20 @@ private void addObjectToWrite( COSBase object ) } } + /** + * Convenience method, so that we get false for types that can't be updated. + * @param base + * @return + */ + private boolean isNeedToBeUpdated(COSBase base) + { + if (base instanceof COSUpdateInfo) + { + return ((COSUpdateInfo) base).isNeedToBeUpdated(); + } + return false; + } + /** * This will write a COS object. 
* @@ -538,15 +558,6 @@ private void addObjectToWrite( COSBase object ) public void doWriteObject( COSBase obj ) throws IOException { writtenObjects.add( obj ); - if(obj instanceof COSDictionary) - { - COSBase itemType = ((COSDictionary) obj).getItem(COSName.TYPE); - if (COSName.SIG.equals(itemType) || COSName.DOC_TIME_STAMP.equals(itemType)) - { - reachedSignature = true; - } - } - // find the physical reference currentObjectKey = getObjectKey( obj ); // add a x ref entry @@ -576,11 +587,11 @@ protected void doWriteHeader(COSDocument doc) throws IOException String headerString; if (fdfDocument != null) { - headerString = "%FDF-"+ Float.toString(fdfDocument.getDocument().getVersion()); + headerString = "%FDF-"+ doc.getVersion(); } else { - headerString = "%PDF-"+ Float.toString(pdDocument.getDocument().getVersion()); + headerString = "%PDF-"+ doc.getVersion(); } getStandardOutput().write( headerString.getBytes(Charsets.ISO_8859_1) ); @@ -619,7 +630,13 @@ protected void doWriteTrailer(COSDocument doc) throws IOException } // Remove a checksum if present trailer.removeItem( COSName.DOC_CHECKSUM ); - + + COSArray idArray = trailer.getCOSArray(COSName.ID); + if (idArray != null) + { + idArray.setDirect(true); + } + trailer.accept(this); } @@ -632,7 +649,7 @@ private void doWriteXRefInc(COSDocument doc, long hybridPrev) throws IOException // with data available here // create a new XRefStrema object - PDFXRefStream pdfxRefStream = new PDFXRefStream(); + PDFXRefStream pdfxRefStream = new PDFXRefStream(doc); // add all entries from the incremental update. 
List xRefEntries2 = getXRefEntries(); @@ -696,32 +713,33 @@ private void doWriteXRefTable() throws IOException int xRefLength = xRefRanges.length; int x = 0; int j = 0; - while (x < xRefLength && (xRefLength % 2) == 0) + if ((xRefLength % 2) == 0) { - writeXrefRange(xRefRanges[x], xRefRanges[x + 1]); - - for (int i = 0; i < xRefRanges[x + 1]; ++i) + while (x < xRefLength) { - writeXrefEntry(xRefEntries.get(j++)); + writeXrefRange(xRefRanges[x], xRefRanges[x + 1]); + + for (int i = 0; i < xRefRanges[x + 1]; ++i) + { + writeXrefEntry(xRefEntries.get(j++)); + } + x += 2; } - x += 2; } } /** - * Write an incremental update for a non signature case. This can be used for e.g. augmenting signatures. - * + * Write an incremental update for a non signature case. This can be used for e.g. augmenting + * signatures. + * * @throws IOException */ private void doWriteIncrement() throws IOException { - ByteArrayOutputStream byteOut = (ByteArrayOutputStream) output; - byteOut.flush(); - byte[] buffer = byteOut.toByteArray(); - SequenceInputStream signStream = new SequenceInputStream(new RandomAccessInputStream(incrementalInput), - new ByteArrayInputStream(buffer)); - // write the data to the incremental output stream - IOUtils.copy(signStream, incrementalOutput); + // write existing PDF + IOUtils.copy(new RandomAccessInputStream(incrementalInput), incrementalOutput); + // write the actual incremental update + incrementalOutput.write(((ByteArrayOutputStream) output).toByteArray()); } private void doWriteSignature() throws IOException @@ -733,56 +751,111 @@ private void doWriteSignature() throws IOException long afterLength = getStandardOutput().getPos() - (inLength + signatureLength) - (signatureOffset - inLength); String byteRange = "0 " + beforeLength + " " + afterOffset + " " + afterLength + "]"; - if (byteRangeLength - byteRange.length() < 0) + + // Assign the values to the actual COSArray, so that the user can access it before closing + byteRangeArray.set(0, COSInteger.ZERO); 
+ byteRangeArray.set(1, COSInteger.get(beforeLength)); + byteRangeArray.set(2, COSInteger.get(afterOffset)); + byteRangeArray.set(3, COSInteger.get(afterLength)); + + if (byteRange.length() > byteRangeLength) { - throw new IOException("Can't write new ByteRange, not enough space"); + throw new IOException("Can't write new byteRange '" + byteRange + + "' not enough space: byteRange.length(): " + byteRange.length() + + ", byteRangeLength: " + byteRangeLength); } // copy the new incremental data into a buffer (e.g. signature dict, trailer) ByteArrayOutputStream byteOut = (ByteArrayOutputStream) output; byteOut.flush(); - byte[] buffer = byteOut.toByteArray(); + incrementPart = byteOut.toByteArray(); - // overwrite the ByteRange in the buffer + // overwrite the reserve ByteRange in the buffer byte[] byteRangeBytes = byteRange.getBytes(Charsets.ISO_8859_1); for (int i = 0; i < byteRangeLength; i++) { if (i >= byteRangeBytes.length) { - buffer[(int)(byteRangeOffset + i - inLength)] = 0x20; // SPACE + incrementPart[(int) (byteRangeOffset + i - inLength)] = 0x20; // SPACE } else { - buffer[(int)(byteRangeOffset + i - inLength)] = byteRangeBytes[i]; + incrementPart[(int) (byteRangeOffset + i - inLength)] = byteRangeBytes[i]; } } - // get only the incremental bytes to be signed (includes /ByteRange but not /Contents) - byte[] signBuffer = new byte[buffer.length - (int)signatureLength]; - int bufSignatureOffset = (int)(signatureOffset - inLength); - System.arraycopy(buffer, 0, signBuffer, 0, bufSignatureOffset); - System.arraycopy(buffer, bufSignatureOffset + (int)signatureLength, - signBuffer, bufSignatureOffset, buffer.length - bufSignatureOffset - (int)signatureLength); - - SequenceInputStream signStream = new SequenceInputStream(new RandomAccessInputStream(incrementalInput), - new ByteArrayInputStream(signBuffer)); - - // sign the bytes - byte[] sign = signatureInterface.sign(signStream); - String signature = new COSString(sign).toHexString(); - // substract 2 bytes 
because of the enclosing "<>" - if (signature.length() > signatureLength - 2) + if(signatureInterface != null) + { + // data to be signed + final InputStream dataToSign = getDataToSign(); + + // sign the bytes + byte[] signatureBytes = signatureInterface.sign(dataToSign); + writeExternalSignature(signatureBytes); + } + // else signature should created externally and set via writeSignature() + } + + /** + * Return the stream of PDF data to be signed. Clients should use this method only to create + * signatures externally. {@link #write(PDDocument)} method should have been called prior. + * The created signature should be set using {@link #writeExternalSignature(byte[])}. + *

+ * When {@link SignatureInterface} instance is used, COSWriter obtains and writes the signature itself. + *

+ * + * @return data stream to be signed + * @throws IllegalStateException if PDF is not prepared for external signing + * @throws IOException if input data is closed + */ + public InputStream getDataToSign() throws IOException + { + if (incrementPart == null || incrementalInput == null) + { + throw new IllegalStateException("PDF not prepared for signing"); + } + // range of incremental bytes to be signed (includes /ByteRange but not /Contents) + int incPartSigOffset = (int) (signatureOffset - incrementalInput.length()); + int afterSigOffset = incPartSigOffset + (int) signatureLength; + int[] range = {0, incPartSigOffset, + afterSigOffset, incrementPart.length - afterSigOffset}; + + return new SequenceInputStream( + new RandomAccessInputStream(incrementalInput), + new COSFilterInputStream(incrementPart, range)); + } + + /** + * Write externally created signature of PDF data obtained via {@link #getDataToSign()} method. + * + * @param cmsSignature CMS signature byte array + * @throws IllegalStateException if PDF is not prepared for external signing + * @throws IOException if source data stream is closed + */ + public void writeExternalSignature(byte[] cmsSignature) throws IOException { + + if (incrementPart == null || incrementalInput == null) + { + throw new IllegalStateException("PDF not prepared for setting signature"); + } + byte[] signatureBytes = Hex.getBytes(cmsSignature); + + // subtract 2 bytes because of the enclosing "<>" + if (signatureBytes.length > signatureLength - 2) { throw new IOException("Can't write signature, not enough space"); } // overwrite the signature Contents in the buffer - byte[] signatureBytes = signature.getBytes(Charsets.ISO_8859_1); - System.arraycopy(signatureBytes, 0, buffer, bufSignatureOffset + 1, signatureBytes.length); + int incPartSigOffset = (int) (signatureOffset - incrementalInput.length()); + System.arraycopy(signatureBytes, 0, incrementPart, incPartSigOffset + 1, signatureBytes.length); // write the data to the 
incremental output stream IOUtils.copy(new RandomAccessInputStream(incrementalInput), incrementalOutput); - incrementalOutput.write(buffer); + incrementalOutput.write(incrementPart); + + // prevent further use + incrementPart = null; } private void writeXrefRange(long x, long y) throws IOException @@ -829,9 +902,9 @@ protected Long[] getXRefRanges(List xRefEntriesList) long count = 1; List list = new ArrayList(); - for( Object object : xRefEntriesList ) + for (COSWriterXRefEntry object : xRefEntriesList) { - long nr = (int) ((COSWriterXRefEntry) object).getKey().getNumber(); + long nr = object.getKey().getNumber(); if (nr == last + 1) { ++count; @@ -872,15 +945,13 @@ private COSObjectKey getObjectKey( COSBase obj ) { actual = ((COSObject)obj).getObject(); } - COSObjectKey key = null; - if( actual != null ) + // PDFBOX-4540: because objectKeys is accessible from outside, it is possible + // that a COSObject obj is already in the objectKeys map. + COSObjectKey key = objectKeys.get(obj); + if( key == null && actual != null ) { key = objectKeys.get(actual); } - if( key == null ) - { - key = objectKeys.get(obj); - } if (key == null) { setNumber(getNumber()+1); @@ -917,8 +988,12 @@ public Object visitFromArray( COSArray obj ) throws IOException else if( current instanceof COSObject ) { COSBase subValue = ((COSObject)current).getObject(); - if (incrementalUpdate || subValue instanceof COSDictionary || subValue == null) + if (willEncrypt || incrementalUpdate || subValue instanceof COSDictionary || subValue == null) { + // PDFBOX-4308: added willEncrypt to prevent an object + // that is referenced several times from being written + // direct and indirect, thus getting encrypted + // with wrong object number or getting encrypted twice addObjectToWrite( current ); writeReference( current ); } @@ -963,6 +1038,14 @@ public Object visitFromBoolean(COSBoolean obj) throws IOException @Override public Object visitFromDictionary(COSDictionary obj) throws IOException { + if 
(!reachedSignature) + { + COSBase itemType = obj.getItem(COSName.TYPE); + if (COSName.SIG.equals(itemType) || COSName.DOC_TIME_STAMP.equals(itemType)) + { + reachedSignature = true; + } + } getStandardOutput().write(DICT_OPEN); getStandardOutput().writeEOL(); for (Map.Entry entry : obj.entrySet()) @@ -979,13 +1062,14 @@ public Object visitFromDictionary(COSDictionary obj) throws IOException if (!incrementalUpdate) { // write all XObjects as direct objects, this will save some size + // PDFBOX-3684: but avoid dictionary that references itself COSBase item = dict.getItem(COSName.XOBJECT); - if (item != null) + if (item != null && !COSName.XOBJECT.equals(entry.getKey())) { item.setDirect(true); } item = dict.getItem(COSName.RESOURCES); - if (item != null) + if (item != null && !COSName.RESOURCES.equals(entry.getKey())) { item.setDirect(true); } @@ -1006,8 +1090,12 @@ public Object visitFromDictionary(COSDictionary obj) throws IOException else if( value instanceof COSObject ) { COSBase subValue = ((COSObject)value).getObject(); - if (incrementalUpdate || subValue instanceof COSDictionary || subValue == null) + if (willEncrypt || incrementalUpdate || subValue instanceof COSDictionary || subValue == null) { + // PDFBOX-4308: added willEncrypt to prevent an object + // that is referenced several times from being written + // direct and indirect, thus getting encrypted + // with wrong object number or getting encrypted twice addObjectToWrite( value ); writeReference( value ); } @@ -1028,6 +1116,7 @@ else if( value instanceof COSObject ) } else if(reachedSignature && COSName.BYTERANGE.equals(entry.getKey())) { + byteRangeArray = (COSArray) entry.getValue(); byteRangeOffset = getStandardOutput().getPos() + 1; value.accept(this); byteRangeLength = getStandardOutput().getPos() - 1 - byteRangeOffset; @@ -1099,14 +1188,16 @@ public Object visitFromDocument(COSDocument doc) throws IOException getStandardOutput().write(EOF); getStandardOutput().writeEOL(); - if(incrementalUpdate) 
+ if (incrementalUpdate) { - if (signatureOffset == 0 || byteRangeOffset == 0) - { - doWriteIncrement(); - } else { - doWriteSignature(); - } + if (signatureOffset == 0 || byteRangeOffset == 0) + { + doWriteIncrement(); + } + else + { + doWriteSignature(); + } } return null; @@ -1218,7 +1309,9 @@ public void write(COSDocument doc) throws IOException } /** - * This will write the pdf document. + * This will write the pdf document. If signature should be created externally, + * {@link #writeExternalSignature(byte[])} should be invoked to set signature after calling this + * method. * * @param doc The document to write. * @@ -1230,10 +1323,13 @@ public void write(PDDocument doc) throws IOException } /** - * This will write the pdf document. + * This will write the pdf document. If signature should be created externally, + * {@link #writeExternalSignature(byte[])} should be invoked to set signature after calling this + * method. * * @param doc The document to write. - * @param signInterface class to be used for signing + * @param signInterface class to be used for signing; {@code null} if external signing would be + * performed or there will be no signing at all * * @throws IOException If an error occurs while generating the data. 
* @throws IllegalStateException If the document has an encryption dictionary but no protection @@ -1286,8 +1382,17 @@ public void write(PDDocument doc, SignatureInterface signInterface) throws IOExc COSDocument cosDoc = pdDocument.getDocument(); COSDictionary trailer = cosDoc.getTrailer(); - COSArray idArray = (COSArray)trailer.getDictionaryObject( COSName.ID ); + COSArray idArray = null; boolean missingID = true; + COSBase base = trailer.getDictionaryObject(COSName.ID); + if (base instanceof COSArray) + { + idArray = (COSArray) base; + if (idArray.size() == 2) + { + missingID = false; + } + } // check for an existing documentID if (idArray != null && idArray.size() == 2) { @@ -1310,13 +1415,12 @@ public void write(PDDocument doc, SignatureInterface signInterface) throws IOExc // we don't have path or size, so do the best we can md5.update( Long.toString(idTime).getBytes(Charsets.ISO_8859_1) ); - COSDictionary info = (COSDictionary)trailer.getDictionaryObject( COSName.INFO ); + COSDictionary info = trailer.getCOSDictionary(COSName.INFO); if( info != null ) { - Iterator values = info.getValues().iterator(); - while( values.hasNext() ) + for (COSBase cosBase : info.getValues()) { - md5.update(values.next().toString().getBytes(Charsets.ISO_8859_1)); + md5.update(cosBase.toString().getBytes(Charsets.ISO_8859_1)); } } // reuse origin documentID if available as first value @@ -1326,7 +1430,7 @@ public void write(PDDocument doc, SignatureInterface signInterface) throws IOExc idArray = new COSArray(); idArray.add( firstID ); idArray.add( secondID ); - trailer.setItem( COSName.ID, idArray ); + trailer.setItem(COSName.ID, idArray); } cosDoc.accept(this); } @@ -1380,19 +1484,22 @@ private static void writeString(byte[] bytes, boolean forceHex, OutputStream out { // check for non-ASCII characters boolean isASCII = true; - for (byte b : bytes) + if (!forceHex) { - // if the byte is negative then it is an eight bit byte and is outside the ASCII range - if (b < 0) - { - isASCII = 
false; - break; - } - // PDFBOX-3107 EOL markers within a string are troublesome - if (b == 0x0d || b == 0x0a) + for (byte b : bytes) { - isASCII = false; - break; + // if the byte is negative then it is an eight bit byte and is outside the ASCII range + if (b < 0) + { + isASCII = false; + break; + } + // PDFBOX-3107 EOL markers within a string are troublesome + if (b == 0x0d || b == 0x0a) + { + isASCII = false; + break; + } } } @@ -1412,6 +1519,7 @@ private static void writeString(byte[] bytes, boolean forceHex, OutputStream out break; default: output.write(b); + break; } } output.write(')'); @@ -1420,10 +1528,7 @@ private static void writeString(byte[] bytes, boolean forceHex, OutputStream out { // write hex string output.write('<'); - for (byte b : bytes) - { - output.write(Hex.getBytes(b)); - } + Hex.writeHexBytes(bytes, output); output.write('>'); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriterXRefEntry.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriterXRefEntry.java index 58a069c36f1..f441291701a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriterXRefEntry.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriterXRefEntry.java @@ -40,6 +40,20 @@ public class COSWriterXRefEntry implements Comparable NULLENTRY.setFree(true); } + /** + * Constructor. + * + * @param start The start attribute. + * @param obj The COS object that this entry represents. + * @param keyValue The key to the COS object. + */ + public COSWriterXRefEntry(long start, COSBase obj, COSObjectKey keyValue) + { + setOffset(start); + setObject(obj); + setKey(keyValue); + } + /** * {@inheritDoc} */ @@ -131,21 +145,6 @@ public final void setOffset(long newOffset) offset = newOffset; } - /** - * COSWriterXRefEntry constructor comment. - * - * @param start The start attribute. - * @param obj The COS object that this entry represents. - * @param keyValue The key to the COS object. 
- */ - public COSWriterXRefEntry(long start, COSBase obj, COSObjectKey keyValue) - { - super(); - setOffset(start); - setObject(obj); - setKey(keyValue); - } - /** * This will get the object. * diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java index d7038d9a1b6..849ffd595a7 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java @@ -21,6 +21,7 @@ import java.util.List; import java.util.Map; import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSBoolean; @@ -28,6 +29,7 @@ import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNull; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.util.Charsets; @@ -102,7 +104,7 @@ public void writeTokens(Object... tokens) throws IOException * @param tokens The tokens to write to the stream. * @throws IOException If there is an error writing to the stream. 
*/ - public void writeTokens( List tokens ) throws IOException + public void writeTokens( List tokens ) throws IOException { for (Object token : tokens) { @@ -143,11 +145,10 @@ else if( o instanceof COSArray ) output.write(COSWriter.ARRAY_OPEN); for( int i=0; i - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDAbstractContentStream.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDAbstractContentStream.java new file mode 100644 index 00000000000..cbcc2bbc422 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDAbstractContentStream.java @@ -0,0 +1,1665 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel; + +import java.awt.Color; +import java.awt.geom.AffineTransform; +import java.io.Closeable; +import java.io.IOException; +import java.io.OutputStream; +import java.text.NumberFormat; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Locale; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.contentstream.operator.OperatorName; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.pdfwriter.COSWriter; +import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceCMYK; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceN; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.graphics.color.PDICCBased; +import org.apache.pdfbox.pdmodel.graphics.color.PDPattern; +import org.apache.pdfbox.pdmodel.graphics.color.PDSeparation; +import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; +import org.apache.pdfbox.pdmodel.graphics.image.PDInlineImage; +import org.apache.pdfbox.pdmodel.graphics.shading.PDShading; +import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; +import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; +import org.apache.pdfbox.util.Charsets; +import org.apache.pdfbox.util.Matrix; +import org.apache.pdfbox.util.NumberFormatUtil; + +/** + * Provides the ability to write to a content stream. 
+ * + * @author Ben Litchfield + */ +abstract class PDAbstractContentStream implements Closeable +{ + private static final Log LOG = LogFactory.getLog(PDAbstractContentStream.class); + + protected final PDDocument document; // may be null + + protected final OutputStream outputStream; + protected final PDResources resources; + + protected boolean inTextMode = false; + protected final Deque fontStack = new ArrayDeque(); + + protected final Deque nonStrokingColorSpaceStack = new ArrayDeque(); + protected final Deque strokingColorSpaceStack = new ArrayDeque(); + + // number format + private final NumberFormat formatDecimal = NumberFormat.getNumberInstance(Locale.US); + private final byte[] formatBuffer = new byte[32]; + + /** + * Create a new appearance stream. + * + * @param document may be null + * @param outputStream The appearances output stream to write to. + * @param resources The resources to use + */ + PDAbstractContentStream(PDDocument document, OutputStream outputStream, PDResources resources) + { + this.document = document; + this.outputStream = outputStream; + this.resources = resources; + + formatDecimal.setMaximumFractionDigits(4); + formatDecimal.setGroupingUsed(false); + } + + /** + * Sets the maximum number of digits allowed for fractional numbers. + * + * @see NumberFormat#setMaximumFractionDigits(int) + * @param fractionDigitsNumber + */ + protected void setMaximumFractionDigits(int fractionDigitsNumber) + { + formatDecimal.setMaximumFractionDigits(fractionDigitsNumber); + } + + /** + * Begin some text operations. + * + * @throws IOException If there is an error writing to the stream or if you attempt to + * nest beginText calls. + * @throws IllegalStateException If the method was not allowed to be called at this time. 
+ */ + public void beginText() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: Nested beginText() calls are not allowed."); + } + writeOperator(OperatorName.BEGIN_TEXT); + inTextMode = true; + } + + /** + * End some text operations. + * + * @throws IOException If there is an error writing to the stream or if you attempt to + * nest endText calls. + * @throws IllegalStateException If the method was not allowed to be called at this time. + */ + public void endText() throws IOException + { + if (!inTextMode) + { + throw new IllegalStateException("Error: You must call beginText() before calling endText."); + } + writeOperator(OperatorName.END_TEXT); + inTextMode = false; + } + + /** + * Set the font and font size to draw text with. + * + * @param font The font to use. + * @param fontSize The font size to draw the text. + * @throws IOException If there is an error writing the font information. + */ + public void setFont(PDFont font, float fontSize) throws IOException + { + if (fontStack.isEmpty()) + { + fontStack.add(font); + } + else + { + fontStack.pop(); + fontStack.push(font); + } + + // keep track of fonts which are configured for subsetting + if (font.willBeSubset()) + { + if (document != null) + { + document.getFontsToSubset().add(font); + } + else + { + LOG.warn("Using the subsetted font '" + font.getName() + + "' without a PDDocument context; call subset() before saving"); + } + } + + writeOperand(resources.add(font)); + writeOperand(fontSize); + writeOperator(OperatorName.SET_FONT_AND_SIZE); + } + + /** + * Shows the given text at the location specified by the current text matrix with the given + * interspersed positioning. This allows the user to efficiently position each glyph or sequence + * of glyphs. + * + * @param textWithPositioningArray An array consisting of String and Float types. Each String is + * output to the page using the current text matrix. 
Using the default coordinate system, each + * interspersed number adjusts the current text matrix by translating to the left or down for + * horizontal and vertical text respectively. The number is expressed in thousands of a text + * space unit, and may be negative. + * + * @throws IOException if an io exception occurs. + */ + public void showTextWithPositioning(Object[] textWithPositioningArray) throws IOException + { + write("["); + for (Object obj : textWithPositioningArray) + { + if (obj instanceof String) + { + showTextInternal((String) obj); + } + else if (obj instanceof Float) + { + writeOperand((Float) obj); + } + else + { + throw new IllegalArgumentException("Argument must consist of array of Float and String types"); + } + } + write("] "); + writeOperator(OperatorName.SHOW_TEXT_ADJUSTED); + } + + /** + * Shows the given text at the location specified by the current text matrix. + * + * @param text The Unicode text to show. + * @throws IOException If an io exception occurs. + * @throws IllegalArgumentException if a character isn't supported by the current font + */ + public void showText(String text) throws IOException + { + showTextInternal(text); + write(" "); + writeOperator(OperatorName.SHOW_TEXT); + } + + /** + * Outputs a string using the correct encoding and subsetting as required. + * + * @param text The Unicode text to show. + * + * @throws IOException If an io exception occurs. 
+ */ + protected void showTextInternal(String text) throws IOException + { + if (!inTextMode) + { + throw new IllegalStateException("Must call beginText() before showText()"); + } + + if (fontStack.isEmpty()) + { + throw new IllegalStateException("Must call setFont() before showText()"); + } + + PDFont font = fontStack.peek(); + + // complex text layout + byte[] encodedText = null; + + if (encodedText == null) + { + encodedText = font.encode(text); + } + + // Unicode code points to keep when subsetting + if (font.willBeSubset()) + { + int offset = 0; + while (offset < text.length()) + { + int codePoint = text.codePointAt(offset); + font.addToSubset(codePoint); + offset += Character.charCount(codePoint); + } + } + + COSWriter.writeString(encodedText, outputStream); + } + + /** + * Sets the text leading. + * + * @param leading The leading in unscaled text units. + * @throws IOException If there is an error writing to the stream. + */ + public void setLeading(float leading) throws IOException + { + writeOperand(leading); + writeOperator(OperatorName.SET_TEXT_LEADING); + } + + /** + * Move to the start of the next line of text. Requires the leading (see {@link #setLeading}) + * to have been set. + * + * @throws IOException If there is an error writing to the stream. + */ + public void newLine() throws IOException + { + if (!inTextMode) + { + throw new IllegalStateException("Must call beginText() before newLine()"); + } + writeOperator(OperatorName.NEXT_LINE); + } + + /** + * The Td operator. + * Move to the start of the next line, offset from the start of the current line by (tx, ty). + * + * @param tx The x translation. + * @param ty The y translation. + * @throws IOException If there is an error writing to the stream. + * @throws IllegalStateException If the method was not allowed to be called at this time. 
+ */ + public void newLineAtOffset(float tx, float ty) throws IOException + { + if (!inTextMode) + { + throw new IllegalStateException("Error: must call beginText() before newLineAtOffset()"); + } + writeOperand(tx); + writeOperand(ty); + writeOperator(OperatorName.MOVE_TEXT); + } + + /** + * The Tm operator. Sets the text matrix to the given values. + * A current text matrix will be replaced with the new one. + * + * @param matrix the transformation matrix + * @throws IOException If there is an error writing to the stream. + * @throws IllegalStateException If the method was not allowed to be called at this time. + */ + public void setTextMatrix(Matrix matrix) throws IOException + { + if (!inTextMode) + { + throw new IllegalStateException("Error: must call beginText() before setTextMatrix"); + } + writeAffineTransform(matrix.createAffineTransform()); + writeOperator(OperatorName.SET_MATRIX); + } + + /** + * Draw an image at the x,y coordinates, with the default size of the image. + * + * @param image The image to draw. + * @param x The x-coordinate to draw the image. + * @param y The y-coordinate to draw the image. + * + * @throws IOException If there is an error writing to the stream. + */ + public void drawImage(PDImageXObject image, float x, float y) throws IOException + { + drawImage(image, x, y, image.getWidth(), image.getHeight()); + } + + /** + * Draw an image at the x,y coordinates, with the given size. + * + * @param image The image to draw. + * @param x The x-coordinate to draw the image. + * @param y The y-coordinate to draw the image. + * @param width The width to draw the image. + * @param height The height to draw the image. + * + * @throws IOException If there is an error writing to the stream. + * @throws IllegalStateException If the method was called within a text block. 
+ */ + public void drawImage(PDImageXObject image, float x, float y, float width, float height) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: drawImage is not allowed within a text block."); + } + + saveGraphicsState(); + + AffineTransform transform = new AffineTransform(width, 0, 0, height, x, y); + transform(new Matrix(transform)); + + writeOperand(resources.add(image)); + writeOperator(OperatorName.DRAW_OBJECT); + + restoreGraphicsState(); + } + + /** + * Draw an image at the origin with the given transformation matrix. + * + * @param image The image to draw. + * @param matrix The transformation matrix to apply to the image. + * + * @throws IOException If there is an error writing to the stream. + * @throws IllegalStateException If the method was called within a text block. + */ + public void drawImage(PDImageXObject image, Matrix matrix) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: drawImage is not allowed within a text block."); + } + + saveGraphicsState(); + + AffineTransform transform = matrix.createAffineTransform(); + transform(new Matrix(transform)); + + writeOperand(resources.add(image)); + writeOperator(OperatorName.DRAW_OBJECT); + + restoreGraphicsState(); + } + + /** + * Draw an inline image at the x,y coordinates, with the default size of the image. + * + * @param inlineImage The inline image to draw. + * @param x The x-coordinate to draw the inline image. + * @param y The y-coordinate to draw the inline image. + * + * @throws IOException If there is an error writing to the stream. + */ + public void drawImage(PDInlineImage inlineImage, float x, float y) throws IOException + { + drawImage(inlineImage, x, y, inlineImage.getWidth(), inlineImage.getHeight()); + } + + /** + * Draw an inline image at the x,y coordinates and a certain width and height. + * + * @param inlineImage The inline image to draw. + * @param x The x-coordinate to draw the inline image. 
+ * @param y The y-coordinate to draw the inline image. + * @param width The width of the inline image to draw. + * @param height The height of the inline image to draw. + * + * @throws IOException If there is an error writing to the stream. + * @throws IllegalStateException If the method was called within a text block. + */ + public void drawImage(PDInlineImage inlineImage, float x, float y, float width, float height) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: drawImage is not allowed within a text block."); + } + + saveGraphicsState(); + transform(new Matrix(width, 0, 0, height, x, y)); + + // create the image dictionary + StringBuilder sb = new StringBuilder(); + sb.append(OperatorName.BEGIN_INLINE_IMAGE); + + sb.append("\n /W "); + sb.append(inlineImage.getWidth()); + + sb.append("\n /H "); + sb.append(inlineImage.getHeight()); + + sb.append("\n /CS "); + sb.append("/"); + sb.append(inlineImage.getColorSpace().getName()); + + COSArray decodeArray = inlineImage.getDecode(); + if (decodeArray != null && decodeArray.size() > 0) + { + sb.append("\n /D "); + sb.append("["); + for (COSBase base : decodeArray) + { + sb.append(((COSNumber) base).intValue()); + sb.append(" "); + } + sb.append("]"); + } + + if (inlineImage.isStencil()) + { + sb.append("\n /IM true"); + } + + sb.append("\n /BPC "); + sb.append(inlineImage.getBitsPerComponent()); + + // image dictionary + write(sb.toString()); + writeLine(); + + // binary data + writeOperator(OperatorName.BEGIN_INLINE_IMAGE_DATA); + writeBytes(inlineImage.getData()); + writeLine(); + writeOperator(OperatorName.END_INLINE_IMAGE); + + restoreGraphicsState(); + } + + /** + * Draws the given Form XObject at the current location. + * + * @param form Form XObject + * @throws IOException if the content stream could not be written + * @throws IllegalStateException If the method was called within a text block. 
+ */ + public void drawForm(PDFormXObject form) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: drawForm is not allowed within a text block."); + } + + writeOperand(resources.add(form)); + writeOperator(OperatorName.DRAW_OBJECT); + } + + /** + * The cm operator. Concatenates the given matrix with the current transformation matrix (CTM), + * which maps user space coordinates used within a PDF content stream into output device + * coordinates. More details on coordinates can be found in the PDF 32000 specification, 8.3.2 + * Coordinate Spaces. + * + * @param matrix the transformation matrix + * @throws IOException If there is an error writing to the stream. + */ + public void transform(Matrix matrix) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: Modifying the current transformation matrix is not allowed within text objects."); + } + + writeAffineTransform(matrix.createAffineTransform()); + writeOperator(OperatorName.CONCAT); + } + + /** + * q operator. Saves the current graphics state. + * @throws IOException If an error occurs while writing to the stream. + */ + public void saveGraphicsState() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: Saving the graphics state is not allowed within text objects."); + } + + if (!fontStack.isEmpty()) + { + fontStack.push(fontStack.peek()); + } + if (!strokingColorSpaceStack.isEmpty()) + { + strokingColorSpaceStack.push(strokingColorSpaceStack.peek()); + } + if (!nonStrokingColorSpaceStack.isEmpty()) + { + nonStrokingColorSpaceStack.push(nonStrokingColorSpaceStack.peek()); + } + writeOperator(OperatorName.SAVE); + } + + /** + * Q operator. Restores the current graphics state. + * @throws IOException If an error occurs while writing to the stream. 
+ */ + public void restoreGraphicsState() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: Restoring the graphics state is not allowed within text objects."); + } + + if (!fontStack.isEmpty()) + { + fontStack.pop(); + } + if (!strokingColorSpaceStack.isEmpty()) + { + strokingColorSpaceStack.pop(); + } + if (!nonStrokingColorSpaceStack.isEmpty()) + { + nonStrokingColorSpaceStack.pop(); + } + writeOperator(OperatorName.RESTORE); + } + + /** + * Returns the name under which a color space is referenced in the content stream. The device + * color spaces (DeviceGray, DeviceRGB, DeviceCMYK) are referenced by their own name; any other + * color space is added to the resources and the name returned by that call is used. + * + * @param colorSpace The color space to reference. + * @return The name to write as operand of a color space operator. + */ + protected COSName getName(PDColorSpace colorSpace) + { + if (colorSpace instanceof PDDeviceGray || + colorSpace instanceof PDDeviceRGB || + colorSpace instanceof PDDeviceCMYK) + { + return COSName.getPDFName(colorSpace.getName()); + } + else + { + return resources.add(colorSpace); + } + } + + /** + * Sets the stroking color and, if necessary, the stroking color space. The color space + * operator is only written when the stroking color space stack is empty or its top entry is + * not the same instance as the color space of the given color. Pattern, Separation, DeviceN + * and ICCBased color spaces are written with {@code OperatorName.STROKING_COLOR_N}, all + * others with {@code OperatorName.STROKING_COLOR}. + * + * @param color Color in a specific color space. + * @throws IOException If an IO error occurs while writing to the stream. + */ + public void setStrokingColor(PDColor color) throws IOException + { + if (strokingColorSpaceStack.isEmpty() || + strokingColorSpaceStack.peek() != color.getColorSpace()) + { + writeOperand(getName(color.getColorSpace())); + writeOperator(OperatorName.STROKING_COLORSPACE); + setStrokingColorSpaceStack(color.getColorSpace()); + } + + for (float value : color.getComponents()) + { + writeOperand(value); + } + + if (color.getColorSpace() instanceof PDPattern) + { + writeOperand(color.getPatternName()); + } + + if (color.getColorSpace() instanceof PDPattern || + color.getColorSpace() instanceof PDSeparation || + color.getColorSpace() instanceof PDDeviceN || + color.getColorSpace() instanceof PDICCBased) + { + writeOperator(OperatorName.STROKING_COLOR_N); + } + else + { + writeOperator(OperatorName.STROKING_COLOR); + } + } + + /** + * Set the stroking color using an AWT color. Conversion uses the default sRGB color space. + * + * @param color The color to set.
+ * @throws IOException If an IO error occurs while writing to the stream. + */ + public void setStrokingColor(Color color) throws IOException + { + float[] components = new float[] { + color.getRed() / 255f, color.getGreen() / 255f, color.getBlue() / 255f }; + PDColor pdColor = new PDColor(components, PDDeviceRGB.INSTANCE); + setStrokingColor(pdColor); + } + + /** + * Set the stroking color in the DeviceRGB color space. Range is 0..1. + * + * @param r The red value. + * @param g The green value. + * @param b The blue value. + * @throws IOException If an IO error occurs while writing to the stream. + * @throws IllegalArgumentException If any parameter is outside the range 0..1. + */ + public void setStrokingColor(float r, float g, float b) throws IOException + { + if (isOutsideOneInterval(r) || isOutsideOneInterval(g) || isOutsideOneInterval(b)) + { + throw new IllegalArgumentException("Parameters must be within 0..1, but are " + + String.format("(%.2f,%.2f,%.2f)", r, g, b)); + } + writeOperand(r); + writeOperand(g); + writeOperand(b); + writeOperator(OperatorName.STROKING_COLOR_RGB); + setStrokingColorSpaceStack(PDDeviceRGB.INSTANCE); + } + + /** + * Set the stroking color in the DeviceRGB color space. Range is 0..255. + * + * @param r The red value + * @param g The green value. + * @param b The blue value. + * @throws IOException If an IO error occurs while writing to the stream. + * @throws IllegalArgumentException If the parameters are invalid.
+ * @deprecated use + * {@link #setStrokingColor(float, float, float) setStrokingColor(r/255f, g/255f, b/255f)} + */ + @Deprecated + public void setStrokingColor(int r, int g, int b) throws IOException + { + if (isOutside255Interval(r) || isOutside255Interval(g) || isOutside255Interval(b)) + { + throw new IllegalArgumentException("Parameters must be within 0..255, but are " + + String.format("(%d,%d,%d)", r, g, b)); + } + setStrokingColor(r / 255f, g / 255f, b / 255f); + } + + /** + * Set the stroking color in the DeviceCMYK color space. Range is 0..1. + * + * @param c The cyan value. + * @param m The magenta value. + * @param y The yellow value. + * @param k The black value. + * @throws IOException If an IO error occurs while writing to the stream. + * @throws IllegalArgumentException If any parameter is outside the range 0..1. + */ + public void setStrokingColor(float c, float m, float y, float k) throws IOException + { + if (isOutsideOneInterval(c) || isOutsideOneInterval(m) || isOutsideOneInterval(y) || isOutsideOneInterval(k)) + { + throw new IllegalArgumentException("Parameters must be within 0..1, but are " + + String.format("(%.2f,%.2f,%.2f,%.2f)", c, m, y, k)); + } + writeOperand(c); + writeOperand(m); + writeOperand(y); + writeOperand(k); + writeOperator(OperatorName.STROKING_COLOR_CMYK); + setStrokingColorSpaceStack(PDDeviceCMYK.INSTANCE); + } + + /** + * Set the stroking color in the DeviceGray color space. Range is 0..1. + * + * @param g The gray value. + * @throws IOException If an IO error occurs while writing to the stream. + * @throws IllegalArgumentException If the parameter is invalid.
+ */ + public void setStrokingColor(float g) throws IOException + { + if (isOutsideOneInterval(g)) + { + throw new IllegalArgumentException("Parameter must be within 0..1, but is " + g); + } + writeOperand(g); + writeOperator(OperatorName.STROKING_COLOR_GRAY); + setStrokingColorSpaceStack(PDDeviceGray.INSTANCE); + } + + /** + * Sets the non-stroking color and, if necessary, the non-stroking color space. The color space + * operator is only written when the non-stroking color space stack is empty or its top entry + * is not the same instance as the color space of the given color. Pattern, Separation, DeviceN + * and ICCBased color spaces are written with {@code OperatorName.NON_STROKING_COLOR_N}, all + * others with {@code OperatorName.NON_STROKING_COLOR}. + * + * @param color Color in a specific color space. + * @throws IOException If an IO error occurs while writing to the stream. + */ + public void setNonStrokingColor(PDColor color) throws IOException + { + if (nonStrokingColorSpaceStack.isEmpty() || + nonStrokingColorSpaceStack.peek() != color.getColorSpace()) + { + writeOperand(getName(color.getColorSpace())); + writeOperator(OperatorName.NON_STROKING_COLORSPACE); + setNonStrokingColorSpaceStack(color.getColorSpace()); + } + + for (float value : color.getComponents()) + { + writeOperand(value); + } + + if (color.getColorSpace() instanceof PDPattern) + { + writeOperand(color.getPatternName()); + } + + if (color.getColorSpace() instanceof PDPattern || + color.getColorSpace() instanceof PDSeparation || + color.getColorSpace() instanceof PDDeviceN || + color.getColorSpace() instanceof PDICCBased) + { + writeOperator(OperatorName.NON_STROKING_COLOR_N); + } + else + { + writeOperator(OperatorName.NON_STROKING_COLOR); + } + } + + /** + * Set the non-stroking color using an AWT color. Conversion uses the default sRGB color space. + * Only the red, green and blue components of the color are used. + * + * @param color The color to set. + * @throws IOException If an IO error occurs while writing to the stream. + */ + public void setNonStrokingColor(Color color) throws IOException + { + float[] components = new float[] { + color.getRed() / 255f, color.getGreen() / 255f, color.getBlue() / 255f }; + PDColor pdColor = new PDColor(components, PDDeviceRGB.INSTANCE); + setNonStrokingColor(pdColor); + } + + /** + * Set the non-stroking color in the DeviceRGB color space. Range is 0..1.
+ * + * @param r The red value. + * @param g The green value. + * @param b The blue value. + * @throws IOException If an IO error occurs while writing to the stream. + * @throws IllegalArgumentException If the parameters are invalid. + */ + public void setNonStrokingColor(float r, float g, float b) throws IOException + { + if (isOutsideOneInterval(r) || isOutsideOneInterval(g) || isOutsideOneInterval(b)) + { + throw new IllegalArgumentException("Parameters must be within 0..1, but are " + + String.format("(%.2f,%.2f,%.2f)", r, g, b)); + } + writeOperand(r); + writeOperand(g); + writeOperand(b); + writeOperator(OperatorName.NON_STROKING_RGB); + setNonStrokingColorSpaceStack(PDDeviceRGB.INSTANCE); + } + + /** + * Set the non-stroking color in the DeviceRGB color space. Range is 0..255. The values are + * divided by 255 and passed to {@link #setNonStrokingColor(float, float, float)}. + * + * @param r The red value. + * @param g The green value. + * @param b The blue value. + * @throws IOException If an IO error occurs while writing to the stream. + * @throws IllegalArgumentException If any parameter is outside the range 0..255. + * @deprecated use + * {@link #setNonStrokingColor(float, float, float) setNonStrokingColor(r/255f, g/255f, b/255f)} + */ + @Deprecated + public void setNonStrokingColor(int r, int g, int b) throws IOException + { + if (isOutside255Interval(r) || isOutside255Interval(g) || isOutside255Interval(b)) + { + throw new IllegalArgumentException("Parameters must be within 0..255, but are " + + String.format("(%d,%d,%d)", r, g, b)); + } + setNonStrokingColor(r / 255f, g / 255f, b / 255f); + } + + /** + * Set the non-stroking color in the DeviceCMYK color space. Range is 0..255. + * + * @param c The cyan value. + * @param m The magenta value. + * @param y The yellow value. + * @param k The black value. + * @throws IOException If an IO error occurs while writing to the stream. + * @throws IllegalArgumentException If the parameters are invalid.
+ */ + public void setNonStrokingColor(int c, int m, int y, int k) throws IOException + { + if (isOutside255Interval(c) || isOutside255Interval(m) || isOutside255Interval(y) || isOutside255Interval(k)) + { + throw new IllegalArgumentException("Parameters must be within 0..255, but are " + + String.format("(%d,%d,%d,%d)", c, m, y, k)); + } + setNonStrokingColor(c / 255f, m / 255f, y / 255f, k / 255f); + } + + /** + * Set the non-stroking color in the DeviceCMYK color space. Range is 0..1. + * + * @param c The cyan value. + * @param m The magenta value. + * @param y The yellow value. + * @param k The black value. + * @throws IOException If an IO error occurs while writing to the stream. + */ + public void setNonStrokingColor(float c, float m, float y, float k) throws IOException + { + if (isOutsideOneInterval(c) || isOutsideOneInterval(m) || isOutsideOneInterval(y) || isOutsideOneInterval(k)) + { + throw new IllegalArgumentException("Parameters must be within 0..1, but are " + + String.format("(%.2f,%.2f,%.2f,%.2f)", c, m, y, k)); + } + writeOperand(c); + writeOperand(m); + writeOperand(y); + writeOperand(k); + writeOperator(OperatorName.NON_STROKING_CMYK); + setNonStrokingColorSpaceStack(PDDeviceCMYK.INSTANCE); + } + + /** + * Set the non-stroking color in the DeviceGray color space. Range is 0..255. + * + * @param g The gray value. + * @throws IOException If an IO error occurs while writing to the stream. + * @throws IllegalArgumentException If the parameter is invalid. + * @deprecated use {@link #setNonStrokingColor(float) setNonStrokingColor(g/255f)} + */ + public void setNonStrokingColor(int g) throws IOException + { + if (isOutside255Interval(g)) + { + throw new IllegalArgumentException("Parameter must be within 0..255, but is " + g); + } + setNonStrokingColor(g / 255f); + } + + /** + * Set the non-stroking color in the DeviceGray color space. Range is 0..1. + * + * @param g The gray value. 
+ * @throws IOException If an IO error occurs while writing to the stream. + * @throws IllegalArgumentException If the parameter is invalid. + */ + public void setNonStrokingColor(float g) throws IOException + { + if (isOutsideOneInterval(g)) + { + throw new IllegalArgumentException("Parameter must be within 0..1, but is " + g); + } + writeOperand(g); + writeOperator(OperatorName.NON_STROKING_GRAY); + setNonStrokingColorSpaceStack(PDDeviceGray.INSTANCE); + } + + /** + * Add a rectangle to the current path. Only the four operands and the + * {@code OperatorName.APPEND_RECT} operator are written; no painting operator is written by + * this method. + * + * @param x The lower left x coordinate. + * @param y The lower left y coordinate. + * @param width The width of the rectangle. + * @param height The height of the rectangle. + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + * @throws IllegalArgumentException If a parameter is not a finite number. + */ + public void addRect(float x, float y, float width, float height) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: addRect is not allowed within a text block."); + } + writeOperand(x); + writeOperand(y); + writeOperand(width); + writeOperand(height); + writeOperator(OperatorName.APPEND_RECT); + } + + /** + * Append a cubic Bézier curve to the current path. The curve extends from the current point to + * the point (x3, y3), using (x1, y1) and (x2, y2) as the Bézier control points. + * + * @param x1 x coordinate of the point 1 + * @param y1 y coordinate of the point 1 + * @param x2 x coordinate of the point 2 + * @param y2 y coordinate of the point 2 + * @param x3 x coordinate of the point 3 + * @param y3 y coordinate of the point 3 + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block.
+ */ + public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: curveTo is not allowed within a text block."); + } + writeOperand(x1); + writeOperand(y1); + writeOperand(x2); + writeOperand(y2); + writeOperand(x3); + writeOperand(y3); + writeOperator(OperatorName.CURVE_TO); + } + + /** + * Append a cubic Bézier curve to the current path. The curve extends from the current point to + * the point (x3, y3), using the current point and (x2, y2) as the Bézier control points. + * + * @param x2 x coordinate of the point 2 (the second control point) + * @param y2 y coordinate of the point 2 (the second control point) + * @param x3 x coordinate of the point 3 (the end point of the curve) + * @param y3 y coordinate of the point 3 (the end point of the curve) + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + */ + public void curveTo2(float x2, float y2, float x3, float y3) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: curveTo2 is not allowed within a text block."); + } + writeOperand(x2); + writeOperand(y2); + writeOperand(x3); + writeOperand(y3); + writeOperator(OperatorName.CURVE_TO_REPLICATE_INITIAL_POINT); + } + + /** + * Append a cubic Bézier curve to the current path. The curve extends from the current point to + * the point (x3, y3), using (x1, y1) and (x3, y3) as the Bézier control points. + * + * @param x1 x coordinate of the point 1 + * @param y1 y coordinate of the point 1 + * @param x3 x coordinate of the point 3 + * @param y3 y coordinate of the point 3 + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block.
+ */ + public void curveTo1(float x1, float y1, float x3, float y3) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: curveTo1 is not allowed within a text block."); + } + writeOperand(x1); + writeOperand(y1); + writeOperand(x3); + writeOperand(y3); + writeOperator(OperatorName.CURVE_TO_REPLICATE_FINAL_POINT); + } + + /** + * Move the current position to the given coordinates. + * + * @param x The x coordinate. + * @param y The y coordinate. + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + */ + public void moveTo(float x, float y) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: moveTo is not allowed within a text block."); + } + writeOperand(x); + writeOperand(y); + writeOperator(OperatorName.MOVE_TO); + } + + /** + * Draw a line from the current position to the given coordinates. This method only writes + * the coordinates and the {@code OperatorName.LINE_TO} operator; it does not write a painting + * operator such as the one written by {@link #stroke()}. + * + * @param x The x coordinate. + * @param y The y coordinate. + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + */ + public void lineTo(float x, float y) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: lineTo is not allowed within a text block."); + } + writeOperand(x); + writeOperand(y); + writeOperator(OperatorName.LINE_TO); + } + + /** + * Stroke the path. + * + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + */ + public void stroke() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: stroke is not allowed within a text block."); + } + writeOperator(OperatorName.STROKE_PATH); + } + + /** + * Close and stroke the path.
+ * + * @throws IOException If the content stream could not be written + * @throws IllegalStateException If the method was called within a text block. + */ + public void closeAndStroke() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: closeAndStroke is not allowed within a text block."); + } + writeOperator(OperatorName.CLOSE_AND_STROKE); + } + + /** + * Fills the path using the nonzero winding number rule. + * + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + */ + public void fill() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: fill is not allowed within a text block."); + } + writeOperator(OperatorName.FILL_NON_ZERO); + } + + /** + * Fills the path using the even-odd rule. + * + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + */ + public void fillEvenOdd() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: fillEvenOdd is not allowed within a text block."); + } + writeOperator(OperatorName.FILL_EVEN_ODD); + } + + /** + * Fill and then stroke the path, using the nonzero winding number rule to determine the region + * to fill. This shall produce the same result as constructing two identical path objects, + * painting the first with {@link #fill() } and the second with {@link #stroke() }. + * + * @throws IOException If the content stream could not be written + * @throws IllegalStateException If the method was called within a text block.
+ */ + public void fillAndStroke() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: fillAndStroke is not allowed within a text block."); + } + writeOperator(OperatorName.FILL_NON_ZERO_AND_STROKE); + } + + /** + * Fill and then stroke the path, using the even-odd rule to determine the region to + * fill. This shall produce the same result as constructing two identical path objects, painting + * the first with {@link #fillEvenOdd()} and the second with {@link #stroke()}. + * + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + */ + public void fillAndStrokeEvenOdd() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: fillAndStrokeEvenOdd is not allowed within a text block."); + } + writeOperator(OperatorName.FILL_EVEN_ODD_AND_STROKE); + } + + /** + * Close, fill, and then stroke the path, using the nonzero winding number rule to determine the + * region to fill. This shall have the same effect as the sequence {@link #closePath()} + * and then {@link #fillAndStroke()}. + * + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + */ + public void closeAndFillAndStroke() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: closeAndFillAndStroke is not allowed within a text block."); + } + writeOperator(OperatorName.CLOSE_FILL_NON_ZERO_AND_STROKE); + } + + /** + * Close, fill, and then stroke the path, using the even-odd rule to determine the region to + * fill. This shall have the same effect as the sequence {@link #closePath() } + * and then {@link #fillAndStrokeEvenOdd() }. + * + * @throws IOException If the content stream could not be written + * @throws IllegalStateException If the method was called within a text block.
+ */ + public void closeAndFillAndStrokeEvenOdd() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: closeAndFillAndStrokeEvenOdd is not allowed within a text block."); + } + writeOperator(OperatorName.CLOSE_FILL_EVEN_ODD_AND_STROKE); + } + + /** + * Fills the clipping area with the given shading. + * + * @param shading The shading to fill with. It is added to the resources and referenced by the returned name. + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + */ + public void shadingFill(PDShading shading) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: shadingFill is not allowed within a text block."); + } + + writeOperand(resources.add(shading)); + writeOperator(OperatorName.SHADING_FILL); + } + + /** + * Closes the current subpath. + * + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + */ + public void closePath() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: closePath is not allowed within a text block."); + } + writeOperator(OperatorName.CLOSE_PATH); + } + + /** + * Intersects the current clipping path with the current path, using the nonzero rule. After + * the clip operator an {@code OperatorName.ENDPATH} operator is written, which ends the path + * without filling or stroking it. + * + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + */ + public void clip() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: clip is not allowed within a text block."); + } + writeOperator(OperatorName.CLIP_NON_ZERO); + + // end path without filling or stroking + writeOperator(OperatorName.ENDPATH); + } + + /** + * Intersects the current clipping path with the current path, using the even-odd rule. + * + * @throws IOException If the content stream could not be written + * @throws IllegalStateException If the method was called within a text block.
+ */ + public void clipEvenOdd() throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: clipEvenOdd is not allowed within a text block."); + } + writeOperator(OperatorName.CLIP_EVEN_ODD); + + // end path without filling or stroking + writeOperator(OperatorName.ENDPATH); + } + + /** + * Set line width to the given value. + * + * @param lineWidth The width which is used for drawing. + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + * @throws IllegalArgumentException If the width is not a finite number. + */ + public void setLineWidth(float lineWidth) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: setLineWidth is not allowed within a text block."); + } + writeOperand(lineWidth); + writeOperator(OperatorName.SET_LINE_WIDTH); + } + + /** + * Set the line join style. + * + * @param lineJoinStyle 0 for miter join, 1 for round join, and 2 for bevel join. + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + * @throws IllegalArgumentException If the parameter is not a valid line join style, i.e. not + * in the range 0..2. + */ + public void setLineJoinStyle(int lineJoinStyle) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: setLineJoinStyle is not allowed within a text block."); + } + if (lineJoinStyle >= 0 && lineJoinStyle <= 2) + { + writeOperand(lineJoinStyle); + writeOperator(OperatorName.SET_LINE_JOINSTYLE); + } + else + { + throw new IllegalArgumentException("Error: unknown value for line join style"); + } + } + + /** + * Set the line cap style. + * + * @param lineCapStyle 0 for butt cap, 1 for round cap, and 2 for projecting square cap. + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + * @throws IllegalArgumentException If the parameter is not a valid line cap style.
+ */ + public void setLineCapStyle(int lineCapStyle) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: setLineCapStyle is not allowed within a text block."); + } + if (lineCapStyle >= 0 && lineCapStyle <= 2) + { + writeOperand(lineCapStyle); + writeOperator(OperatorName.SET_LINE_CAPSTYLE); + } + else + { + throw new IllegalArgumentException("Error: unknown value for line cap style"); + } + } + + /** + * Set the line dash pattern. The pattern values are written between square brackets, + * followed by the phase. + * + * @param pattern The pattern array. Must not be null, because it is iterated without a check. + * @param phase The phase of the pattern. + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + * @throws IllegalArgumentException If a pattern value or the phase is not a finite number. + */ + public void setLineDashPattern(float[] pattern, float phase) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: setLineDashPattern is not allowed within a text block."); + } + write("["); + for (float value : pattern) + { + writeOperand(value); + } + write("] "); + writeOperand(phase); + writeOperator(OperatorName.SET_LINE_DASHPATTERN); + } + + /** + * Set the miter limit. + * + * @param miterLimit the new miter limit. Must be greater than 0. + * @throws IOException If the content stream could not be written. + * @throws IllegalStateException If the method was called within a text block. + * @throws IllegalArgumentException If the miter limit is less than or equal to 0. + */ + public void setMiterLimit(float miterLimit) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: setMiterLimit is not allowed within a text block."); + } + if (miterLimit <= 0.0) + { + throw new IllegalArgumentException("A miter limit <= 0 is invalid and will not render in Acrobat Reader"); + } + writeOperand(miterLimit); + writeOperator(OperatorName.SET_LINE_MITERLIMIT); + } + + /** + * Begin a marked content sequence.
+ * + * @param tag the tag + * @throws IOException If the content stream could not be written + */ + public void beginMarkedContent(COSName tag) throws IOException + { + writeOperand(tag); + writeOperator(OperatorName.BEGIN_MARKED_CONTENT); + } + + /** + * Begin a marked content sequence with a reference to an entry in the page resources' + * Properties dictionary. + * + * @param tag the tag + * @param propertyList the property list; it is added to the resources and referenced by the returned name + * @throws IOException If the content stream could not be written. + */ + public void beginMarkedContent(COSName tag, PDPropertyList propertyList) throws IOException + { + writeOperand(tag); + writeOperand(resources.add(propertyList)); + writeOperator(OperatorName.BEGIN_MARKED_CONTENT_SEQ); + } + + /** + * End a marked content sequence. + * + * @throws IOException If the content stream could not be written. + */ + public void endMarkedContent() throws IOException + { + writeOperator(OperatorName.END_MARKED_CONTENT); + } + + /** + * Set an extended graphics state. + * + * @param state The extended graphics state. It is added to the resources and referenced by the returned name. + * @throws IOException If the content stream could not be written. + */ + public void setGraphicsStateParameters(PDExtendedGraphicsState state) throws IOException + { + writeOperand(resources.add(state)); + writeOperator(OperatorName.SET_GRAPHICS_STATE_PARAMS); + } + + /** + * Write a comment line. The comment is written after a '%' character, encoded as US-ASCII and + * terminated with a newline. + * + * @param comment The comment text without the leading '%'. + * @throws IOException If the content stream could not be written. + * @throws IllegalArgumentException If the comment contains a newline or a carriage return. This + * is not allowed, because the next line could be ordinary PDF content. + */ + public void addComment(String comment) throws IOException + { + if (comment.indexOf('\n') >= 0 || comment.indexOf('\r') >= 0) + { + throw new IllegalArgumentException("comment should not include a newline"); + } + outputStream.write('%'); + outputStream.write(comment.getBytes(Charsets.US_ASCII)); + outputStream.write('\n'); + } + + /** + * Writes a real number to the content stream.
+ * @param real + * @throws java.io.IOException + * @throws IllegalArgumentException if the parameter is not a finite number + */ + protected void writeOperand(float real) throws IOException + { + if (Float.isInfinite(real) || Float.isNaN(real)) + { + throw new IllegalArgumentException(real + " is not a finite number"); + } + + int byteCount = NumberFormatUtil.formatFloatFast(real, formatDecimal.getMaximumFractionDigits(), formatBuffer); + + if (byteCount == -1) + { + // fast formatting reported failure (-1): fall back to formatDecimal + write(formatDecimal.format(real)); + } + else + { + outputStream.write(formatBuffer, 0, byteCount); + } + outputStream.write(' '); + } + + /** + * Writes an integer number to the content stream, followed by a space. + * @param integer the number to write + * @throws java.io.IOException if the content stream could not be written + */ + protected void writeOperand(int integer) throws IOException + { + write(formatDecimal.format(integer)); + outputStream.write(' '); + } + + /** + * Writes a COSName to the content stream, followed by a space. + * @param name the name to write + * @throws java.io.IOException if the content stream could not be written + */ + protected void writeOperand(COSName name) throws IOException + { + name.writePDF(outputStream); + outputStream.write(' '); + } + + /** + * Writes an operator to the content stream as ASCII, followed by a newline. + * @param text the operator to write + * @throws java.io.IOException if the content stream could not be written + */ + protected void writeOperator(String text) throws IOException + { + outputStream.write(text.getBytes(Charsets.US_ASCII)); + outputStream.write('\n'); + } + + /** + * Writes a string to the content stream as ASCII. Nothing is appended to the string. + * @param text the string to write + * @throws java.io.IOException if the content stream could not be written + */ + protected void write(String text) throws IOException + { + outputStream.write(text.getBytes(Charsets.US_ASCII)); + } + + /** + * Writes a byte[] to the content stream. + * @param data the bytes to write + * @throws java.io.IOException if the content stream could not be written + */ + protected void write(byte[] data) throws IOException + { + outputStream.write(data); + } + + /** + * Writes a newline to the content stream as ASCII.
+ * @throws java.io.IOException + */ + protected void writeLine() throws IOException + { + outputStream.write('\n'); + } + + /** + * Writes binary data to the content stream. + * @param data the bytes to write + * @throws java.io.IOException if the content stream could not be written + */ + protected void writeBytes(byte[] data) throws IOException + { + outputStream.write(data); + } + + /** + * Writes the six values of an AffineTransform, in the order returned by + * {@link AffineTransform#getMatrix(double[])}, to the content stream as six float operands. + * No array brackets are written. + * + * @param transform the transform to write + * @throws IOException if the content stream could not be written + */ + private void writeAffineTransform(AffineTransform transform) throws IOException + { + double[] values = new double[6]; + transform.getMatrix(values); + for (double v : values) + { + writeOperand((float) v); + } + } + + /** + * Close the content stream. This must be called when you are done with this object. A warning + * is logged if the stream is still in text mode when it is closed. + * + * @throws IOException If the underlying stream has a problem being written to. + */ + @Override + public void close() throws IOException + { + if (inTextMode) + { + LOG.warn("You did not call endText(), some viewers won't display your text"); + } + outputStream.close(); + } + + /** + * Tells whether a value is outside the interval 0..255. + * + * @param val the value to check + * @return true if the value is less than 0 or greater than 255 + */ + protected boolean isOutside255Interval(int val) + { + return val < 0 || val > 255; + } + + /** + * Tells whether a value is outside the interval 0..1. NaN is not reported as outside, because + * both comparisons are false for NaN. + * + * @param val the value to check + * @return true if the value is less than 0 or greater than 1 + */ + private boolean isOutsideOneInterval(double val) + { + return val < 0 || val > 1; + } + + /** + * Replaces the top entry of the stroking color space stack with the given color space, or adds + * the color space if the stack is empty. + * + * @param colorSpace the color space that is now current for stroking + */ + protected void setStrokingColorSpaceStack(PDColorSpace colorSpace) + { + if (strokingColorSpaceStack.isEmpty()) + { + strokingColorSpaceStack.add(colorSpace); + } + else + { + strokingColorSpaceStack.pop(); + strokingColorSpaceStack.push(colorSpace); + } + } + + /** + * Replaces the top entry of the non-stroking color space stack with the given color space, or + * adds the color space if the stack is empty. + * + * @param colorSpace the color space that is now current for non-stroking operations + */ + protected void setNonStrokingColorSpaceStack(PDColorSpace colorSpace) + { + if (nonStrokingColorSpaceStack.isEmpty()) + { + nonStrokingColorSpaceStack.add(colorSpace); + } + else + { + nonStrokingColorSpaceStack.pop(); + nonStrokingColorSpaceStack.push(colorSpace); + } + } + + /** + * Set the character spacing. The value shall be added to the horizontal or vertical component + * of the glyph's displacement, depending on the writing mode. + * + * @param spacing character spacing + * @throws IOException If the content stream could not be written.
+ */ + public void setCharacterSpacing(float spacing) throws IOException + { + writeOperand(spacing); + writeOperator(OperatorName.SET_CHAR_SPACING); + } + + /** + * Set the word spacing. The value shall be added to the horizontal or vertical component of the + * displacement of the ASCII SPACE character, depending on the writing mode. + *

+ * This will have an effect only with Type1 and TrueType fonts, not with Type0 fonts. The PDF + * specification tells why: "Word spacing shall be applied to every occurrence of the + * single-byte character code 32 in a string when using a simple font or a composite font that + * defines code 32 as a single-byte code. It shall not apply to occurrences of the byte value 32 + * in multiple-byte codes." + * + * @param spacing word spacing + * @throws IOException If the content stream could not be written. + */ + public void setWordSpacing(float spacing) throws IOException + { + writeOperand(spacing); + writeOperator(OperatorName.SET_WORD_SPACING); + } + + /** + * Set the horizontal scaling to scale / 100. + * + * @param scale number specifying the percentage of the normal width. Default value: 100 (normal + * width). + * @throws IOException If the content stream could not be written. + * @throws IllegalArgumentException If the scale is not a finite number. + */ + public void setHorizontalScaling(float scale) throws IOException + { + writeOperand(scale); + writeOperator(OperatorName.SET_TEXT_HORIZONTAL_SCALING); + } + + /** + * Set the text rendering mode. This determines whether showing text shall cause glyph outlines + * to be stroked, filled, used as a clipping boundary, or some combination of the three. + * + * @param rm The text rendering mode. Its integer value is written as the operand. + * @throws IOException If the content stream could not be written. + */ + public void setRenderingMode(RenderingMode rm) throws IOException + { + writeOperand(rm.intValue()); + writeOperator(OperatorName.SET_TEXT_RENDERINGMODE); + } + + /** + * Set the text rise value, i.e. move the baseline up or down. This is useful for drawing + * superscripts or subscripts. + * + * @param rise Specifies the distance, in unscaled text space units, to move the baseline up or + * down from its default location. 0 restores the default location.
+ * @throws IOException + */ + public void setTextRise(float rise) throws IOException + { + writeOperand(rise); + writeOperator(OperatorName.SET_TEXT_RISE); + } + +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDAppearanceContentStream.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDAppearanceContentStream.java new file mode 100644 index 00000000000..6a834305b53 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDAppearanceContentStream.java @@ -0,0 +1,279 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel; + +import java.io.Closeable; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.pdfbox.contentstream.operator.OperatorName; +import org.apache.pdfbox.cos.COSArray; + +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; + +/** + * Provides the ability to write to an appearance content stream. 
+ * + * @author Ben Litchfield + */ +public final class PDAppearanceContentStream extends PDAbstractContentStream implements Closeable +{ + /** + * Create a new appearance stream. + * + * @param appearance + * The appearance stream to write to. + * @throws IOException If there is an error writing to the content stream. + */ + public PDAppearanceContentStream(PDAppearanceStream appearance) throws IOException + { + this(appearance, appearance.getStream().createOutputStream()); + } + + /** + * Create a new appearance stream. + * + * @param appearance The appearance stream to write to. + * @param compress whether the content stream is to be compressed. Set this to true when + * creating long content streams. + * @throws IOException If there is an error writing to the content stream. + */ + public PDAppearanceContentStream(PDAppearanceStream appearance, boolean compress) throws IOException + { + this(appearance, appearance.getStream().createOutputStream(compress ? COSName.FLATE_DECODE : null)); + } + + /** + * Create a new appearance stream. + * + * @param appearance + * The appearance stream to add to. + * @param outputStream + * The appearances output stream to write to. + */ + public PDAppearanceContentStream(PDAppearanceStream appearance, OutputStream outputStream) + { + super(null, outputStream, appearance.getResources()); + } + + /** + * Set the stroking color. + * + *

+ * The command is only emitted if the color is not null and the number of + * components is > 0. + * + * @param color The colorspace to write. + * @throws IOException If there is an error writing to the content stream. + * @see PDAbstractContentStream#setStrokingColor(PDColor) + */ + public boolean setStrokingColorOnDemand(PDColor color) throws IOException + { + if (color != null) + { + float[] components = color.getComponents(); + if (components.length > 0) + { + setStrokingColor(components); + return true; + } + } + return false; + } + + /** + * Set the stroking color. + * + * @see PDAbstractContentStream#setStrokingColor(java.awt.Color) + * @param components + * the color components dependent on the color space being used. + * @throws IOException If there is an error writing to the content stream. + */ + public void setStrokingColor(float[] components) throws IOException + { + for (float value : components) + { + writeOperand(value); + } + + int numComponents = components.length; + switch (numComponents) + { + case 1: + writeOperator(OperatorName.STROKING_COLOR_GRAY); + break; + case 3: + writeOperator(OperatorName.STROKING_COLOR_RGB); + break; + case 4: + writeOperator(OperatorName.STROKING_COLOR_CMYK); + break; + default: + break; + } + //TODO shouldn't we set the stack? + //Or call the appropriate setStrokingColor() method from the base class? + } + + /** + * Set the non stroking color. + * + *

+ * The command is only emitted if the color is not null and the number of + * components is > 0. + * + * @param color The colorspace to write. + * @throws IOException If there is an error writing to the content stream. + * @see PDAbstractContentStream#setNonStrokingColor(PDColor) + */ + public boolean setNonStrokingColorOnDemand(PDColor color) throws IOException + { + if (color != null) + { + float[] components = color.getComponents(); + if (components.length > 0) + { + setNonStrokingColor(components); + return true; + } + } + return false; + } + + /** + * Set the non stroking color. + * + * @see PDAbstractContentStream#setNonStrokingColor(java.awt.Color) + * @param components + * the color components dependent on the color space being used. + * @throws IOException If there is an error writing to the content stream. + */ + public void setNonStrokingColor(float[] components) throws IOException + { + for (float value : components) + { + writeOperand(value); + } + + int numComponents = components.length; + switch (numComponents) + { + case 1: + writeOperator(OperatorName.NON_STROKING_GRAY); + break; + case 3: + writeOperator(OperatorName.NON_STROKING_RGB); + break; + case 4: + writeOperator(OperatorName.NON_STROKING_CMYK); + break; + default: + break; + } + //TODO shouldn't we set the stack? + //Or call the appropriate setNonStrokingColor() method from the base class? + } + + /** + * Convenience method for annotations: sets the line with and dash style. + * + * @param lineWidth The line width. + * @param bs The border style, may be null. + * @param border The border array, must have at least three entries. This is + * only used if the border style is null. + * + * @throws IOException If there is an error writing to the content stream. 
+ */ + public void setBorderLine(float lineWidth, PDBorderStyleDictionary bs, + COSArray border) throws IOException + { + // Can't use PDBorderStyleDictionary.getDashStyle() as + // this will return a default dash style if none exists + if (bs != null && bs.getCOSObject().containsKey(COSName.D) && + bs.getStyle().equals(PDBorderStyleDictionary.STYLE_DASHED)) + { + setLineDashPattern(bs.getDashStyle().getDashArray(), 0); + } + else if (bs == null && border.size() > 3 && border.getObject(3) instanceof COSArray) + { + // no border style: use the fourth border array entry as dash array if it is an array + // NOTE(review): border is dereferenced here, so it must not be null when bs is null + setLineDashPattern(((COSArray) border.getObject(3)).toFloatArray(), 0); + } + setLineWidthOnDemand(lineWidth); + } + + /** + * Sets the line width. The command is only emitted if the lineWidth is + * not within 1e-6 of 1. + * + * @param lineWidth the line width of the path. + * @throws IOException If there is an error writing to the content stream. + * @see PDAbstractContentStream#setLineWidth(float) + */ + public void setLineWidthOnDemand(float lineWidth) throws IOException + { + // Acrobat doesn't write a line width command + // for a line width of 1 as this is default. + // Will do the same. + if (!(Math.abs(lineWidth - 1) < 1e-6)) + { + setLineWidth(lineWidth); + } + } + + /** + * Draw a shape. + * + *

+ * Dependent on the lineWidth and whether or not there is a background to be generated there are + * different commands to be used for draw a shape. + * + * @param lineWidth the line width of the path. + * @param hasStroke shall there be a stroking color. + * @param hasFill shall there be a fill color. + * @throws IOException If there is an error writing to the content stream. + */ + public void drawShape(float lineWidth, boolean hasStroke, boolean hasFill) throws IOException + { + // initial setting if stroking shall be done + boolean resolvedHasStroke = hasStroke; + + // no stroking for very small lines + if (lineWidth < 1e-6) + { + resolvedHasStroke = false; + } + if (hasFill && resolvedHasStroke) + { + fillAndStroke(); + } + else if (resolvedHasStroke) + { + stroke(); + } + else if (hasFill) + { + fill(); + } + else + { + writeOperator(OperatorName.ENDPATH); + } + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDestinationNameTreeNode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDestinationNameTreeNode.java index 30aeaa0cc17..9544ed726be 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDestinationNameTreeNode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDestinationNameTreeNode.java @@ -64,7 +64,7 @@ protected PDPageDestination convertCOSToPD( COSBase base ) throws IOException } @Override - protected PDNameTreeNode createChildNode( COSDictionary dic ) + protected PDNameTreeNode createChildNode( COSDictionary dic ) { return new PDDestinationNameTreeNode(dic); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java index fb3662fe4b3..cc2983213fb 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java @@ -16,6 +16,10 @@ */ package org.apache.pdfbox.pdmodel; +import java.awt.Point; +import java.awt.image.DataBuffer; +import 
java.awt.image.Raster; +import java.awt.image.WritableRaster; import java.io.BufferedOutputStream; import java.io.Closeable; import java.io.File; @@ -24,18 +28,23 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.fontbox.ttf.TrueTypeFont; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSObject; +import org.apache.pdfbox.cos.COSUpdateInfo; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.MemoryUsageSetting; import org.apache.pdfbox.io.RandomAccessBuffer; @@ -48,16 +57,22 @@ import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; +import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; import org.apache.pdfbox.pdmodel.encryption.PDEncryption; import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy; import org.apache.pdfbox.pdmodel.encryption.SecurityHandler; import org.apache.pdfbox.pdmodel.encryption.SecurityHandlerFactory; import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import 
org.apache.pdfbox.pdmodel.interactive.digitalsignature.ExternalSigningSupport; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SigningSupport; import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; import org.apache.pdfbox.pdmodel.interactive.form.PDField; import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField; @@ -70,8 +85,44 @@ */ public class PDDocument implements Closeable { + /** + * For signing: large reserve byte range used as placeholder in the saved PDF until the actual + * length of the PDF is known. You'll need to fetch (with + * {@link PDSignature#getByteRange()} ) and reassign this yourself (with + * {@link PDSignature#setByteRange(int[])} ) only if you call + * {@link #saveIncrementalForExternalSigning(java.io.OutputStream) saveIncrementalForExternalSigning()} + * twice. 
+ */ + private static final int[] RESERVE_BYTE_RANGE = new int[] { 0, 1000000000, 1000000000, 1000000000 }; + private static final Log LOG = LogFactory.getLog(PDDocument.class); + /** + * avoid concurrency issues with PDDeviceRGB and deadlock in COSNumber/COSInteger + */ + static + { + try + { + WritableRaster raster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, 1, 1, 3, new Point(0, 0)); + PDDeviceRGB.INSTANCE.toRGBImage(raster); + } + catch (IOException ex) + { + LOG.debug("voodoo error", ex); + } + + try + { + COSNumber.get("0"); + COSNumber.get("1"); + } + catch (IOException ex) + { + // + } + } + private final COSDocument document; // cached values @@ -97,13 +148,22 @@ public class PDDocument implements Closeable // fonts to subset before saving private final Set fontsToSubset = new HashSet(); - + + // fonts to close when closing document + private final Set fontsToClose = new HashSet(); + // Signature interface private SignatureInterface signInterface; - + + // helper class used to create external signature + private SigningSupport signingSupport; + // document-wide cached resources private ResourceCache resourceCache = new DefaultResourceCache(); - + + // to make sure only one signature is added + private boolean signatureAdded = false; + /** * Creates an empty PDF document. * You need to add at least one page for the document to be valid. @@ -159,6 +219,42 @@ public PDDocument(MemoryUsageSetting memUsageSetting) pages.setItem(COSName.COUNT, COSInteger.ZERO); } + /** + * Constructor that uses an existing document. The COSDocument that is passed in must be valid. + * + * @param doc The COSDocument that this document wraps. + */ + public PDDocument(COSDocument doc) + { + this(doc, null); + } + + /** + * Constructor that uses an existing document. The COSDocument that is passed in must be valid. + * + * @param doc The COSDocument that this document wraps. 
+ * @param source the parser which is used to read the pdf + */ + public PDDocument(COSDocument doc, RandomAccessRead source) + { + this(doc, source, null); + } + + /** + * Constructor that uses an existing document. The COSDocument that is passed in must be valid. + * + * @param doc The COSDocument that this document wraps. + * @param source the parser which is used to read the pdf + * @param permission the access permissions of the pdf + * + */ + public PDDocument(COSDocument doc, RandomAccessRead source, AccessPermission permission) + { + document = doc; + pdfSource = source; + accessPermission = permission; + } + /** * This will add a page to the document. This is a convenience method, that will add the page to the root of the * hierarchy and set the parent of the page to the root. @@ -171,11 +267,54 @@ public void addPage(PDPage page) } /** - * Add a signature. + * Add parameters of signature to be created externally using default signature options. See + * {@link #saveIncrementalForExternalSigning(OutputStream)} method description on external + * signature creation scenario details. + *

+ * Only one signature may be added in a document. To sign several times, + * load document, add signature, save incremental and close again. + * + * @param sigObject is the PDSignatureField model + * @throws IOException if there is an error creating required fields + * @throws IllegalStateException if one attempts to add several signature + * fields. + */ + public void addSignature(PDSignature sigObject) throws IOException + { + addSignature(sigObject, new SignatureOptions()); + } + + /** + * Add parameters of signature to be created externally. See + * {@link #saveIncrementalForExternalSigning(OutputStream)} method description on external + * signature creation scenario details. + *

+ * Only one signature may be added in a document. To sign several times, + * load document, add signature, save incremental and close again. + * + * @param sigObject is the PDSignatureField model + * @param options signature options + * @throws IOException if there is an error creating required fields + * @throws IllegalStateException if one attempts to add several signature + * fields. + */ + public void addSignature(PDSignature sigObject, SignatureOptions options) throws IOException + { + addSignature(sigObject, null, options); + } + + /** + * Add a signature to be created using the instance of given interface. + *

+ * Only one signature may be added in a document. To sign several times, + * load document, add signature, save incremental and close again. * * @param sigObject is the PDSignatureField model - * @param signatureInterface is an interface which provides signing capabilities + * @param signatureInterface is an interface whose implementation provides + * signing capabilities. Can be null if external signing is used. * @throws IOException if there is an error creating required fields + * @throws IllegalStateException if one attempts to add several signature + * fields. */ public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface) throws IOException { @@ -186,15 +325,27 @@ public void addSignature(PDSignature sigObject, SignatureInterface signatureInte * This will add a signature to the document. If the 0-based page number in the options * parameter is smaller than 0 or larger than max, the nearest valid page number will be used * (i.e. 0 or max) and no exception will be thrown. + *

+ * Only one signature may be added in a document. To sign several times, + * load document, add signature, save incremental and close again. * * @param sigObject is the PDSignatureField model - * @param signatureInterface is an interface which provides signing capabilities + * @param signatureInterface is an interface whose implementation provides + * signing capabilities. Can be null if external signing is used. * @param options signature options * @throws IOException if there is an error creating required fields + * @throws IllegalStateException if one attempts to add several signature + * fields. */ public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface, SignatureOptions options) throws IOException { + if (signatureAdded) + { + throw new IllegalStateException("Only one signature may be added in a document"); + } + signatureAdded = true; + // Reserve content // We need to reserve some space for the signature. Some signatures including // big certificate chain and we need enough space to store it.
@@ -208,29 +359,26 @@ public void addSignature(PDSignature sigObject, SignatureInterface signatureInte sigObject.setContents(new byte[SignatureOptions.DEFAULT_SIGNATURE_SIZE]); } - // Reserve ByteRange - sigObject.setByteRange(new int[] { 0, 1000000000, 1000000000, 1000000000 }); + // Reserve ByteRange, will be overwritten in COSWriter + sigObject.setByteRange(RESERVE_BYTE_RANGE); signInterface = signatureInterface; - // - // Create SignatureForm for signature - // and appending it to the document - // + // Create SignatureForm for signature and append it to the document - // Get the first page - PDDocumentCatalog catalog = getDocumentCatalog(); - int pageCount = catalog.getPages().getCount(); + // Get the first valid page + int pageCount = getNumberOfPages(); if (pageCount == 0) { throw new IllegalStateException("Cannot sign an empty document"); } int startIndex = Math.min(Math.max(options.getPage(), 0), pageCount - 1); - PDPage page = catalog.getPages().get(startIndex); + PDPage page = getPage(startIndex); // Get the AcroForm from the Root-Dictionary and append the annotation - PDAcroForm acroForm = catalog.getAcroForm(); + PDDocumentCatalog catalog = getDocumentCatalog(); + PDAcroForm acroForm = catalog.getAcroForm(null); catalog.getCOSObject().setNeedToBeUpdated(true); if (acroForm == null) @@ -243,13 +391,18 @@ public void addSignature(PDSignature sigObject, SignatureInterface signatureInte acroForm.getCOSObject().setNeedToBeUpdated(true); } - List fields = acroForm.getFields(); - if (fields == null) + PDSignatureField signatureField = null; + COSBase cosFieldBase = acroForm.getCOSObject().getDictionaryObject(COSName.FIELDS); + if (cosFieldBase instanceof COSArray) { - fields = new ArrayList(); - acroForm.setFields(fields); + COSArray fieldArray = (COSArray) cosFieldBase; + fieldArray.setNeedToBeUpdated(true); + signatureField = findSignatureField(acroForm.getFieldIterator(), sigObject); + } + else + { + acroForm.getCOSObject().setItem(COSName.FIELDS, new 
COSArray()); } - PDSignatureField signatureField = findSignatureField(fields, sigObject); if (signatureField == null) { signatureField = new PDSignatureField(acroForm); @@ -258,10 +411,22 @@ public void addSignature(PDSignature sigObject, SignatureInterface signatureInte // backward linking signatureField.getWidgets().get(0).setPage(page); } + else + { + sigObject.getCOSObject().setNeedToBeUpdated(true); + } + + // TODO This "overwrites" the settings of the original signature field which might not be intended by the user + // better make it configurable (not all users need/want PDF/A but their own setting): + // to conform PDF/A-1 requirement: // The /F key's Print flag bit shall be set to 1 and // its Hidden, Invisible and NoView flag bits shall be set to 0 signatureField.getWidgets().get(0).setPrinted(true); + // This may be troublesome if several form fields are signed, + // see thread from PDFBox users mailing list 17.2.2021 - 19.2.2021 + // https://mail-archives.apache.org/mod_mbox/pdfbox-users/202102.mbox/thread + // better set the printed flag in advance // Set the AcroForm Fields List acroFormFields = acroForm.getFields(); @@ -269,7 +434,15 @@ public void addSignature(PDSignature sigObject, SignatureInterface signatureInte acroForm.setSignaturesExist(true); acroForm.setAppendOnly(true); - boolean checkFields = checkSignatureField(acroFormFields, signatureField); + boolean checkFields = checkSignatureField(acroForm.getFieldIterator(), signatureField); + if (checkFields) + { + signatureField.getCOSObject().setNeedToBeUpdated(true); + } + else + { + acroFormFields.add(signatureField); + } // Get the object from the visual signature COSDocument visualSignature = options.getVisualSignature(); @@ -280,7 +453,7 @@ public void addSignature(PDSignature sigObject, SignatureInterface signatureInte prepareNonVisibleSignature(signatureField); return; } - + prepareVisibleSignature(signatureField, acroForm, visualSignature); // Create Annotation / Field for signature @@ 
-298,49 +471,84 @@ public void addSignature(PDSignature sigObject, SignatureInterface signatureInte ((COSArrayList) annotations).toList().equals(((COSArrayList) acroFormFields).toList()) && checkFields)) { - annotations.add(signatureField.getWidgets().get(0)); + PDAnnotationWidget widget = signatureField.getWidgets().get(0); + // use check to prevent the annotation widget from appearing twice + if (checkSignatureAnnotation(annotations, widget)) + { + widget.getCOSObject().setNeedToBeUpdated(true); + } + else + { + annotations.add(widget); + } } page.getCOSObject().setNeedToBeUpdated(true); } - // search acroform field list for signature field with specific signature dictionary - private PDSignatureField findSignatureField(List fields, PDSignature sigObject) + /** + * Search acroform fields for signature field with specific signature dictionary. + * + * @param fieldIterator iterator on all fields. + * @param sigObject signature object (the /V part). + * @return a signature field if found, or null if none was found. + */ + private PDSignatureField findSignatureField(Iterator fieldIterator, PDSignature sigObject) { PDSignatureField signatureField = null; - for (PDField pdField : fields) + while (fieldIterator.hasNext()) { + PDField pdField = fieldIterator.next(); if (pdField instanceof PDSignatureField) { PDSignature signature = ((PDSignatureField) pdField).getSignature(); if (signature != null && signature.getCOSObject().equals(sigObject.getCOSObject())) { signatureField = (PDSignatureField) pdField; + break; } } } return signatureField; } - // return true if the field already existed in the field list, in that case, it is marked for update - private boolean checkSignatureField(List acroFormFields, PDSignatureField signatureField) + /** + * Check if the field already exists in the field list. + * + * @param fieldIterator iterator on all fields. + * @param signatureField the signature field. 
+ * @return true if the field already existed in the field list, false if not. + */ + private boolean checkSignatureField(Iterator fieldIterator, PDSignatureField signatureField) { - boolean checkFields = false; - for (PDField field : acroFormFields) + while (fieldIterator.hasNext()) { + PDField field = fieldIterator.next(); if (field instanceof PDSignatureField && field.getCOSObject().equals(signatureField.getCOSObject())) { - checkFields = true; - signatureField.getCOSObject().setNeedToBeUpdated(true); - break; + return true; } - // fixme: this code does not check non-terminal fields, there could be a descendant signature } - if (!checkFields) + return false; + } + + /** + * Check if the widget already exists in the annotation list + * + * @param annotations the list of PDAnnotation fields. + * @param widget the annotation widget. + * @return true if the widget already existed in the annotation list, false if not. + */ + private boolean checkSignatureAnnotation(List annotations, PDAnnotationWidget widget) + { + for (PDAnnotation annotation : annotations) { - acroFormFields.add(signatureField); + if (annotation.getCOSObject().equals(widget.getCOSObject())) + { + return true; + } } - return checkFields; + return false; } private void prepareVisibleSignature(PDSignatureField signatureField, PDAcroForm acroForm, @@ -391,8 +599,14 @@ private void assignSignatureRectangle(PDSignatureField signatureField, COSDictio { // Read and set the rectangle for visual signature COSArray rectArray = (COSArray) annotDict.getDictionaryObject(COSName.RECT); - PDRectangle rect = new PDRectangle(rectArray); - signatureField.getWidgets().get(0).setRectangle(rect); + PDRectangle existingRectangle = signatureField.getWidgets().get(0).getRectangle(); + + //in case of an existing field keep the original rect + if (existingRectangle == null || existingRectangle.getCOSArray().size() != 4) + { + PDRectangle rect = new PDRectangle(rectArray); + 
signatureField.getWidgets().get(0).setRectangle(rect); + } } private void assignAppearanceDictionary(PDSignatureField signatureField, COSDictionary apDict) @@ -403,43 +617,69 @@ private void assignAppearanceDictionary(PDSignatureField signatureField, COSDict signatureField.getWidgets().get(0).setAppearance(ap); } - private void assignAcroFormDefaultResource(PDAcroForm acroForm, COSDictionary dict) + private void assignAcroFormDefaultResource(PDAcroForm acroForm, COSDictionary newDict) { - // read and set AcroForm default resource dictionary /DR if available - COSBase base = dict.getDictionaryObject(COSName.DR); - if (base instanceof COSDictionary) + // read and set/update AcroForm default resource dictionary /DR if available + COSBase newBase = newDict.getDictionaryObject(COSName.DR); + if (newBase instanceof COSDictionary) { - COSDictionary dr = (COSDictionary) base; - dr.setDirect(true); - dr.setNeedToBeUpdated(true); - acroForm.getCOSObject().setItem(COSName.DR, dr); + COSDictionary newDR = (COSDictionary) newBase; + PDResources defaultResources = acroForm.getDefaultResources(); + if (defaultResources == null) + { + acroForm.getCOSObject().setItem(COSName.DR, newDR); + newDR.setDirect(true); + newDR.setNeedToBeUpdated(true); + } + else + { + COSDictionary oldDR = defaultResources.getCOSObject(); + COSBase newXObjectBase = newDR.getItem(COSName.XOBJECT); + COSBase oldXObjectBase = oldDR.getItem(COSName.XOBJECT); + if (newXObjectBase instanceof COSDictionary && + oldXObjectBase instanceof COSDictionary) + { + ((COSDictionary) oldXObjectBase).addAll((COSDictionary) newXObjectBase); + oldDR.setNeedToBeUpdated(true); + } + } } } private void prepareNonVisibleSignature(PDSignatureField signatureField) - throws IOException { // "Signature fields that are not intended to be visible shall // have an annotation rectangle that has zero height and width." 
// Set rectangle for non-visual signature to rectangle array [ 0 0 0 0 ] signatureField.getWidgets().get(0).setRectangle(new PDRectangle()); + + // The visual appearance must also exist for an invisible signature but may be empty. + PDAppearanceDictionary appearanceDictionary = new PDAppearanceDictionary(); + PDAppearanceStream appearanceStream = new PDAppearanceStream(this); + appearanceStream.setBBox(new PDRectangle()); + appearanceDictionary.setNormalAppearance(appearanceStream); + signatureField.getWidgets().get(0).setAppearance(appearanceDictionary); } /** - * This will add a signature field to the document. + * This will add a list of signature fields to the document. * * @param sigFields are the PDSignatureFields that should be added to the document - * @param signatureInterface is a interface which provides signing capabilities + * @param signatureInterface is an interface whose implementation provides + * signing capabilities. Can be null if external signing if used. * @param options signature options * @throws IOException if there is an error creating required fields + * @deprecated The method is misleading, because only one signature may be + * added in a document. The method will be removed in the future. 
*/ + @Deprecated public void addSignatureField(List sigFields, SignatureInterface signatureInterface, SignatureOptions options) throws IOException { PDDocumentCatalog catalog = getDocumentCatalog(); catalog.getCOSObject().setNeedToBeUpdated(true); - PDAcroForm acroForm = catalog.getAcroForm(); + PDAcroForm acroForm = catalog.getAcroForm(null); if (acroForm == null) { acroForm = new PDAcroForm(this); @@ -461,7 +701,15 @@ public void addSignatureField(List sigFields, SignatureInterfa sigField.getCOSObject().setNeedToBeUpdated(true); // Check if the field already exists - checkSignatureField(acroformFields, sigField); + boolean checkSignatureField = checkSignatureField(acroForm.getFieldIterator(), sigField); + if (checkSignatureField) + { + sigField.getCOSObject().setNeedToBeUpdated(true); + } + else + { + acroformFields.add(sigField); + } // Check if we need to add a signature if (sigField.getSignature() != null) @@ -497,79 +745,47 @@ public void removePage(int pageNumber) } /** - * This will import and copy the contents from another location. Currently the content stream is stored in a scratch - * file. The scratch file is associated with the document. If you are adding a page to this document from another - * document and want to copy the contents to this document's scratch file then use this method otherwise just use - * the {@link #addPage} method. - * - * Unlike {@link #addPage}, this method does a deep copy. If your page has annotations, and if - * these link to pages not in the target document, then the target document might become huge. - * What you need to do is to delete page references of such annotations. See + * This will import and copy the contents from another location. Currently the content stream is + * stored in a scratch file. The scratch file is associated with the document. 
If you are adding + * a page to this document from another document and want to copy the contents to this + * document's scratch file then use this method otherwise just use the {@link #addPage addPage()} + * method. + *

+ * Unlike {@link #addPage addPage()}, this method creates a new PDPage object. If your page has + * annotations, and if these link to pages not in the target document, then the target document + * might become huge. What you need to do is to delete page references of such annotations. See * here for how to do this. + *

+ * Inherited (global) resources are ignored because these can contain resources not needed for + * this page which could bloat your document, see + * PDFBOX-28 and related issues. + * If you need them, call importedPage.setResources(page.getResources()); + *

+ * This method should only be used to import a page from a loaded document, not from a generated + * document because these can contain unfinished parts, e.g. font subsetting information. * * @param page The page to import. * @return The page that was imported. - * + * * @throws IOException If there is an error copying the page. */ public PDPage importPage(PDPage page) throws IOException { PDPage importedPage = new PDPage(new COSDictionary(page.getCOSObject()), resourceCache); - InputStream in = null; - try + PDStream dest = new PDStream(this, page.getContents(), COSName.FLATE_DECODE); + importedPage.setContents(dest); + addPage(importedPage); + importedPage.setCropBox(new PDRectangle(page.getCropBox().getCOSArray())); + importedPage.setMediaBox(new PDRectangle(page.getMediaBox().getCOSArray())); + importedPage.setRotation(page.getRotation()); + if (page.getResources() != null && !page.getCOSObject().containsKey(COSName.RESOURCES)) { - in = page.getContents(); - if (in != null) - { - PDStream dest = new PDStream(this, page.getContents(), COSName.FLATE_DECODE); - importedPage.setContents(dest); - } - addPage(importedPage); - } - catch (IOException e) - { - IOUtils.closeQuietly(in); + LOG.warn("inherited resources of source document are not imported to destination page"); + LOG.warn("call importedPage.setResources(page.getResources()) to do this"); } - return importedPage; } - /** - * Constructor that uses an existing document. The COSDocument that is passed in must be valid. - * - * @param doc The COSDocument that this document wraps. - */ - public PDDocument(COSDocument doc) - { - this(doc, null); - } - - /** - * Constructor that uses an existing document. The COSDocument that is passed in must be valid. - * - * @param doc The COSDocument that this document wraps. 
- * @param source the parser which is used to read the pdf - */ - public PDDocument(COSDocument doc, RandomAccessRead source) - { - this(doc, source, null); - } - - /** - * Constructor that uses an existing document. The COSDocument that is passed in must be valid. - * - * @param doc The COSDocument that this document wraps. - * @param source the parser which is used to read the pdf - * @param permission he access permissions of the pdf - * - */ - public PDDocument(COSDocument doc, RandomAccessRead source, AccessPermission permission) - { - document = doc; - pdfSource = source; - accessPermission = permission; - } - /** * This will get the low level document. * @@ -581,16 +797,21 @@ public COSDocument getDocument() } /** - * This will get the document info dictionary. This is guaranteed to not return null. - * - * @return The documents /Info dictionary + * This will get the document info dictionary. If it doesn't exist, an empty document info + * dictionary is created in the document trailer. + *

+ * In PDF 2.0 this is deprecated except for two entries, /CreationDate and /ModDate. For any other + * document level metadata, a metadata stream should be used instead, see + * {@link PDDocumentCatalog#getMetadata()}. + * + * @return The documents /Info dictionary, never null. */ public PDDocumentInformation getDocumentInformation() { if (documentInformation == null) { COSDictionary trailer = document.getTrailer(); - COSDictionary infoDic = (COSDictionary) trailer.getDictionaryObject(COSName.INFO); + COSDictionary infoDic = trailer.getCOSDictionary(COSName.INFO); if (infoDic == null) { infoDic = new COSDictionary(); @@ -603,7 +824,11 @@ public PDDocumentInformation getDocumentInformation() /** * This will set the document information for this document. - * + *

+ * In PDF 2.0 this is deprecated except for two entries, /CreationDate and /ModDate. For any other + * document level metadata, a metadata stream should be used instead, see + * {@link PDDocumentCatalog#setMetadata(org.apache.pdfbox.pdmodel.common.PDMetadata) PDDocumentCatalog#setMetadata(PDMetadata)}. + * * @param info The updated document information. */ public void setDocumentInformation(PDDocumentInformation info) @@ -647,7 +872,7 @@ public boolean isEncrypted() /** * This will get the encryption dictionary for this document. This will still return the parameters if the document - * was decrypted. As the encryption architecture in PDF documents is plugable this returns an abstract class, + * was decrypted. As the encryption architecture in PDF documents is pluggable this returns an abstract class, * but the only supported subclass at this time is a * PDStandardEncryption object. * @@ -675,7 +900,8 @@ public void setEncryptionDictionary(PDEncryption encryption) throws IOException } /** - * This will return the last signature. + * This will return the last signature from the field tree. Note that this may not be the + * last in time when empty signature fields are created first but signed after other fields. * * @return the last signature as PDSignatureField. * @throws IOException if no document catalog can be found. 
@@ -700,11 +926,10 @@ public PDSignature getLastSignatureDictionary() throws IOException public List getSignatureFields() throws IOException { List fields = new ArrayList(); - PDAcroForm acroForm = getDocumentCatalog().getAcroForm(); + PDAcroForm acroForm = getDocumentCatalog().getAcroForm(null); if (acroForm != null) { - // fixme: non-terminal fields are ignored, could have descendant signatures - for (PDField field : acroForm.getFields()) + for (PDField field : acroForm.getFieldTree()) { if (field instanceof PDSignatureField) { @@ -735,6 +960,18 @@ public List getSignatureDictionaries() throws IOException return signatures; } + /** + * For internal PDFBox use when creating PDF documents: register a TrueTypeFont to make sure it + * is closed when the PDDocument is closed to avoid memory leaks. Users don't have to call this + * method, it is done by the appropriate PDFont classes. + * + * @param ttf + */ + public void registerTrueTypeFontForClosing(TrueTypeFont ttf) + { + fontsToClose.add(ttf); + } + /** * Returns the list of fonts which will be subset before the document is saved. */ @@ -750,6 +987,7 @@ Set getFontsToSubset() * * @return loaded document * + * @throws InvalidPasswordException If the file required a non-empty password. * @throws IOException in case of a file reading or parsing error */ public static PDDocument load(File file) throws IOException @@ -765,9 +1003,11 @@ public static PDDocument load(File file) throws IOException * * @return loaded document * + * @throws InvalidPasswordException If the file required a non-empty password. 
* @throws IOException in case of a file reading or parsing error */ - public static PDDocument load(File file, MemoryUsageSetting memUsageSetting) throws IOException + public static PDDocument load(File file, MemoryUsageSetting memUsageSetting) + throws IOException { return load(file, "", null, null, memUsageSetting); } @@ -780,9 +1020,11 @@ public static PDDocument load(File file, MemoryUsageSetting memUsageSetting) thr * * @return loaded document * + * @throws InvalidPasswordException If the password is incorrect. * @throws IOException in case of a file reading or parsing error */ - public static PDDocument load(File file, String password) throws IOException + public static PDDocument load(File file, String password) + throws IOException { return load(file, password, null, null, MemoryUsageSetting.setupMainMemoryOnly()); } @@ -796,9 +1038,11 @@ public static PDDocument load(File file, String password) throws IOException * * @return loaded document * + * @throws InvalidPasswordException If the password is incorrect. 
* @throws IOException in case of a file reading or parsing error */ - public static PDDocument load(File file, String password, MemoryUsageSetting memUsageSetting) throws IOException + public static PDDocument load(File file, String password, MemoryUsageSetting memUsageSetting) + throws IOException { return load(file, password, null, null, memUsageSetting); } @@ -837,21 +1081,11 @@ public static PDDocument load(File file, String password, InputStream keyStore, public static PDDocument load(File file, String password, InputStream keyStore, String alias, MemoryUsageSetting memUsageSetting) throws IOException { + @SuppressWarnings({"squid:S2095"}) // raFile not closed here, may be needed for signing RandomAccessBufferedFileInputStream raFile = new RandomAccessBufferedFileInputStream(file); try { - ScratchFile scratchFile = new ScratchFile(memUsageSetting); - try - { - PDFParser parser = new PDFParser(raFile, password, keyStore, alias, scratchFile); - parser.parse(); - return parser.getPDDocument(); - } - catch (IOException ioe) - { - IOUtils.closeQuietly(scratchFile); - throw ioe; - } + return load(raFile, password, keyStore, alias, memUsageSetting); } catch (IOException ioe) { @@ -860,15 +1094,34 @@ public static PDDocument load(File file, String password, InputStream keyStore, } } + private static PDDocument load(RandomAccessBufferedFileInputStream raFile, String password, + InputStream keyStore, String alias, + MemoryUsageSetting memUsageSetting) throws IOException + { + ScratchFile scratchFile = new ScratchFile(memUsageSetting); + try + { + PDFParser parser = new PDFParser(raFile, password, keyStore, alias, scratchFile); + parser.parse(); + return parser.getPDDocument(); + } + catch (IOException ioe) + { + IOUtils.closeQuietly(scratchFile); + throw ioe; + } + } + /** - * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf. - * Unrestricted main memory will be used for buffering PDF streams. + * Parses a PDF. 
The given input stream is copied to the memory to enable random access to the + * pdf. Unrestricted main memory will be used for buffering PDF streams. * - * @param input stream that contains the document. + * @param input stream that contains the document. Don't forget to close it after loading. * * @return loaded document * - * @throws IOException in case of a file reading or parsing error + * @throws InvalidPasswordException If the PDF required a non-empty password. + * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(InputStream input) throws IOException { @@ -876,32 +1129,34 @@ public static PDDocument load(InputStream input) throws IOException } /** - * Parses a PDF. Depending on the memory settings parameter the given input - * stream is either copied to main memory or to a temporary file to enable - * random access to the pdf. + * Parses a PDF. Depending on the memory settings parameter the given input stream is either + * copied to main memory or to a temporary file to enable random access to the pdf. * - * @param input stream that contains the document. + * @param input stream that contains the document. Don't forget to close it after loading. * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams * * @return loaded document * - * @throws IOException in case of a file reading or parsing error + * @throws InvalidPasswordException If the PDF required a non-empty password. + * @throws IOException In case of a reading or parsing error. */ - public static PDDocument load(InputStream input, MemoryUsageSetting memUsageSetting) throws IOException + public static PDDocument load(InputStream input, MemoryUsageSetting memUsageSetting) + throws IOException { return load(input, "", null, null, memUsageSetting); } /** - * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf. - * Unrestricted main memory will be used for buffering PDF streams. 
- * - * @param input stream that contains the document. + * Parses a PDF. The given input stream is copied to the memory to enable random access to the + * pdf. Unrestricted main memory will be used for buffering PDF streams. + * + * @param input stream that contains the document. Don't forget to close it after loading. * @param password password to be used for decryption - * + * * @return loaded document * - * @throws IOException in case of a file reading or parsing error + * @throws InvalidPasswordException If the password is incorrect. + * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(InputStream input, String password) throws IOException @@ -910,17 +1165,17 @@ public static PDDocument load(InputStream input, String password) } /** - * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf. - * Unrestricted main memory will be used for buffering PDF streams. - * - * @param input stream that contains the document. + * Parses a PDF. The given input stream is copied to the memory to enable random access to the + * pdf. Unrestricted main memory will be used for buffering PDF streams. + * + * @param input stream that contains the document. Don't forget to close it after loading. * @param password password to be used for decryption * @param keyStore key store to be used for decryption when using public key security * @param alias alias to be used for decryption when using public key security * * @return loaded document * - * @throws IOException in case of a file reading or parsing error + * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(InputStream input, String password, InputStream keyStore, String alias) throws IOException @@ -929,17 +1184,17 @@ public static PDDocument load(InputStream input, String password, InputStream ke } /** - * Parses a PDF. 
Depending on the memory settings parameter the given input - * stream is either copied to main memory or to a temporary file to enable - * random access to the pdf. - * - * @param input stream that contains the document. + * Parses a PDF. Depending on the memory settings parameter the given input stream is either + * copied to main memory or to a temporary file to enable random access to the pdf. + * + * @param input stream that contains the document. Don't forget to close it after loading. * @param password password to be used for decryption * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams * * @return loaded document * - * @throws IOException in case of a file reading or parsing error + * @throws InvalidPasswordException If the password is incorrect. + * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(InputStream input, String password, MemoryUsageSetting memUsageSetting) throws IOException @@ -948,11 +1203,10 @@ public static PDDocument load(InputStream input, String password, MemoryUsageSet } /** - * Parses a PDF. Depending on the memory settings parameter the given input - * stream is either copied to memory or to a temporary file to enable - * random access to the pdf. - * - * @param input stream that contains the document. + * Parses a PDF. Depending on the memory settings parameter the given input stream is either + * copied to memory or to a temporary file to enable random access to the pdf. + * + * @param input stream that contains the document. Don't forget to close it after loading. 
* @param password password to be used for decryption * @param keyStore key store to be used for decryption when using public key security * @param alias alias to be used for decryption when using public key security @@ -960,7 +1214,8 @@ public static PDDocument load(InputStream input, String password, MemoryUsageSet * * @return loaded document * - * @throws IOException in case of a file reading or parsing error + * @throws InvalidPasswordException If the password is incorrect. + * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(InputStream input, String password, InputStream keyStore, String alias, MemoryUsageSetting memUsageSetting) throws IOException @@ -987,7 +1242,8 @@ public static PDDocument load(InputStream input, String password, InputStream ke * * @return loaded document * - * @throws IOException in case of a file reading or parsing error + * @throws InvalidPasswordException If the PDF required a non-empty password. + * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(byte[] input) throws IOException { @@ -1002,9 +1258,11 @@ public static PDDocument load(byte[] input) throws IOException * * @return loaded document * - * @throws IOException in case of a file reading or parsing error + * @throws InvalidPasswordException If the password is incorrect. + * @throws IOException In case of a reading or parsing error. */ - public static PDDocument load(byte[] input, String password) throws IOException + public static PDDocument load(byte[] input, String password) + throws IOException { return load(input, password, null, null); } @@ -1019,7 +1277,8 @@ public static PDDocument load(byte[] input, String password) throws IOException * * @return loaded document * - * @throws IOException in case of a file reading or parsing error + * @throws InvalidPasswordException If the password is incorrect. + * @throws IOException In case of a reading or parsing error. 
*/ public static PDDocument load(byte[] input, String password, InputStream keyStore, String alias) throws IOException @@ -1038,7 +1297,8 @@ public static PDDocument load(byte[] input, String password, InputStream keyStor * * @return loaded document * - * @throws IOException in case of a file reading or parsing error + * @throws InvalidPasswordException If the password is incorrect. + * @throws IOException In case of a reading or parsing error. */ public static PDDocument load(byte[] input, String password, InputStream keyStore, String alias, MemoryUsageSetting memUsageSetting) throws IOException @@ -1052,6 +1312,10 @@ public static PDDocument load(byte[] input, String password, InputStream keyStor /** * Save the document to a file. + *

+ * If encryption has been activated (with + * {@link #protect(org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy) protect(ProtectionPolicy)}), + * do not use the document after saving because the contents are now encrypted. * * @param fileName The file to save as. * @@ -1064,6 +1328,10 @@ public void save(String fileName) throws IOException /** * Save the document to a file. + *

+ * If encryption has been activated (with + * {@link #protect(org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy) protect(ProtectionPolicy)}), + * do not use the document after saving because the contents are now encrypted. * * @param file The file to save as. * @@ -1076,8 +1344,13 @@ public void save(File file) throws IOException /** * This will save the document to an output stream. - * - * @param output The stream to write to. + *

+ * If encryption has been activated (with + * {@link #protect(org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy) protect(ProtectionPolicy)}), + * do not use the document after saving because the contents are now encrypted. + * + * @param output The stream to write to. It will be closed when done. It is recommended to wrap + * it in a {@link java.io.BufferedOutputStream}, unless it is already buffered. * * @throws IOException if the output could not be written */ @@ -1100,7 +1373,6 @@ public void save(OutputStream output) throws IOException try { writer.write(this); - writer.close(); } finally { @@ -1108,21 +1380,34 @@ public void save(OutputStream output) throws IOException } } - /** - * Save the PDF as an incremental update. This is only possible if the PDF was loaded from a file. + /** + * Save the PDF as an incremental update. This is only possible if the PDF was loaded from a + * file or a stream, not if the document was created in PDFBox itself. There must be a path of + * objects that have {@link COSUpdateInfo#isNeedToBeUpdated()} set, starting from the document + * catalog. For signatures this is taken care by PDFBox itself. + *

+ * Other usages of this method are for experienced users only. You will usually never need it. + * It is useful only if you are required to keep the current revision and append the changes. A + * typical use case is changing a signed file without invalidating the signature. * - * @param output stream to write + * @param output stream to write to. It will be closed when done. It + * must never point to the source file or that one will be + * harmed! * @throws IOException if the output could not be written - * @throws IllegalStateException if the document was not loaded from a file. + * @throws IllegalStateException if the document was not loaded from a file or a stream. */ + public void saveIncremental(OutputStream output) throws IOException { COSWriter writer = null; try { + if (pdfSource == null) + { + throw new IllegalStateException("document was not loaded from a file or a stream"); + } writer = new COSWriter(output, pdfSource); writer.write(this, signInterface); - writer.close(); } finally { @@ -1134,9 +1419,123 @@ public void saveIncremental(OutputStream output) throws IOException } /** - * Returns the page at the given index. + * Save the PDF as an incremental update. This is only possible if the PDF was loaded from a + * file or a stream, not if the document was created in PDFBox itself. This allows to include + * objects even if there is no path of objects that have + * {@link COSUpdateInfo#isNeedToBeUpdated()} set so the incremental update gets smaller. Only + * dictionaries are supported; if you need to update other objects classes, then add their + * parent dictionary. + *

+ * This method is for experienced users only. You will usually never need it. It is useful only + * if you are required to keep the current revision and append the changes. A typical use case + * is changing a signed file without invalidating the signature. To know which objects are + * getting changed, you need to have some understanding of the PDF specification, and look at + * the saved file with an editor to verify that you are updating the correct objects. You should + * also inspect the page and document structures of the file with PDFDebugger. + * + * @param output stream to write to. It will be closed when done. It + * must never point to the source file or that one will be harmed! + * @param objectsToWrite objects that must be part of the incremental saving. + * @throws IOException if the output could not be written + * @throws IllegalStateException if the document was not loaded from a file or a stream. + */ + public void saveIncremental(OutputStream output, Set objectsToWrite) throws IOException + { + if (pdfSource == null) + { + throw new IllegalStateException("document was not loaded from a file or a stream"); + } + COSWriter writer = null; + try + { + writer = new COSWriter(output, pdfSource, objectsToWrite); + writer.write(this, signInterface); + } + finally + { + if (writer != null) + { + writer.close(); + } + } + } + + /** + *

+ * (This is a new feature for 2.0.3. The API for external signing might change based on feedback after release!) + *

+ * Save the PDF incrementally, without closing it, for an external signature creation scenario. The general + * sequence is: + *

+     *    PDDocument pdDocument = ...;
+     *    OutputStream outputStream = ...;
+     *    SignatureOptions signatureOptions = ...; // options to specify fine tuned signature options or null for defaults
+     *    PDSignature pdSignature = ...;
+     *
+     *    // add signature parameters to be used when creating signature dictionary
+     *    pdDocument.addSignature(pdSignature, signatureOptions);
+     *    // prepare PDF for signing and obtain helper class to be used
+     *    ExternalSigningSupport externalSigningSupport = pdDocument.saveIncrementalForExternalSigning(outputStream);
+     *    // get data to be signed
+     *    InputStream dataToBeSigned = externalSigningSupport.getContent();
+     *    // invoke signature service
+     *    byte[] signature = sign(dataToBeSigned);
+     *    // set resulting CMS signature
+     *    externalSigningSupport.setSignature(signature);
      *
-     * @param pageIndex the page index
+     *    // last step is to close the document
+     *    pdDocument.close();
+     * 
+ *

+ * Note that after calling this method, only the {@code close()} method may be invoked for + * the {@code PDDocument} instance and only AFTER the {@link ExternalSigningSupport} instance is used. + *

+ * + * @param output stream to write the final PDF. It will be closed when the + * document is closed. It must never point to the source file + * or that one will be harmed! + * @return instance to be used for external signing and setting CMS signature + * @throws IOException if the output could not be written + * @throws IllegalStateException if the document was not loaded from a file or a stream or + * signature options were not set. + */ + public ExternalSigningSupport saveIncrementalForExternalSigning(OutputStream output) throws IOException + { + if (pdfSource == null) + { + throw new IllegalStateException("document was not loaded from a file or a stream"); + } + // PDFBOX-3978: getLastSignatureDictionary() not helpful if signing into a template + // that is not the last signature. So give higher priority to signature with update flag. + PDSignature foundSignature = null; + for (PDSignature sig : getSignatureDictionaries()) + { + foundSignature = sig; + if (sig.getCOSObject().isNeedToBeUpdated()) + { + break; + } + } + int[] byteRange = foundSignature.getByteRange(); + if (!Arrays.equals(byteRange, RESERVE_BYTE_RANGE)) + { + throw new IllegalStateException("signature reserve byte range has been changed " + + "after addSignature(), please set the byte range that existed after addSignature()"); + } + COSWriter writer = new COSWriter(output, pdfSource); + writer.write(this); + signingSupport = new SigningSupport(writer); + return signingSupport; + } + + /** + * Returns the page at the given 0-based index. + *

+ * This method is too slow to get all the pages from a large PDF document + * (1000 pages or more). For such documents, use the iterator of + * {@link PDDocument#getPages()} instead. + * + * @param pageIndex the 0-based page index * @return the page at the given index. */ public PDPage getPage(int pageIndex) // todo: REPLACE most calls to this method with BELOW method @@ -1174,13 +1573,38 @@ public void close() throws IOException { if (!document.isClosed()) { + // Make sure that: + // - first Exception is kept + // - all IO resources are closed + // - there's a way to see which errors occurred + + IOException firstException = null; + + // close resources and COSWriter + if (signingSupport != null) + { + firstException = IOUtils.closeAndLogException(signingSupport, LOG, "SigningSupport", firstException); + } + // close all intermediate I/O streams - document.close(); + firstException = IOUtils.closeAndLogException(document, LOG, "COSDocument", firstException); // close the source PDF stream, if we read from one if (pdfSource != null) { - pdfSource.close(); + firstException = IOUtils.closeAndLogException(pdfSource, LOG, "RandomAccessRead pdfSource", firstException); + } + + // close fonts + for (TrueTypeFont ttf : fontsToClose) + { + firstException = IOUtils.closeAndLogException(ttf, LOG, "TrueTypeFont", firstException); + } + + // rethrow first exception to keep method contract + if (firstException != null) + { + throw firstException; } } } @@ -1190,6 +1614,8 @@ public void close() throws IOException * encrypted when it will be saved. This method only marks the document for encryption. It also * calls {@link #setAllSecurityToBeRemoved(boolean)} with a false argument if it was set to true * previously and logs a warning. + *

+ * Do not use the document after saving, because the structures are encrypted. * * @see org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy * @see org.apache.pdfbox.pdmodel.encryption.PublicKeyProtectionPolicy @@ -1260,7 +1686,7 @@ public void setAllSecurityToBeRemoved(boolean removeAllSecurity) /** * Provides the document ID. * - * @return the dcoument ID + * @return the document ID */ public Long getDocumentId() { @@ -1344,6 +1770,8 @@ public void setVersion(float newVersion) /** * Returns the resource cache associated with this document, or null if there is none. + * + * @return the resource cache or null. */ public ResourceCache getResourceCache() { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocumentCatalog.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocumentCatalog.java index 455e917ad4e..a953397e9ef 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocumentCatalog.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocumentCatalog.java @@ -20,6 +20,9 @@ import java.util.ArrayList; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; @@ -33,6 +36,8 @@ import org.apache.pdfbox.pdmodel.common.PDPageLabels; import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDMarkInfo; import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot; +import org.apache.pdfbox.pdmodel.fixup.AcroFormDefaultFixup; +import org.apache.pdfbox.pdmodel.fixup.PDDocumentFixup; import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent; import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentProperties; import org.apache.pdfbox.pdmodel.interactive.action.PDActionFactory; @@ -53,12 +58,16 @@ */ public class PDDocumentCatalog implements COSObjectable { + private static final Log LOG = 
LogFactory.getLog(PDDocumentCatalog.class); + private final COSDictionary root; private final PDDocument document; + private PDDocumentFixup acroFormFixupApplied; private PDAcroForm cachedAcroForm; /** - * Constructor. AcroForm. + * Constructor. Internal PDFBox use only! If you need to get the document catalog, call + * {@link PDDocument#getDocumentCatalog()}. * * @param doc The document that this catalog is part of. */ @@ -71,7 +80,8 @@ public PDDocumentCatalog(PDDocument doc) } /** - * Constructor. + * Constructor. Internal PDFBox use only! If you need to get the document catalog, call + * {@link PDDocument#getDocumentCatalog()}. * * @param doc The document that this catalog is part of. * @param rootDictionary The root dictionary that this object wraps. @@ -100,6 +110,34 @@ public COSDictionary getCOSObject() */ public PDAcroForm getAcroForm() { + return getAcroForm(new AcroFormDefaultFixup(document)); + } + + /** + * Get the documents AcroForm. This will return null if no AcroForm is part of the document. + * + * Dependent on setting acroFormFixup some fixing/changes will be done to the AcroForm. + * If you need to ensure that there are no fixes applied call getAcroForm with null. + * + * Using getAcroForm(PDDocumentFixup acroFormFixup) might change the original content and + * subsequent calls with getAcroForm(null) will return the changed content. + * + * @param acroFormFixup the fix up action or null + * @return The document's AcroForm. 
+ */ + public PDAcroForm getAcroForm(PDDocumentFixup acroFormFixup) + { + if (acroFormFixup != null && acroFormFixup != acroFormFixupApplied) + { + acroFormFixup.apply(); + cachedAcroForm = null; + acroFormFixupApplied = acroFormFixup; + } + else if (acroFormFixupApplied != null) + { + LOG.debug("AcroForm content has already been retrieved with fixes applied - original content changed because of that"); + } + if (cachedAcroForm == null) { COSDictionary dict = (COSDictionary)root.getDictionaryObject(COSName.ACRO_FORM); @@ -121,6 +159,8 @@ public void setAcroForm(PDAcroForm acroForm) /** * Returns all pages in the document, as a page tree. + * + * @return the page tree of all pages */ public PDPageTree getPages() { @@ -156,8 +196,8 @@ public void setViewerPreferences(PDViewerPreferences prefs) */ public PDDocumentOutline getDocumentOutline() { - COSDictionary dict = (COSDictionary)root.getDictionaryObject(COSName.OUTLINES); - return dict == null ? null : new PDDocumentOutline(dict); + COSBase cosObj = root.getDictionaryObject(COSName.OUTLINES); + return cosObj instanceof COSDictionary ? new PDDocumentOutline((COSDictionary)cosObj) : null; } /** @@ -172,6 +212,8 @@ public void setDocumentOutline(PDDocumentOutline outlines) /** * Returns the document's article threads. + * + * @return a list containing all article threads. */ public List getThreads() { @@ -194,7 +236,7 @@ public List getThreads() * * @param threads The list of threads, or null to clear it. 
*/ - public void setThreads(List threads) + public void setThreads(List threads) { root.setItem(COSName.THREADS, COSArrayList.converterToCOSArray(threads)); } @@ -244,11 +286,7 @@ public void setOpenAction(PDDestinationOrAction action) public PDDestinationOrAction getOpenAction() throws IOException { COSBase openAction = root.getDictionaryObject(COSName.OPEN_ACTION); - if (openAction == null) - { - return null; - } - else if (openAction instanceof COSDictionary) + if (openAction instanceof COSDictionary) { return PDActionFactory.createAction((COSDictionary)openAction); } @@ -258,7 +296,7 @@ else if (openAction instanceof COSArray) } else { - throw new IOException("Unknown OpenAction " + openAction); + return null; } } /** @@ -431,13 +469,22 @@ public void setOutputIntents(List outputIntents) /** * Returns the page display mode. + * + * @return the page mode. */ public PageMode getPageMode() { String mode = root.getNameAsString(COSName.PAGE_MODE); if (mode != null) { - return PageMode.fromString(mode); + try + { + return PageMode.fromString(mode); + } + catch (IllegalArgumentException e) + { + return PageMode.USE_NONE; + } } else { @@ -457,6 +504,8 @@ public void setPageMode(PageMode mode) /** * Returns the page layout. + * + * @return the page layout. */ public PageLayout getPageLayout() { @@ -483,6 +532,8 @@ public void setPageLayout(PageLayout layout) /** * Returns the document-level URI. + * + * @return the document-level URI */ public PDURIDictionary getURI() { @@ -502,10 +553,12 @@ public void setURI(PDURIDictionary uri) /** * Get the document's structure tree root, or null if none exists. + * + * @return the structure tree root. */ public PDStructureTreeRoot getStructureTreeRoot() { - COSDictionary dict = (COSDictionary)root.getDictionaryObject(COSName.STRUCT_TREE_ROOT); + COSDictionary dict = root.getCOSDictionary(COSName.STRUCT_TREE_ROOT); return dict == null ? 
null : new PDStructureTreeRoot(dict); } @@ -521,6 +574,8 @@ public void setStructureTreeRoot(PDStructureTreeRoot treeRoot) /** * Returns the language for the document, or null. + * + * @return the language or null. */ public String getLanguage() { @@ -591,7 +646,8 @@ public PDOptionalContentProperties getOCProperties() } /** - * Sets the optional content properties dictionary. + * Sets the optional content properties dictionary. The document version is incremented to 1.5 + * if lower. * * @param ocProperties the optional properties dictionary */ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocumentInformation.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocumentInformation.java index d418ad4c99d..0d27d8bcb07 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocumentInformation.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocumentInformation.java @@ -298,6 +298,8 @@ public void setCustomMetadataValue( String fieldName, String fieldValue ) * 'True', 'False', or 'Unknown'. * * @param value The new trapped value for the document. + * + * @throws IllegalArgumentException if the parameter is invalid. */ public void setTrapped( String value ) { @@ -306,7 +308,7 @@ public void setTrapped( String value ) !value.equals( "False" ) && !value.equals( "Unknown" ) ) { - throw new RuntimeException( "Valid values for trapped are " + + throw new IllegalArgumentException( "Valid values for trapped are " + "'True', 'False', or 'Unknown'" ); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocumentNameDictionary.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocumentNameDictionary.java index 1be58f93765..3e4aa5b7df3 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocumentNameDictionary.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocumentNameDictionary.java @@ -75,8 +75,8 @@ public COSDictionary getCOSObject() } /** - * Get the destination named tree node. 
The value in this name tree will be PDDestination - * objects. + * Get the destination name tree node. The values in this name tree will be + * PDPageDestination objects. * * @return The destination name tree node. */ @@ -118,8 +118,8 @@ public void setDests( PDDestinationNameTreeNode dests ) } /** - * Get the embedded files named tree node. The value in this name tree will be PDComplexFileSpecification - * objects. + * Get the embedded files named tree node. The values in this name tree will + * be PDComplexFileSpecification objects. * * @return The embedded files name tree node. */ @@ -148,9 +148,11 @@ public void setEmbeddedFiles( PDEmbeddedFilesNameTreeNode ef ) } /** - * Get the document level javascript entries. The value in this name tree will be PDTextStream. + * Get the document level JavaScript name tree. When the document is opened, all the JavaScript + * actions in it shall be executed, defining JavaScript functions for use by other scripts in + * the document. * - * @return The document level named javascript. + * @return The document level JavaScript name tree. 
*/ public PDJavascriptNameTreeNode getJavaScript() { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDEmbeddedFilesNameTreeNode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDEmbeddedFilesNameTreeNode.java index ae179705f58..a7463c0bf68 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDEmbeddedFilesNameTreeNode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDEmbeddedFilesNameTreeNode.java @@ -54,7 +54,7 @@ protected PDComplexFileSpecification convertCOSToPD( COSBase base ) throws IOExc } @Override - protected PDNameTreeNode createChildNode( COSDictionary dic ) + protected PDNameTreeNode createChildNode( COSDictionary dic ) { return new PDEmbeddedFilesNameTreeNode(dic); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDFormContentStream.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDFormContentStream.java new file mode 100644 index 00000000000..276e63e9c1f --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDFormContentStream.java @@ -0,0 +1,39 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel; + +import java.io.IOException; + +import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; + +/** + * + * @author Tilman Hausherr + */ +public final class PDFormContentStream extends PDAbstractContentStream +{ + /** + * Create a new form XObject content stream. 
+ * + * @param form The form XObject stream to write to. + * + * @throws IOException If there is an error writing to the form contents. + */ + public PDFormContentStream(PDFormXObject form) throws IOException + { + super(null, form.getContentStream().createOutputStream(), form.getResources()); + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDJavascriptNameTreeNode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDJavascriptNameTreeNode.java index 7c4ebf39c0f..c6870ce0296 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDJavascriptNameTreeNode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDJavascriptNameTreeNode.java @@ -59,7 +59,7 @@ protected PDActionJavaScript convertCOSToPD( COSBase base ) throws IOException } @Override - protected PDNameTreeNode createChildNode( COSDictionary dic ) + protected PDNameTreeNode createChildNode( COSDictionary dic ) { return new PDJavascriptNameTreeNode(dic); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPage.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPage.java index 44a4e116bfd..cd74cf11641 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPage.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPage.java @@ -40,7 +40,9 @@ import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.pdmodel.interactive.action.PDPageAdditionalActions; +import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.measurement.PDViewportDictionary; import org.apache.pdfbox.pdmodel.interactive.pagenavigation.PDThreadBead; import org.apache.pdfbox.pdmodel.interactive.pagenavigation.PDTransition; import org.apache.pdfbox.util.Matrix; @@ -139,6 +141,13 @@ else if (base instanceof COSArray && ((COSArray) base).size() > 0) return streams.iterator(); } + /** + * 
Returns the content stream(s) of this page as a single input stream. + * + * @return An InputStream, never null. Multiple content streams are concatenated and separated + * with a newline. An empty stream is returned if the page doesn't have any content stream. + * @throws IOException If the stream could not be read + */ @Override public InputStream getContents() throws IOException { @@ -154,17 +163,21 @@ else if (base instanceof COSArray && ((COSArray) base).size() > 0) List inputStreams = new ArrayList(); for (int i = 0; i < streams.size(); i++) { - COSStream stream = (COSStream)streams.getObject(i); - inputStreams.add(stream.createInputStream()); - inputStreams.add(new ByteArrayInputStream(delimiter)); + COSBase strm = streams.getObject(i); + if (strm instanceof COSStream) + { + COSStream stream = (COSStream) strm; + inputStreams.add(stream.createInputStream()); + inputStreams.add(new ByteArrayInputStream(delimiter)); + } } return new SequenceInputStream(Collections.enumeration(inputStreams)); } - return null; + return new ByteArrayInputStream(new byte[0]); } /** - * Returns true if this page has contents. + * Returns true if this page has one or more content streams. */ public boolean hasContents() { @@ -188,13 +201,12 @@ public PDResources getResources() { if (pageResources == null) { - COSDictionary resources = (COSDictionary) - PDPageTree.getInheritableAttribute(page, COSName.RESOURCES); + COSBase base = PDPageTree.getInheritableAttribute(page, COSName.RESOURCES); // note: it's an error for resources to not be present - if (resources != null) + if (base instanceof COSDictionary) { - pageResources = new PDResources(resources, resourceCache); + pageResources = new PDResources((COSDictionary) base, resourceCache); } } return pageResources; @@ -221,11 +233,12 @@ public void setResources(PDResources resources) /** * This will get the key of this Page in the structural parent tree. 
* - * @return the integer key of the page's entry in the structural parent tree + * @return the integer key of the page's entry in the structural parent tree or -1 if + * there isn't any. */ public int getStructParents() { - return page.getInt(COSName.STRUCT_PARENTS, 0); + return page.getInt(COSName.STRUCT_PARENTS); } /** @@ -252,17 +265,19 @@ public Matrix getMatrix() } /** - * A rectangle, expressed in default user space units, defining the boundaries of the physical - * medium on which the page is intended to be displayed or printed. + * A rectangle, expressed in default user space units, defining the boundaries of the physical medium on which the + * page is intended to be displayed or printed. + * + * @return the media box. */ public PDRectangle getMediaBox() { if (mediaBox == null) { - COSArray array = (COSArray) PDPageTree.getInheritableAttribute(page, COSName.MEDIA_BOX); - if (array != null) + COSBase base = PDPageTree.getInheritableAttribute(page, COSName.MEDIA_BOX); + if (base instanceof COSArray) { - mediaBox = new PDRectangle(array); + mediaBox = new PDRectangle((COSArray) base); } } if (mediaBox == null) @@ -292,16 +307,17 @@ public void setMediaBox(PDRectangle mediaBox) } /** - * A rectangle, expressed in default user space units, defining the visible region of default - * user space. When the page is displayed or printed, its contents are to be clipped (cropped) - * to this rectangle. + * A rectangle, expressed in default user space units, defining the visible region of default user space. When the + * page is displayed or printed, its contents are to be clipped (cropped) to this rectangle. + * + * @return the crop box. 
*/ public PDRectangle getCropBox() { - COSArray array = (COSArray) PDPageTree.getInheritableAttribute(page, COSName.CROP_BOX); - if (array != null) + COSBase base = PDPageTree.getInheritableAttribute(page, COSName.CROP_BOX); + if (base instanceof COSArray) { - return clipToMediaBox(new PDRectangle(array)); + return clipToMediaBox(new PDRectangle((COSArray) base)); } else { @@ -335,17 +351,15 @@ public void setCropBox(PDRectangle cropBox) */ public PDRectangle getBleedBox() { - PDRectangle retval; - COSArray array = (COSArray) page.getDictionaryObject(COSName.BLEED_BOX); - if (array != null) + COSBase base = page.getDictionaryObject(COSName.BLEED_BOX); + if (base instanceof COSArray) { - retval = clipToMediaBox(new PDRectangle(array)); + return clipToMediaBox(new PDRectangle((COSArray) base)); } else { - retval = getCropBox(); + return getCropBox(); } - return retval; } /** @@ -373,17 +387,15 @@ public void setBleedBox(PDRectangle bleedBox) */ public PDRectangle getTrimBox() { - PDRectangle retval; - COSArray array = (COSArray) page.getDictionaryObject(COSName.TRIM_BOX); - if (array != null) + COSBase base = page.getDictionaryObject(COSName.TRIM_BOX); + if (base instanceof COSArray) { - retval = clipToMediaBox(new PDRectangle(array)); + return clipToMediaBox(new PDRectangle((COSArray) base)); } else { - retval = getCropBox(); + return getCropBox(); } - return retval; } /** @@ -412,17 +424,15 @@ public void setTrimBox(PDRectangle trimBox) */ public PDRectangle getArtBox() { - PDRectangle retval; - COSArray array = (COSArray) page.getDictionaryObject(COSName.ART_BOX); - if (array != null) + COSBase base = page.getDictionaryObject(COSName.ART_BOX); + if (base instanceof COSArray) { - retval = clipToMediaBox(new PDRectangle(array)); + return clipToMediaBox(new PDRectangle((COSArray) base)); } else { - retval = getCropBox(); + return getCropBox(); } - return retval; } /** @@ -514,10 +524,11 @@ public void setContents(List contents) } /** - * This will get a list of 
PDThreadBead objects, which are article threads in the document. - * This will return an empty list if there are no thread beads. - * - * @return A list of article threads on this page. + * This will get a list of PDThreadBead objects, which are article threads in the document. This + * will return an empty list if there are no thread beads. + * + * @return A list of article threads on this page, never null. The returned list is backed by + * the beads COSArray, so any adding or deleting in this list will change the document too. */ public List getThreadBeads() { @@ -526,20 +537,19 @@ public List getThreadBeads() { beads = new COSArray(); } - List pdObjects = new ArrayList(); + List pdObjects = new ArrayList(beads.size()); for (int i = 0; i < beads.size(); i++) { - COSDictionary beadDic = (COSDictionary) beads.getObject(i); + COSBase base = beads.getObject(i); PDThreadBead bead = null; // in some cases the bead is null - if (beadDic != null) + if (base instanceof COSDictionary) { - bead = new PDThreadBead(beadDic); + bead = new PDThreadBead((COSDictionary) base); } pdObjects.add(bead); } return new COSArrayList(pdObjects, beads); - } /** @@ -561,10 +571,10 @@ public void setThreadBeads(List beads) public PDMetadata getMetadata() { PDMetadata retval = null; - COSStream stream = (COSStream) page.getDictionaryObject(COSName.METADATA); - if (stream != null) + COSBase base = page.getDictionaryObject(COSName.METADATA); + if (base instanceof COSStream) { - retval = new PDMetadata(stream); + retval = new PDMetadata((COSStream) base); } return retval; } @@ -586,8 +596,13 @@ public void setMetadata(PDMetadata meta) */ public PDPageAdditionalActions getActions() { - COSDictionary addAct = (COSDictionary) page.getDictionaryObject(COSName.AA); - if (addAct == null) + COSDictionary addAct; + COSBase base = page.getDictionaryObject(COSName.AA); + if (base instanceof COSDictionary) + { + addAct = (COSDictionary) base; + } + else { addAct = new COSDictionary(); 
page.setItem(COSName.AA, addAct); @@ -610,8 +625,8 @@ public void setActions(PDPageAdditionalActions actions) */ public PDTransition getTransition() { - COSDictionary transitionDictionary = (COSDictionary) page.getDictionaryObject(COSName.TRANS); - return transitionDictionary == null ? null : new PDTransition(transitionDictionary); + COSBase base = page.getDictionaryObject(COSName.TRANS); + return base instanceof COSDictionary ? new PDTransition((COSDictionary) base) : null; } /** @@ -634,25 +649,42 @@ public void setTransition(PDTransition transition, float duration) page.setItem(COSName.TRANS, transition); page.setItem(COSName.DUR, new COSFloat(duration)); } - + /** - * This will return a list of the Annotations for this page. + * This will return a list of the annotations for this page. + * + * @return List of the PDAnnotation objects, never null. The returned list is backed by the + * annotations COSArray, so any adding or deleting in this list will change the document too. * - * @return List of the PDAnnotation objects, never null. * @throws IOException If there is an error while creating the annotation list. */ public List getAnnotations() throws IOException { - COSArrayList retval; - COSArray annots = (COSArray) page.getDictionaryObject(COSName.ANNOTS); - if (annots == null) + return getAnnotations(new AnnotationFilter() { - annots = new COSArray(); - page.setItem(COSName.ANNOTS, annots); - retval = new COSArrayList(new ArrayList(), annots); - } - else + @Override + public boolean accept(PDAnnotation annotation) + { + return true; + } + }); + } + + /** + * This will return a list of the annotations for this page. + * + * @param annotationFilter the annotation filter provided allowing to filter out specific annotations + * @return List of the PDAnnotation objects, never null. The returned list is backed by the + * annotations COSArray, so any adding or deleting in this list will change the document too. 
+ * + * @throws IOException If there is an error while creating the annotation list. + */ + public List getAnnotations(AnnotationFilter annotationFilter) throws IOException + { + COSBase base = page.getDictionaryObject(COSName.ANNOTS); + if (base instanceof COSArray) { + COSArray annots = (COSArray) base; List actuals = new ArrayList(); for (int i = 0; i < annots.size(); i++) { @@ -661,11 +693,15 @@ public List getAnnotations() throws IOException { continue; } - actuals.add(PDAnnotation.createAnnotation(item)); + PDAnnotation createdAnnotation = PDAnnotation.createAnnotation(item); + if (annotationFilter.accept(createdAnnotation)) + { + actuals.add(createdAnnotation); + } } - retval = new COSArrayList(actuals, annots); + return new COSArrayList(actuals, annots); } - return retval; + return new COSArrayList(page, COSName.ANNOTS); } /** @@ -692,9 +728,89 @@ public int hashCode() /** * Returns the resource cache associated with this page, or null if there is none. + * + * @return the resource cache associated to this page. */ public ResourceCache getResourceCache() { return resourceCache; } + + /** + * Get the viewports. + * + * @return a list of viewports or null if there is no /VP entry. + */ + public List getViewports() + { + COSBase base = page.getDictionaryObject(COSName.VP); + if (!(base instanceof COSArray)) + { + return null; + } + COSArray array = (COSArray) base; + List viewports = new ArrayList(); + for (int i = 0; i < array.size(); ++i) + { + COSBase base2 = array.getObject(i); + if (base2 instanceof COSDictionary) + { + viewports.add(new PDViewportDictionary((COSDictionary) base2)); + } + else + { + LOG.warn("Array element " + base2 + " is skipped, must be a (viewport) dictionary"); + } + } + return viewports; + } + + /** + * Set the viewports. + * + * @param viewports A list of viewports, or null if the entry is to be deleted. 
+ */ + public void setViewports(List viewports) + { + if (viewports == null) + { + page.removeItem(COSName.VP); + return; + } + COSArray array = new COSArray(); + for (PDViewportDictionary viewport : viewports) + { + array.add(viewport); + } + page.setItem(COSName.VP, array); + } + + /** + * Get the user unit. This is a positive number that shall give the size of default user space + * units, in multiples of 1/72 inch, or 1 if it hasn't been set. This is supported by PDF 1.6 + * and higher. + * + * @return the user unit. + */ + public float getUserUnit() + { + float userUnit = page.getFloat(COSName.USER_UNIT, 1.0f); + return userUnit > 0 ? userUnit : 1.0f; + } + + /** + * Get the user unit. This is a positive number that shall give the size of default user space + * units, in multiples of 1/72 inch. This is supported by PDF 1.6 and higher. + * + * @param userUnit + * throws IllegalArgumentException if the parameter is not positive. + */ + public void setUserUnit(float userUnit) + { + if (userUnit <= 0) + { + throw new IllegalArgumentException("User unit must be positive"); + } + page.setFloat(COSName.USER_UNIT, userUnit); + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java index 860c51cc046..6f55917c3ae 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java @@ -27,6 +27,7 @@ import java.util.Stack; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSName; @@ -48,11 +49,14 @@ import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import 
org.apache.pdfbox.pdmodel.graphics.image.PDInlineImage; +import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern; import org.apache.pdfbox.pdmodel.graphics.shading.PDShading; import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; +import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; import org.apache.pdfbox.util.Charsets; import org.apache.pdfbox.util.Matrix; +import org.apache.pdfbox.util.NumberFormatUtil; /** * Provides the ability to write to a page content stream. @@ -105,9 +109,13 @@ public boolean isPrepend() // number format private final NumberFormat formatDecimal = NumberFormat.getNumberInstance(Locale.US); + private final byte[] formatBuffer = new byte[32]; + + private boolean sourcePageHadContents = false; /** - * Create a new PDPage content stream. + * Create a new PDPage content stream. This constructor overwrites all existing content streams + * of this page. * * @param document The document the page is part of. * @param sourcePage The page to write the contents to. @@ -116,6 +124,10 @@ public boolean isPrepend() public PDPageContentStream(PDDocument document, PDPage sourcePage) throws IOException { this(document, sourcePage, AppendMode.OVERWRITE, true, false); + if (sourcePageHadContents) + { + LOG.warn("You are overwriting an existing content, you should use the append mode"); + } } /** @@ -137,7 +149,10 @@ public PDPageContentStream(PDDocument document, PDPage sourcePage, boolean appen } /** - * Create a new PDPage content stream. + * Create a new PDPage content stream. If the appendContent parameter is set to + * {@link AppendMode#APPEND}, you may want to use + * {@link #PDPageContentStream(PDDocument, PDPage, PDPageContentStream.AppendMode, boolean, boolean)} + * instead, with the fifth parameter set to true. * * @param document The document the page is part of. * @param sourcePage The page to write the contents to. 
@@ -159,7 +174,7 @@ public PDPageContentStream(PDDocument document, PDPage sourcePage, AppendMode ap * @param appendContent Indicates whether content will be overwritten. If false all previous * content is deleted. * @param compress Tell if the content stream should compress the page contents. - * @param resetContext Tell if the graphic context should be reseted. You should use this when + * @param resetContext Tell if the graphic context should be reset. You should use this when * appending to an existing stream, because the existing stream may have changed graphic * properties (e.g. scaling, rotation). * @throws IOException If there is an error writing to the page contents. @@ -247,10 +262,7 @@ public PDPageContentStream(PDDocument document, PDPage sourcePage, AppendMode ap } else { - if (sourcePage.hasContents()) - { - LOG.warn("You are overwriting an existing content, you should use the append mode"); - } + sourcePageHadContents = sourcePage.hasContents(); PDStream contents = new PDStream(document); sourcePage.setContents(contents); output = contents.createOutputStream(filter); @@ -266,7 +278,7 @@ public PDPageContentStream(PDDocument document, PDPage sourcePage, AppendMode ap } // configure NumberFormat - formatDecimal.setMaximumFractionDigits(10); + formatDecimal.setMaximumFractionDigits(5); formatDecimal.setGroupingUsed(false); } @@ -302,6 +314,46 @@ public PDPageContentStream(PDDocument doc, PDAppearanceStream appearance, Output formatDecimal.setGroupingUsed(false); } + /** + * Create a new appearance stream. Note that this is not actually a "page" content stream. + * + * @param doc The document the appearance is part of. + * @param form The XObject form to add to. + * @param outputStream The output stream to write to. + * @throws IOException If there is an error writing to the page contents. 
+ */ + public PDPageContentStream(PDDocument doc, PDFormXObject form, OutputStream outputStream) + throws IOException + { + this.document = doc; + + output = outputStream; + this.resources = form.getResources(); + + formatDecimal.setMaximumFractionDigits(4); + formatDecimal.setGroupingUsed(false); + } + + /** + * Create a new content stream for a tiling pattern. Note that this is not actually a "page" content stream. + * + * @param doc The document the pattern is part of. + * @param pattern The pattern to add to. + * @param outputStream The output stream to write to. + * @throws IOException If there is an error writing to the page contents. + */ + public PDPageContentStream(PDDocument doc, PDTilingPattern pattern, OutputStream outputStream) + throws IOException + { + this.document = doc; + + output = outputStream; + this.resources = pattern.getResources(); + + formatDecimal.setMaximumFractionDigits(4); + formatDecimal.setGroupingUsed(false); + } + /** * Begin some text operations. * @@ -315,7 +367,7 @@ public void beginText() throws IOException { throw new IllegalStateException("Error: Nested beginText() calls are not allowed."); } - writeOperator("BT"); + writeOperator(OperatorName.BEGIN_TEXT); inTextMode = true; } @@ -332,7 +384,7 @@ public void endText() throws IOException { throw new IllegalStateException("Error: You must call beginText() before calling endText."); } - writeOperator("ET"); + writeOperator(OperatorName.END_TEXT); inTextMode = false; } @@ -361,7 +413,7 @@ public void setFont(PDFont font, float fontSize) throws IOException writeOperand(resources.add(font)); writeOperand(fontSize); - writeOperator("Tf"); + writeOperator(OperatorName.SET_FONT_AND_SIZE); } /** @@ -377,13 +429,63 @@ public void drawString(String text) throws IOException showText(text); } + /** + * Shows the given text at the location specified by the current text matrix with the given + * interspersed positioning. This allows the user to efficiently position each glyph or sequence + * of glyphs. 
+ * + * @param textWithPositioningArray An array consisting of String and Float types. Each String is + * output to the page using the current text matrix. Using the default coordinate system, each + * interspersed number adjusts the current text matrix by translating to the left or down for + * horizontal and vertical text respectively. The number is expressed in thousandths of a text + * space unit, and may be negative. + * + * @throws IOException if an io exception occurs. + */ + public void showTextWithPositioning(Object[] textWithPositioningArray) throws IOException + { + write("["); + for (Object obj : textWithPositioningArray) + { + if (obj instanceof String) + { + showTextInternal((String) obj); + } + else if (obj instanceof Float) + { + writeOperand((Float) obj); + } + else + { + throw new IllegalArgumentException("Argument must consist of array of Float and String types"); + } + } + write("] "); + writeOperator(OperatorName.SHOW_TEXT_ADJUSTED); + } + /** * Shows the given text at the location specified by the current text matrix. * * @param text The Unicode text to show. * @throws IOException If an io exception occurs. + * @throws IllegalArgumentException if a character isn't supported by the current font */ public void showText(String text) throws IOException + { + showTextInternal(text); + write(" "); + writeOperator(OperatorName.SHOW_TEXT); + } + + /** + * Outputs a string using the correct encoding and subsetting as required. + * + * @param text The Unicode text to show. + * + * @throws IOException If an io exception occurs. 
+ */ + protected void showTextInternal(String text) throws IOException { if (!inTextMode) { @@ -400,7 +502,8 @@ public void showText(String text) throws IOException // Unicode code points to keep when subsetting if (font.willBeSubset()) { - for (int offset = 0; offset < text.length(); ) + int offset = 0; + while (offset < text.length()) { int codePoint = text.codePointAt(offset); font.addToSubset(codePoint); @@ -409,9 +512,6 @@ public void showText(String text) throws IOException } COSWriter.writeString(font.encode(text), output); - write(" "); - - writeOperator("Tj"); } /** @@ -419,11 +519,24 @@ public void showText(String text) throws IOException * * @param leading The leading in unscaled text units. * @throws IOException If there is an error writing to the stream. + * @deprecated use {@link #setLeading(float) setLeading(float)} */ + @Deprecated public void setLeading(double leading) throws IOException { - writeOperand((float) leading); - writeOperator("TL"); + setLeading((float) leading); + } + + /** + * Sets the text leading. + * + * @param leading The leading in unscaled text units. + * @throws IOException If there is an error writing to the stream. 
+ */ + public void setLeading(float leading) throws IOException + { + writeOperand(leading); + writeOperator(OperatorName.SET_TEXT_LEADING); } /** @@ -438,7 +551,7 @@ public void newLine() throws IOException { throw new IllegalStateException("Must call beginText() before newLine()"); } - writeOperator("T*"); + writeOperator(OperatorName.NEXT_LINE); } /** @@ -472,7 +585,7 @@ public void newLineAtOffset(float tx, float ty) throws IOException } writeOperand(tx); writeOperand(ty); - writeOperator("Td"); + writeOperator(OperatorName.MOVE_TEXT); } /** @@ -521,7 +634,7 @@ public void setTextMatrix(Matrix matrix) throws IOException throw new IllegalStateException("Error: must call beginText() before setTextMatrix"); } writeAffineTransform(matrix.createAffineTransform()); - writeOperator("Tm"); + writeOperator(OperatorName.SET_MATRIX); } /** @@ -608,7 +721,34 @@ public void drawImage(PDImageXObject image, float x, float y, float width, float transform(new Matrix(transform)); writeOperand(resources.add(image)); - writeOperator("Do"); + writeOperator(OperatorName.DRAW_OBJECT); + + restoreGraphicsState(); + } + + /** + * Draw an image at the origin with the given transformation matrix. + * + * @param image The image to draw. + * @param matrix The transformation matrix to apply to the image. + * + * @throws IOException If there is an error writing to the stream. + * @throws IllegalStateException If the method was called within a text block. 
+ */ + public void drawImage(PDImageXObject image, Matrix matrix) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: drawImage is not allowed within a text block."); + } + + saveGraphicsState(); + + AffineTransform transform = matrix.createAffineTransform(); + transform(new Matrix(transform)); + + writeOperand(resources.add(image)); + writeOperator(OperatorName.DRAW_OBJECT); restoreGraphicsState(); } @@ -685,7 +825,7 @@ public void drawImage(PDInlineImage inlineImage, float x, float y, float width, // create the image dictionary StringBuilder sb = new StringBuilder(); - sb.append("BI"); + sb.append(OperatorName.BEGIN_INLINE_IMAGE); sb.append("\n /W "); sb.append(inlineImage.getWidth()); @@ -697,11 +837,12 @@ public void drawImage(PDInlineImage inlineImage, float x, float y, float width, sb.append("/"); sb.append(inlineImage.getColorSpace().getName()); - if (inlineImage.getDecode() != null && inlineImage.getDecode().size() > 0) + COSArray decodeArray = inlineImage.getDecode(); + if (decodeArray != null && decodeArray.size() > 0) { sb.append("\n /D "); sb.append("["); - for (COSBase base : inlineImage.getDecode()) + for (COSBase base : decodeArray) { sb.append(((COSNumber) base).intValue()); sb.append(" "); @@ -722,10 +863,10 @@ public void drawImage(PDInlineImage inlineImage, float x, float y, float width, writeLine(); // binary data - writeOperator("ID"); + writeOperator(OperatorName.BEGIN_INLINE_IMAGE_DATA); writeBytes(inlineImage.getData()); writeLine(); - writeOperator("EI"); + writeOperator(OperatorName.END_INLINE_IMAGE); restoreGraphicsState(); } @@ -757,7 +898,9 @@ public void drawXObject(PDXObject xobject, float x, float y, float width, float * @param transform the transformation matrix * @throws IOException If there is an error writing to the stream. * @throws IllegalStateException If the method was called within a text block. - * @deprecated Use {@link #drawImage} or {@link #drawForm} instead. 
+ * @deprecated Use {@link #drawImage(PDImageXObject, Matrix) drawImage(PDImageXObject, Matrix)} + * or {@link #drawForm(PDFormXObject) drawForm(PDFormXObject)} with + * {@link #transform(Matrix) transform(Matrix)} instead. */ @Deprecated public void drawXObject(PDXObject xobject, AffineTransform transform) throws IOException @@ -782,7 +925,7 @@ public void drawXObject(PDXObject xobject, AffineTransform transform) throws IOE transform(new Matrix(transform)); writeOperand(objMapping); - writeOperator("Do"); + writeOperator(OperatorName.DRAW_OBJECT); restoreGraphicsState(); } @@ -802,7 +945,7 @@ public void drawForm(PDFormXObject form) throws IOException } writeOperand(resources.add(form)); - writeOperator("Do"); + writeOperator(OperatorName.DRAW_OBJECT); } /** @@ -836,15 +979,23 @@ public void concatenate2CTM(AffineTransform at) throws IOException } /** - * The cm operator. Concatenates the given matrix with the CTM. + * The cm operator. Concatenates the given matrix with the current transformation matrix (CTM), + * which maps user space coordinates used within a PDF content stream into output device + * coordinates. More details on coordinates can be found in the PDF 32000 specification, 8.3.2 + * Coordinate Spaces. * * @param matrix the transformation matrix * @throws IOException If there is an error writing to the stream. 
*/ public void transform(Matrix matrix) throws IOException { + if (inTextMode) + { + LOG.warn("Modifying the current transformation matrix is not allowed within text objects."); + } + writeAffineTransform(matrix.createAffineTransform()); - writeOperator("cm"); + writeOperator(OperatorName.CONCAT); } /** @@ -853,6 +1004,11 @@ public void transform(Matrix matrix) throws IOException */ public void saveGraphicsState() throws IOException { + if (inTextMode) + { + LOG.warn("Saving the graphics state is not allowed within text objects."); + } + if (!fontStack.isEmpty()) { fontStack.push(fontStack.peek()); @@ -865,7 +1021,7 @@ public void saveGraphicsState() throws IOException { nonStrokingColorSpaceStack.push(nonStrokingColorSpaceStack.peek()); } - writeOperator("q"); + writeOperator(OperatorName.SAVE); } /** @@ -874,6 +1030,11 @@ public void saveGraphicsState() throws IOException */ public void restoreGraphicsState() throws IOException { + if (inTextMode) + { + LOG.warn("Restoring the graphics state is not allowed within text objects."); + } + if (!fontStack.isEmpty()) { fontStack.pop(); @@ -886,7 +1047,7 @@ public void restoreGraphicsState() throws IOException { nonStrokingColorSpaceStack.pop(); } - writeOperator("Q"); + writeOperator(OperatorName.RESTORE); } /** @@ -902,7 +1063,7 @@ public void setStrokingColorSpace(PDColorSpace colorSpace) throws IOException { setStrokingColorSpaceStack(colorSpace); writeOperand(getName(colorSpace)); - writeOperator("CS"); + writeOperator(OperatorName.STROKING_COLORSPACE); } /** @@ -911,14 +1072,14 @@ public void setStrokingColorSpace(PDColorSpace colorSpace) throws IOException * * @param colorSpace The colorspace to write. * @throws IOException If there is an error writing the colorspace. - * @deprecated Use {@link #setNonStrokingColor} instead. + * @deprecated Use {@link #setNonStrokingColor(PDColor)} instead. 
*/ @Deprecated public void setNonStrokingColorSpace(PDColorSpace colorSpace) throws IOException { setNonStrokingColorSpaceStack(colorSpace); writeOperand(getName(colorSpace)); - writeOperator("cs"); + writeOperator(OperatorName.NON_STROKING_COLORSPACE); } private COSName getName(PDColorSpace colorSpace) throws IOException @@ -947,7 +1108,7 @@ public void setStrokingColor(PDColor color) throws IOException strokingColorSpaceStack.peek() != color.getColorSpace()) { writeOperand(getName(color.getColorSpace())); - writeOperator("CS"); + writeOperator(OperatorName.STROKING_COLORSPACE); setStrokingColorSpaceStack(color.getColorSpace()); } @@ -966,11 +1127,11 @@ public void setStrokingColor(PDColor color) throws IOException color.getColorSpace() instanceof PDDeviceN || color.getColorSpace() instanceof PDICCBased) { - writeOperator("SCN"); + writeOperator(OperatorName.STROKING_COLOR_N); } else { - writeOperator("SC"); + writeOperator(OperatorName.STROKING_COLOR); } } @@ -1003,9 +1164,9 @@ public void setStrokingColor(float[] components) throws IOException throw new IllegalStateException("The color space must be set before setting a color"); } - for (int i = 0; i < components.length; i++) + for (float component : components) { - writeOperand(components[i]); + writeOperand(component); } PDColorSpace currentStrokingColorSpace = strokingColorSpaceStack.peek(); @@ -1014,12 +1175,35 @@ public void setStrokingColor(float[] components) throws IOException currentStrokingColorSpace instanceof PDPattern || currentStrokingColorSpace instanceof PDICCBased) { - writeOperator("SCN"); + writeOperator(OperatorName.STROKING_COLOR_N); } else { - writeOperator("SC"); + writeOperator(OperatorName.STROKING_COLOR); + } + } + + /** + * Set the stroking color in the DeviceRGB color space. Range is 0..1. + * + * @param r The red value + * @param g The green value. + * @param b The blue value. + * @throws IOException If an IO error occurs while writing to the stream. 
+ * @throws IllegalArgumentException If the parameters are invalid. + */ + public void setStrokingColor(float r, float g, float b) throws IOException + { + if (isOutsideOneInterval(r) || isOutsideOneInterval(g) || isOutsideOneInterval(b)) + { + throw new IllegalArgumentException("Parameters must be within 0..1, but are " + + String.format("(%.2f,%.2f,%.2f)", r, g, b)); } + writeOperand(r); + writeOperand(g); + writeOperand(b); + writeOperator(OperatorName.STROKING_COLOR_RGB); + setStrokingColorSpaceStack(PDDeviceRGB.INSTANCE); } /** @@ -1030,7 +1214,10 @@ public void setStrokingColor(float[] components) throws IOException * @param b The blue value. * @throws IOException If an IO error occurs while writing to the stream. * @throws IllegalArgumentException If the parameters are invalid. + * @deprecated use + * {@link #setStrokingColor(float, float, float) setStrokingColor(r/255f, g/255f, b/255f)} */ + @Deprecated public void setStrokingColor(int r, int g, int b) throws IOException { if (isOutside255Interval(r) || isOutside255Interval(g) || isOutside255Interval(b)) @@ -1038,11 +1225,7 @@ public void setStrokingColor(int r, int g, int b) throws IOException throw new IllegalArgumentException("Parameters must be within 0..255, but are " + String.format("(%d,%d,%d)", r, g, b)); } - writeOperand(r / 255f); - writeOperand(g / 255f); - writeOperand(b / 255f); - writeOperator("RG"); - setStrokingColorSpaceStack(PDDeviceRGB.INSTANCE); + setStrokingColor(r / 255f, g / 255f, b / 255f); } /** @@ -1054,7 +1237,7 @@ public void setStrokingColor(int r, int g, int b) throws IOException * @param k The black value. * @throws IOException If an IO error occurs while writing to the stream. * @throws IllegalArgumentException If the parameters are invalid. - * @deprecated Use {@link #setStrokingColor(float, float, float, float)} instead. + * @deprecated Use {@link #setStrokingColor(float, float, float, float) setStrokingColor(c/255f, m/255f, y/255f, k/255f)} instead. 
*/ @Deprecated public void setStrokingColor(int c, int m, int y, int k) throws IOException @@ -1088,7 +1271,7 @@ public void setStrokingColor(float c, float m, float y, float k) throws IOExcept writeOperand(m); writeOperand(y); writeOperand(k); - writeOperator("K"); + writeOperator(OperatorName.STROKING_COLOR_CMYK); setStrokingColorSpaceStack(PDDeviceCMYK.INSTANCE); } @@ -1098,7 +1281,7 @@ public void setStrokingColor(float c, float m, float y, float k) throws IOExcept * @param g The gray value. * @throws IOException If an IO error occurs while writing to the stream. * @throws IllegalArgumentException If the parameter is invalid. - * @deprecated Use {@link #setStrokingColor(double)} instead. + * @deprecated Use {@link #setStrokingColor(float) setStrokingColor(g/255f)} instead. */ @Deprecated public void setStrokingColor(int g) throws IOException @@ -1116,15 +1299,29 @@ public void setStrokingColor(int g) throws IOException * @param g The gray value. * @throws IOException If an IO error occurs while writing to the stream. * @throws IllegalArgumentException If the parameter is invalid. + * @deprecated use {@link #setStrokingColor(float) setStrokingColor(float)} */ + @Deprecated public void setStrokingColor(double g) throws IOException + { + setStrokingColor((float) g); + } + + /** + * Set the stroking color in the DeviceGray color space. Range is 0..1. + * + * @param g The gray value. + * @throws IOException If an IO error occurs while writing to the stream. + * @throws IllegalArgumentException If the parameter is invalid. 
+ */ + public void setStrokingColor(float g) throws IOException { if (isOutsideOneInterval(g)) { throw new IllegalArgumentException("Parameter must be within 0..1, but is " + g); } - writeOperand((float) g); - writeOperator("G"); + writeOperand(g); + writeOperator(OperatorName.STROKING_COLOR_GRAY); setStrokingColorSpaceStack(PDDeviceGray.INSTANCE); } @@ -1140,7 +1337,7 @@ public void setNonStrokingColor(PDColor color) throws IOException nonStrokingColorSpaceStack.peek() != color.getColorSpace()) { writeOperand(getName(color.getColorSpace())); - writeOperator("cs"); + writeOperator(OperatorName.NON_STROKING_COLORSPACE); setNonStrokingColorSpaceStack(color.getColorSpace()); } @@ -1159,11 +1356,11 @@ public void setNonStrokingColor(PDColor color) throws IOException color.getColorSpace() instanceof PDDeviceN || color.getColorSpace() instanceof PDICCBased) { - writeOperator("scn"); + writeOperator(OperatorName.NON_STROKING_COLOR_N); } else { - writeOperator("sc"); + writeOperator(OperatorName.NON_STROKING_COLOR); } } @@ -1196,9 +1393,9 @@ public void setNonStrokingColor(float[] components) throws IOException throw new IllegalStateException("The color space must be set before setting a color"); } - for (int i = 0; i < components.length; i++) + for (float component : components) { - writeOperand(components[i]); + writeOperand(component); } PDColorSpace currentNonStrokingColorSpace = nonStrokingColorSpaceStack.peek(); @@ -1207,11 +1404,11 @@ public void setNonStrokingColor(float[] components) throws IOException currentNonStrokingColorSpace instanceof PDPattern || currentNonStrokingColorSpace instanceof PDICCBased) { - writeOperator("scn"); + writeOperator(OperatorName.NON_STROKING_COLOR_N); } else { - writeOperator("sc"); + writeOperator(OperatorName.NON_STROKING_COLOR); } } @@ -1224,6 +1421,32 @@ public void setNonStrokingColor(float[] components) throws IOException * @throws IOException If an IO error occurs while writing to the stream. 
* @throws IllegalArgumentException If the parameters are invalid. */ + public void setNonStrokingColor(float r, float g, float b) throws IOException + { + if (isOutsideOneInterval(r) || isOutsideOneInterval(g) || isOutsideOneInterval(b)) + { + throw new IllegalArgumentException("Parameters must be within 0..1, but are " + + String.format("(%.2f,%.2f,%.2f)", r, g, b)); + } + writeOperand(r); + writeOperand(g); + writeOperand(b); + writeOperator(OperatorName.NON_STROKING_RGB); + setNonStrokingColorSpaceStack(PDDeviceRGB.INSTANCE); + } + + /** + * Set the non stroking color in the DeviceRGB color space. Range is 0..255. + * + * @param r The red value + * @param g The green value. + * @param b The blue value. + * @throws IOException If an IO error occurs while writing to the stream. + * @throws IllegalArgumentException If the parameters are invalid. + * @deprecated use + * {@link #setNonStrokingColor(float, float, float) setNonStrokingColor(r/255f, g/255f, b/255f)} + */ + @Deprecated public void setNonStrokingColor(int r, int g, int b) throws IOException { if (isOutside255Interval(r) || isOutside255Interval(g) || isOutside255Interval(b)) @@ -1231,11 +1454,7 @@ public void setNonStrokingColor(int r, int g, int b) throws IOException throw new IllegalArgumentException("Parameters must be within 0..255, but are " + String.format("(%d,%d,%d)", r, g, b)); } - writeOperand(r / 255f); - writeOperand(g / 255f); - writeOperand(b / 255f); - writeOperator("rg"); - setNonStrokingColorSpaceStack(PDDeviceRGB.INSTANCE); + setNonStrokingColor(r / 255f, g / 255f, b / 255f); } /** @@ -1259,26 +1478,43 @@ public void setNonStrokingColor(int c, int m, int y, int k) throws IOException } /** - * Set the non-stroking color in the DeviceRGB color space. Range is 0..1. + * Set the non-stroking color in the DeviceCMYK color space. Range is 0..1. * * @param c The cyan value. * @param m The magenta value. * @param y The yellow value. * @param k The black value. 
* @throws IOException If an IO error occurs while writing to the stream. + * @deprecated use + * {@link #setNonStrokingColor(float, float, float, float) setNonStrokingColor(float, float, float, float)} */ + @Deprecated public void setNonStrokingColor(double c, double m, double y, double k) throws IOException + { + setNonStrokingColor((float) c, (float) m, (float) y, (float) k); + } + + /** + * Set the non-stroking color in the DeviceCMYK color space. Range is 0..1. + * + * @param c The cyan value. + * @param m The magenta value. + * @param y The yellow value. + * @param k The black value. + * @throws IOException If an IO error occurs while writing to the stream. + */ + public void setNonStrokingColor(float c, float m, float y, float k) throws IOException { if (isOutsideOneInterval(c) || isOutsideOneInterval(m) || isOutsideOneInterval(y) || isOutsideOneInterval(k)) { throw new IllegalArgumentException("Parameters must be within 0..1, but are " + String.format("(%.2f,%.2f,%.2f,%.2f)", c, m, y, k)); } - writeOperand((float) c); - writeOperand((float) m); - writeOperand((float) y); - writeOperand((float) k); - writeOperator("k"); + writeOperand(c); + writeOperand(m); + writeOperand(y); + writeOperand(k); + writeOperator(OperatorName.NON_STROKING_CMYK); setNonStrokingColorSpaceStack(PDDeviceCMYK.INSTANCE); } @@ -1288,6 +1524,7 @@ public void setNonStrokingColor(double c, double m, double y, double k) throws I * @param g The gray value. * @throws IOException If an IO error occurs while writing to the stream. * @throws IllegalArgumentException If the parameter is invalid. + * @deprecated use {@link #setNonStrokingColor(float) setNonStrokingColor(g/255f)} */ public void setNonStrokingColor(int g) throws IOException { @@ -1304,15 +1541,29 @@ public void setNonStrokingColor(int g) throws IOException * @param g The gray value. * @throws IOException If an IO error occurs while writing to the stream. * @throws IllegalArgumentException If the parameter is invalid. 
+ * @deprecated use {@link #setNonStrokingColor(float) setNonStrokingColor(float)} */ + @Deprecated public void setNonStrokingColor(double g) throws IOException + { + setNonStrokingColor((float) g); + } + + /** + * Set the non-stroking color in the DeviceGray color space. Range is 0..1. + * + * @param g The gray value. + * @throws IOException If an IO error occurs while writing to the stream. + * @throws IllegalArgumentException If the parameter is invalid. + */ + public void setNonStrokingColor(float g) throws IOException { if (isOutsideOneInterval(g)) { throw new IllegalArgumentException("Parameter must be within 0..1, but is " + g); } - writeOperand((float) g); - writeOperator("g"); + writeOperand(g); + writeOperator(OperatorName.NON_STROKING_GRAY); setNonStrokingColorSpaceStack(PDDeviceGray.INSTANCE); } @@ -1336,11 +1587,11 @@ public void addRect(float x, float y, float width, float height) throws IOExcept writeOperand(y); writeOperand(width); writeOperand(height); - writeOperator("re"); + writeOperator(OperatorName.APPEND_RECT); } /** - * Draw a rectangle on the page using the current non stroking color. + * Fill a rectangle on the page using the current non stroking color. * * @param x The lower left x coordinate. * @param y The lower left y coordinate. 
@@ -1404,7 +1655,7 @@ public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) writeOperand(y2); writeOperand(x3); writeOperand(y3); - writeOperator("c"); + writeOperator(OperatorName.CURVE_TO); } /** @@ -1445,7 +1696,7 @@ public void curveTo2(float x2, float y2, float x3, float y3) throws IOException writeOperand(y2); writeOperand(x3); writeOperand(y3); - writeOperator("v"); + writeOperator(OperatorName.CURVE_TO_REPLICATE_INITIAL_POINT); } /** @@ -1486,7 +1737,7 @@ public void curveTo1(float x1, float y1, float x3, float y3) throws IOException writeOperand(y1); writeOperand(x3); writeOperand(y3); - writeOperator("y"); + writeOperator(OperatorName.CURVE_TO_REPLICATE_FINAL_POINT); } /** @@ -1505,7 +1756,7 @@ public void moveTo(float x, float y) throws IOException } writeOperand(x); writeOperand(y); - writeOperator("m"); + writeOperator(OperatorName.MOVE_TO); } /** @@ -1524,7 +1775,7 @@ public void lineTo(float x, float y) throws IOException } writeOperand(x); writeOperand(y); - writeOperator("l"); + writeOperator(OperatorName.LINE_TO); } /** @@ -1536,7 +1787,8 @@ public void lineTo(float x, float y) throws IOException * @param yEnd The end y coordinate. * @throws IOException If there is an error while adding the line. * @throws IllegalStateException If the method was called within a text block. - * @deprecated Use {@link #moveTo} followed by {@link #lineTo}. + * @deprecated Use {@link #moveTo moveTo(xStart,yStart)} followed by + * {@link #lineTo lineTo(xEnd,yEnd)}. */ @Deprecated public void addLine(float xStart, float yStart, float xEnd, float yEnd) throws IOException @@ -1550,7 +1802,7 @@ public void addLine(float xStart, float yStart, float xEnd, float yEnd) throws I } /** - * Draw a line on the page using the current non stroking color and the current line width. + * Draw a line on the page using the current stroking color and the current line width. * * @param xStart The start x coordinate. * @param yStart The start y coordinate. 
@@ -1558,7 +1810,8 @@ public void addLine(float xStart, float yStart, float xEnd, float yEnd) throws I * @param yEnd The end y coordinate. * @throws IOException If there is an error while drawing on the screen. * @throws IllegalStateException If the method was called within a text block. - * @deprecated Use {@link #moveTo} followed by {@link #lineTo} followed by {@link #stroke}. + * @deprecated Use {@link #moveTo moveTo(xStart,yStart)} followed by + * {@link #lineTo lineTo(xEnd,yEnd)} followed by {@link #stroke stroke()}. */ @Deprecated public void drawLine(float xStart, float yStart, float xEnd, float yEnd) throws IOException @@ -1607,7 +1860,7 @@ public void addPolygon(float[] x, float[] y) throws IOException } /** - * Draw a polygon on the page using the current non stroking color. + * Draw a polygon on the page using the current stroking color. * @param x x coordinate of each points * @param y y coordinate of each points * @throws IOException If there is an error while drawing on the screen. @@ -1626,7 +1879,7 @@ public void drawPolygon(float[] x, float[] y) throws IOException } /** - * Draw and fill a polygon on the page using the current non stroking color. + * Draw and fill a polygon on the page using the current stroking / non stroking colors. * @param x x coordinate of each points * @param y y coordinate of each points * @throws IOException If there is an error while drawing on the screen. 
@@ -1656,7 +1909,7 @@ public void stroke() throws IOException { throw new IllegalStateException("Error: stroke is not allowed within a text block."); } - writeOperator("S"); + writeOperator(OperatorName.STROKE_PATH); } /** @@ -1671,7 +1924,7 @@ public void closeAndStroke() throws IOException { throw new IllegalStateException("Error: closeAndStroke is not allowed within a text block."); } - writeOperator("s"); + writeOperator(OperatorName.CLOSE_AND_STROKE); } /** @@ -1711,7 +1964,7 @@ public void fill() throws IOException { throw new IllegalStateException("Error: fill is not allowed within a text block."); } - writeOperator("f"); + writeOperator(OperatorName.FILL_NON_ZERO); } /** @@ -1726,7 +1979,7 @@ public void fillEvenOdd() throws IOException { throw new IllegalStateException("Error: fillEvenOdd is not allowed within a text block."); } - writeOperator("f*"); + writeOperator(OperatorName.FILL_EVEN_ODD); } /** @@ -1743,7 +1996,7 @@ public void fillAndStroke() throws IOException { throw new IllegalStateException("Error: fillAndStroke is not allowed within a text block."); } - writeOperator("B"); + writeOperator(OperatorName.FILL_NON_ZERO_AND_STROKE); } /** @@ -1760,7 +2013,7 @@ public void fillAndStrokeEvenOdd() throws IOException { throw new IllegalStateException("Error: fillAndStrokeEvenOdd is not allowed within a text block."); } - writeOperator("B*"); + writeOperator(OperatorName.FILL_EVEN_ODD_AND_STROKE); } /** @@ -1777,7 +2030,7 @@ public void closeAndFillAndStroke() throws IOException { throw new IllegalStateException("Error: closeAndFillAndStroke is not allowed within a text block."); } - writeOperator("b"); + writeOperator(OperatorName.CLOSE_FILL_NON_ZERO_AND_STROKE); } /** @@ -1794,7 +2047,7 @@ public void closeAndFillAndStrokeEvenOdd() throws IOException { throw new IllegalStateException("Error: closeAndFillAndStrokeEvenOdd is not allowed within a text block."); } - writeOperator("b*"); + writeOperator(OperatorName.CLOSE_FILL_EVEN_ODD_AND_STROKE); } /** 
@@ -1812,7 +2065,7 @@ public void shadingFill(PDShading shading) throws IOException } writeOperand(resources.add(shading)); - writeOperator("sh"); + writeOperator(OperatorName.SHADING_FILL); } /** @@ -1839,7 +2092,7 @@ public void closePath() throws IOException { throw new IllegalStateException("Error: closePath is not allowed within a text block."); } - writeOperator("h"); + writeOperator(OperatorName.CLOSE_PATH); } /** @@ -1859,17 +2112,17 @@ public void clipPath(int windingRule) throws IOException } if (windingRule == PathIterator.WIND_NON_ZERO) { - writeOperator("W"); + writeOperator(OperatorName.CLIP_NON_ZERO); } else if (windingRule == PathIterator.WIND_EVEN_ODD) { - writeOperator("W*"); + writeOperator(OperatorName.CLIP_EVEN_ODD); } else { throw new IllegalArgumentException("Error: unknown value for winding rule"); } - writeOperator("n"); + writeOperator(OperatorName.ENDPATH); } /** @@ -1884,10 +2137,10 @@ public void clip() throws IOException { throw new IllegalStateException("Error: clip is not allowed within a text block."); } - writeOperator("W"); + writeOperator(OperatorName.CLIP_NON_ZERO); // end path without filling or stroking - writeOperator("n"); + writeOperator(OperatorName.ENDPATH); } /** @@ -1902,16 +2155,16 @@ public void clipEvenOdd() throws IOException { throw new IllegalStateException("Error: clipEvenOdd is not allowed within a text block."); } - writeOperator("W*"); + writeOperator(OperatorName.CLIP_EVEN_ODD); // end path without filling or stroking - writeOperator("n"); + writeOperator(OperatorName.ENDPATH); } /** * Set line width to the given value. * - * @param lineWidth The width which is used for drwaing. + * @param lineWidth The width which is used for drawing. * @throws IOException If the content stream could not be written * @throws IllegalStateException If the method was called within a text block. 
*/ @@ -1922,7 +2175,7 @@ public void setLineWidth(float lineWidth) throws IOException throw new IllegalStateException("Error: setLineWidth is not allowed within a text block."); } writeOperand(lineWidth); - writeOperator("w"); + writeOperator(OperatorName.SET_LINE_WIDTH); } /** @@ -1942,7 +2195,7 @@ public void setLineJoinStyle(int lineJoinStyle) throws IOException if (lineJoinStyle >= 0 && lineJoinStyle <= 2) { writeOperand(lineJoinStyle); - writeOperator("j"); + writeOperator(OperatorName.SET_LINE_JOINSTYLE); } else { @@ -1967,7 +2220,7 @@ public void setLineCapStyle(int lineCapStyle) throws IOException if (lineCapStyle >= 0 && lineCapStyle <= 2) { writeOperand(lineCapStyle); - writeOperator("J"); + writeOperator(OperatorName.SET_LINE_CAPSTYLE); } else { @@ -1996,7 +2249,27 @@ public void setLineDashPattern(float[] pattern, float phase) throws IOException } write("] "); writeOperand(phase); - writeOperator("d"); + writeOperator(OperatorName.SET_LINE_DASHPATTERN); + } + + /** + * Set the miter limit. + * + * @param miterLimit the new miter limit. + * @throws IOException If the content stream could not be written. 
+ */ + public void setMiterLimit(float miterLimit) throws IOException + { + if (inTextMode) + { + throw new IllegalStateException("Error: setMiterLimit is not allowed within a text block."); + } + if (miterLimit <= 0.0) + { + throw new IllegalArgumentException("A miter limit <= 0 is invalid and will not render in Acrobat Reader"); + } + writeOperand(miterLimit); + writeOperator(OperatorName.SET_LINE_MITERLIMIT); } /** @@ -2021,7 +2294,7 @@ public void beginMarkedContentSequence(COSName tag) throws IOException public void beginMarkedContent(COSName tag) throws IOException { writeOperand(tag); - writeOperator("BMC"); + writeOperator(OperatorName.BEGIN_MARKED_CONTENT); } /** @@ -2038,7 +2311,7 @@ public void beginMarkedContentSequence(COSName tag, COSName propsName) throws IO { writeOperand(tag); writeOperand(propsName); - writeOperator("BDC"); + writeOperator(OperatorName.BEGIN_MARKED_CONTENT_SEQ); } /** @@ -2053,7 +2326,7 @@ public void beginMarkedContent(COSName tag, PDPropertyList propertyList) throws { writeOperand(tag); writeOperand(resources.add(propertyList)); - writeOperator("BDC"); + writeOperator(OperatorName.BEGIN_MARKED_CONTENT_SEQ); } /** @@ -2075,7 +2348,7 @@ public void endMarkedContentSequence() throws IOException */ public void endMarkedContent() throws IOException { - writeOperator("EMC"); + writeOperator(OperatorName.END_MARKED_CONTENT); } /** @@ -2083,7 +2356,7 @@ public void endMarkedContent() throws IOException * * @param commands The commands to append to the stream. * @throws IOException If an error occurs while writing to the stream. - * @deprecated This method will be removed in a future release. + * @deprecated Usage of this method is discouraged. */ @Deprecated public void appendRawCommands(String commands) throws IOException @@ -2096,7 +2369,7 @@ public void appendRawCommands(String commands) throws IOException * * @param commands The commands to append to the stream. * @throws IOException If an error occurs while writing to the stream. 
- * @deprecated This method will be removed in a future release. + * @deprecated Usage of this method is discouraged. */ @Deprecated public void appendRawCommands(byte[] commands) throws IOException @@ -2109,7 +2382,7 @@ public void appendRawCommands(byte[] commands) throws IOException * * @param data Append a raw byte to the stream. * @throws IOException If an error occurs while writing to the stream. - * @deprecated This method will be removed in a future release. + * @deprecated Usage of this method is discouraged. */ @Deprecated public void appendRawCommands(int data) throws IOException @@ -2122,7 +2395,7 @@ public void appendRawCommands(int data) throws IOException * * @param data Append a formatted double value to the stream. * @throws IOException If an error occurs while writing to the stream. - * @deprecated This method will be removed in a future release. + * @deprecated Usage of this method is discouraged. */ @Deprecated public void appendRawCommands(double data) throws IOException @@ -2135,7 +2408,7 @@ public void appendRawCommands(double data) throws IOException * * @param data Append a formatted float value to the stream. * @throws IOException If an error occurs while writing to the stream. - * @deprecated This method will be removed in a future release. + * @deprecated Usage of this method is discouraged. */ @Deprecated public void appendRawCommands(float data) throws IOException @@ -2148,7 +2421,7 @@ public void appendRawCommands(float data) throws IOException * * @param name the name * @throws IOException If an error occurs while writing to the stream. - * @deprecated This method will be removed in a future release. + * @deprecated Usage of this method is discouraged. 
*/ @Deprecated public void appendCOSName(COSName name) throws IOException @@ -2165,20 +2438,60 @@ public void appendCOSName(COSName name) throws IOException public void setGraphicsStateParameters(PDExtendedGraphicsState state) throws IOException { writeOperand(resources.add(state)); - writeOperator("gs"); + writeOperator(OperatorName.SET_GRAPHICS_STATE_PARAMS); } /** - * Writes a real real to the content stream. + * Write a comment line. + * + * @param comment the comment to be added to the content stream. + * + * @throws IOException If the content stream could not be written. + * @throws IllegalArgumentException If the comment contains a newline. This is not allowed, because the next line + * could be ordinary PDF content. */ - private void writeOperand(float real) throws IOException + public void addComment(String comment) throws IOException { - write(formatDecimal.format(real)); - output.write(' '); + if (comment.indexOf('\n') >= 0 || comment.indexOf('\r') >= 0) + { + throw new IllegalArgumentException("comment should not include a newline"); + } + output.write('%'); + output.write(comment.getBytes(Charsets.US_ASCII)); + output.write('\n'); } /** * Writes a real number to the content stream. + * + * @param real the float value to be added to the content stream. + * + * @throws IOException if something went wrong + * @throws IllegalArgumentException if the parameter is not a finite number + */ + protected void writeOperand(float real) throws IOException + { + if (Float.isInfinite(real) || Float.isNaN(real)) + { + throw new IllegalArgumentException(real + " is not a finite number"); + } + + int byteCount = NumberFormatUtil.formatFloatFast(real, formatDecimal.getMaximumFractionDigits(), formatBuffer); + + if (byteCount == -1) + { + //Fast formatting failed + write(formatDecimal.format(real)); + } + else + { + output.write(formatBuffer, 0, byteCount); + } + output.write(' '); + } + + /** + * Writes an integer number to the content stream. 
*/ private void writeOperand(int integer) throws IOException { @@ -2213,7 +2526,7 @@ private void write(String text) throws IOException } /** - * Writes a string to the content stream as ASCII. + * Writes a newline to the content stream as ASCII. */ private void writeLine() throws IOException { @@ -2249,7 +2562,15 @@ private void writeAffineTransform(AffineTransform transform) throws IOException @Override public void close() throws IOException { - output.close(); + if (inTextMode) + { + LOG.warn("You did not call endText(), some viewers won't display your text"); + } + if (output != null) + { + output.close(); + output = null; + } } private boolean isOutside255Interval(int val) @@ -2285,4 +2606,76 @@ private void setNonStrokingColorSpaceStack(PDColorSpace colorSpace) nonStrokingColorSpaceStack.setElementAt(colorSpace, nonStrokingColorSpaceStack.size() - 1); } } + + /** + * Set the text rendering mode. This determines whether showing text shall cause glyph outlines + * to be stroked, filled, used as a clipping boundary, or some combination of the three. + * + * @param rm The text rendering mode. + * @throws IOException If the content stream could not be written. + */ + public void setRenderingMode(RenderingMode rm) throws IOException + { + writeOperand(rm.intValue()); + writeOperator(OperatorName.SET_TEXT_RENDERINGMODE); + } + + /** + * Set the character spacing. The value shall be added to the horizontal or vertical component + * of the glyph's displacement, depending on the writing mode. + * + * @param spacing character spacing + * @throws IOException If the content stream could not be written. + */ + public void setCharacterSpacing(float spacing) throws IOException + { + writeOperand(spacing); + writeOperator(OperatorName.SET_CHAR_SPACING); + } + + /** + * Set the word spacing. The value shall be added to the horizontal or vertical component of the + * ASCII SPACE character, depending on the writing mode. + *

+ * This will have an effect only with Type1 and TrueType fonts, not with Type0 fonts. The PDF + * specification tells why: "Word spacing shall be applied to every occurrence of the + * single-byte character code 32 in a string when using a simple font or a composite font that + * defines code 32 as a single-byte code. It shall not apply to occurrences of the byte value 32 + * in multiple-byte codes." + * + * @param spacing word spacing + * @throws IOException If the content stream could not be written. + */ + public void setWordSpacing(float spacing) throws IOException + { + writeOperand(spacing); + writeOperator(OperatorName.SET_WORD_SPACING); + } + + /** + * Set the horizontal scaling to scale / 100. + * + * @param scale number specifying the percentage of the normal width. Default value: 100 (normal + * width). + * @throws IOException If the content stream could not be written. + */ + public void setHorizontalScaling(float scale) throws IOException + { + writeOperand(scale); + writeOperator(OperatorName.SET_TEXT_HORIZONTAL_SCALING); + } + + /** + * Set the text rise value, i.e. move the baseline up or down. This is useful for drawing + * superscripts or subscripts. + * + * @param rise Specifies the distance, in unscaled text space units, to move the baseline up or + * down from its default location. 0 restores the default location. 
+ * @throws IOException + */ + public void setTextRise(float rise) throws IOException + { + writeOperand(rise); + writeOperator(OperatorName.SET_TEXT_RISE); + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageTree.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageTree.java index eef34b6e98e..36b7ee17937 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageTree.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageTree.java @@ -28,7 +28,12 @@ import org.apache.pdfbox.pdmodel.common.COSObjectable; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.NoSuchElementException; +import java.util.Set; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; /** * The page tree, which defines the ordering of pages in the document in an efficient manner. @@ -37,6 +42,7 @@ */ public class PDPageTree implements COSObjectable, Iterable { + private static final Log LOG = LogFactory.getLog(PDPageTree.class); private final COSDictionary root; private final PDDocument document; // optional @@ -72,7 +78,7 @@ public PDPageTree(COSDictionary root) { if (root == null) { - throw new IllegalArgumentException("root cannot be null"); + throw new IllegalArgumentException("page tree root cannot be null"); } // repair bad PDFs which contain a Page dict instead of a page tree, see PDFBOX-3154 if (COSName.PAGE.equals(root.getCOSName(COSName.TYPE))) @@ -105,10 +111,14 @@ public static COSBase getInheritableAttribute(COSDictionary node, COSName key) return value; } - COSDictionary parent = (COSDictionary) node.getDictionaryObject(COSName.PARENT, COSName.P); - if (parent != null) + COSBase base = node.getDictionaryObject(COSName.PARENT, COSName.P); + if (base instanceof COSDictionary) { - return getInheritableAttribute(parent, key); + COSDictionary parent = (COSDictionary) base; + if (COSName.PAGES.equals(parent.getDictionaryObject(COSName.TYPE))) + { + return 
getInheritableAttribute(parent, key); + } } return null; @@ -132,7 +142,7 @@ private List getKids(COSDictionary node) { List result = new ArrayList(); - COSArray kids = (COSArray)node.getDictionaryObject(COSName.KIDS); + COSArray kids = node.getCOSArray(COSName.KIDS); if (kids == null) { // probably a malformed PDF @@ -141,7 +151,16 @@ private List getKids(COSDictionary node) for (int i = 0, size = kids.size(); i < size; i++) { - result.add((COSDictionary)kids.getObject(i)); + COSBase base = kids.getObject(i); + if (base instanceof COSDictionary) + { + result.add((COSDictionary) base); + } + else + { + LOG.warn("COSDictionary expected, but got " + + (base == null ? "null" : base.getClass().getSimpleName())); + } } return result; @@ -153,10 +172,12 @@ private List getKids(COSDictionary node) private final class PageIterator implements Iterator { private final Queue queue = new ArrayDeque(); + private Set set = new HashSet(); private PageIterator(COSDictionary node) { enqueueKids(node); + set = null; // release memory, we don't use this anymore } private void enqueueKids(COSDictionary node) @@ -166,6 +187,16 @@ private void enqueueKids(COSDictionary node) List kids = getKids(node); for (COSDictionary kid : kids) { + if (set.contains(kid)) + { + // PDFBOX-5009, PDFBOX-3953: prevent stack overflow with malformed PDFs + LOG.error("This page tree node has already been visited"); + continue; + } + else if (kid.containsKey(COSName.KIDS)) + { + set.add(kid); + } enqueueKids(kid); } } @@ -184,6 +215,10 @@ public boolean hasNext() @Override public PDPage next() { + if (!hasNext()) + { + throw new NoSuchElementException(); + } COSDictionary next = queue.poll(); sanitizeType(next); @@ -203,6 +238,10 @@ public void remove() * Returns the page at the given index. 
* * @param index zero-based index + * + * @throws IllegalStateException if the requested index isn't found or doesn't point to a valid + * page dictionary + * @throws IndexOutOfBoundsException if the requested index is higher than the page count */ public PDPage get(int index) { @@ -235,6 +274,8 @@ private static void sanitizeType(COSDictionary dictionary) * @param node page tree node to search * @param encountered number of pages encountered so far * @return COS dictionary of the Page object + * @throws IllegalStateException if the requested page number isn't found + * @throws IndexOutOfBoundsException if the requested page number is higher than the page count */ private COSDictionary get(int pageNum, COSDictionary node, int encountered) { @@ -277,11 +318,11 @@ private COSDictionary get(int pageNum, COSDictionary node, int encountered) } } - throw new IllegalStateException(); + throw new IllegalStateException("1-based index not found: " + pageNum); } else { - throw new IndexOutOfBoundsException("Index out of bounds: " + pageNum); + throw new IndexOutOfBoundsException("1-based index out of bounds: " + pageNum); } } else @@ -292,7 +333,7 @@ private COSDictionary get(int pageNum, COSDictionary node, int encountered) } else { - throw new IllegalStateException(); + throw new IllegalStateException("1-based index not found: " + pageNum); } } } @@ -304,8 +345,8 @@ private boolean isPageTreeNode(COSDictionary node ) { // some files such as PDFBOX-2250-229205.pdf don't have Pages set as the Type, so we have // to check for the presence of Kids too - return node.getCOSName(COSName.TYPE) == COSName.PAGES || - node.containsKey(COSName.KIDS); + return node != null && + (node.getCOSName(COSName.TYPE) == COSName.PAGES || node.containsKey(COSName.KIDS)); } /** @@ -358,13 +399,14 @@ private SearchContext(PDPage page) private void visitPage(COSDictionary current) { index++; - found = searched.equals(current); + found = searched == current; } } /** - * Returns the number of leaf 
nodes (page objects) that are descendants of this root within the - * page tree. + * Returns the number of leaf nodes (page objects) that are descendants of this root within the page tree. + * + * @return the number of leaf nodes. */ public int getCount() { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPatternContentStream.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPatternContentStream.java new file mode 100644 index 00000000000..c4a6bf88e54 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPatternContentStream.java @@ -0,0 +1,39 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel; + +import java.io.IOException; + +import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern; + +/** + * + * @author Tilman Hausherr + */ +public final class PDPatternContentStream extends PDAbstractContentStream +{ + /** + * Create a new tiling pattern content stream. + * + * @param pattern The tiling pattern stream to write to. + * + * @throws IOException If there is an error writing to the form contents. 
+ */ + public PDPatternContentStream(PDTilingPattern pattern) throws IOException + { + super(null, pattern.getContentStream().createOutputStream(), pattern.getResources()); + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java index 4a66c7dc946..1c41da3d8f8 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java @@ -17,7 +17,10 @@ package org.apache.pdfbox.pdmodel; import java.io.IOException; +import java.lang.ref.SoftReference; import java.util.Collections; +import java.util.HashMap; +import java.util.Map; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; @@ -48,6 +51,11 @@ public final class PDResources implements COSObjectable { private final COSDictionary resources; private final ResourceCache cache; + + // PDFBOX-3442 cache fonts that are not indirect objects, as these aren't cached in ResourceCache + // and this would result in huge memory footprint in text extraction + private final Map > directFontCache = + new HashMap>(); /** * Constructor for embedding. @@ -102,7 +110,10 @@ public COSDictionary getCOSObject() * Returns the font resource with the given name, or null if none exists. * * @param name Name of the font resource. - * @throws java.io.IOException if something went wrong. + * + * @return the font resource with the given name. + * + * @throws IOException if something went wrong. 
*/ public PDFont getFont(COSName name) throws IOException { @@ -115,18 +126,34 @@ public PDFont getFont(COSName name) throws IOException return cached; } } + else if (indirect == null) + { + SoftReference ref = directFontCache.get(name); + if (ref != null) + { + PDFont cached = ref.get(); + if (cached != null) + { + return cached; + } + } + } PDFont font = null; - COSDictionary dict = (COSDictionary)get(COSName.FONT, name); - if (dict != null) + COSBase base = get(COSName.FONT, name); + if (base instanceof COSDictionary) { - font = PDFontFactory.createFont(dict); + font = PDFontFactory.createFont((COSDictionary) base, cache); } - if (cache != null) + if (cache != null && indirect != null) { cache.put(indirect, font); } + else if (indirect == null) + { + directFontCache.put(name, new SoftReference(font)); + } return font; } @@ -177,7 +204,7 @@ public PDColorSpace getColorSpace(COSName name, boolean wasDefault) throws IOExc } // we can't cache PDPattern, because it holds page resources, see PDFBOX-2370 - if (cache != null && !(colorSpace instanceof PDPattern)) + if (cache != null && indirect != null && !(colorSpace instanceof PDPattern)) { cache.put(indirect, colorSpace); } @@ -188,6 +215,8 @@ public PDColorSpace getColorSpace(COSName name, boolean wasDefault) throws IOExc * Returns true if the given color space name exists in these resources. * * @param name Name of the color space resource. + * + * @return true if the color space with the given name exists. */ public boolean hasColorSpace(COSName name) { @@ -195,10 +224,11 @@ public boolean hasColorSpace(COSName name) } /** - * Returns the extended graphics state resource with the given name, or null - * if none exists. + * Returns the extended graphics state resource with the given name, or null if none exists. * * @param name Name of the graphics state resource. + * + * @return the extended graphics state resource with the given name. 
*/ public PDExtendedGraphicsState getExtGState(COSName name) { @@ -214,13 +244,13 @@ public PDExtendedGraphicsState getExtGState(COSName name) // get the instance PDExtendedGraphicsState extGState = null; - COSDictionary dict = (COSDictionary)get(COSName.EXT_G_STATE, name); - if (dict != null) + COSBase base = get(COSName.EXT_G_STATE, name); + if (base instanceof COSDictionary) { - extGState = new PDExtendedGraphicsState(dict); + extGState = new PDExtendedGraphicsState((COSDictionary) base); } - if (cache != null) + if (cache != null && indirect != null) { cache.put(indirect, extGState); } @@ -231,7 +261,10 @@ public PDExtendedGraphicsState getExtGState(COSName name) * Returns the shading resource with the given name, or null if none exists. * * @param name Name of the shading resource. - * @throws java.io.IOException if something went wrong. + * + * @return the shading resource of the given name. + * + * @throws IOException if something went wrong. */ public PDShading getShading(COSName name) throws IOException { @@ -247,13 +280,13 @@ public PDShading getShading(COSName name) throws IOException // get the instance PDShading shading = null; - COSDictionary dict = (COSDictionary)get(COSName.SHADING, name); - if (dict != null) + COSBase base = get(COSName.SHADING, name); + if (base instanceof COSDictionary) { - shading = PDShading.create(dict); + shading = PDShading.create((COSDictionary) base); } - if (cache != null) + if (cache != null && indirect != null) { cache.put(indirect, shading); } @@ -264,7 +297,10 @@ public PDShading getShading(COSName name) throws IOException * Returns the pattern resource with the given name, or null if none exists. * * @param name Name of the pattern resource. - * @throws java.io.IOException if something went wrong. + * + * @return the pattern resource of the given name. + * + * @throws IOException if something went wrong. 
*/ public PDAbstractPattern getPattern(COSName name) throws IOException { @@ -280,13 +316,13 @@ public PDAbstractPattern getPattern(COSName name) throws IOException // get the instance PDAbstractPattern pattern = null; - COSDictionary dict = (COSDictionary)get(COSName.PATTERN, name); - if (dict != null) + COSBase base = get(COSName.PATTERN, name); + if (base instanceof COSDictionary) { - pattern = PDAbstractPattern.create(dict); + pattern = PDAbstractPattern.create((COSDictionary) base, getResourceCache()); } - if (cache != null) + if (cache != null && indirect != null) { cache.put(indirect, pattern); } @@ -297,6 +333,8 @@ public PDAbstractPattern getPattern(COSName name) throws IOException * Returns the property list resource with the given name, or null if none exists. * * @param name Name of the property list resource. + * + * @return the property list resource of the given name. */ public PDPropertyList getProperties(COSName name) { @@ -312,13 +350,13 @@ public PDPropertyList getProperties(COSName name) // get the instance PDPropertyList propertyList = null; - COSDictionary dict = (COSDictionary)get(COSName.PROPERTIES, name); - if (dict != null) + COSBase base = get(COSName.PROPERTIES, name); + if (base instanceof COSDictionary) { - propertyList = PDPropertyList.create(dict); + propertyList = PDPropertyList.create((COSDictionary) base); } - if (cache != null) + if (cache != null && indirect != null) { cache.put(indirect, propertyList); } @@ -355,7 +393,10 @@ else if (value instanceof COSObject) * Returns the XObject resource with the given name, or null if none exists. * * @param name Name of the XObject resource. - * @throws java.io.IOException if something went wrong. + * + * @return the XObject resource of the given name. + * + * @throws IOException if something went wrong. 
*/ public PDXObject getXObject(COSName name) throws IOException { @@ -384,15 +425,43 @@ else if (value instanceof COSObject) { xobject = PDXObject.createXObject(value, this); } - - // we can't cache PDImageXObject, because it holds page resources, see PDFBOX-2370 - if (cache != null && !(xobject instanceof PDImageXObject)) + if (cache != null && indirect != null && isAllowedCache(xobject)) { cache.put(indirect, xobject); } return xobject; } + private boolean isAllowedCache(PDXObject xobject) + { + if (xobject instanceof PDImageXObject) + { + COSBase colorSpace = xobject.getCOSObject().getDictionaryObject(COSName.COLORSPACE); + if (colorSpace instanceof COSName) + { + // don't cache if it might use page resources, see PDFBOX-2370 and PDFBOX-3484 + COSName colorSpaceName = (COSName) colorSpace; + if (colorSpaceName.equals(COSName.DEVICECMYK) && hasColorSpace(COSName.DEFAULT_CMYK)) + { + return false; + } + if (colorSpaceName.equals(COSName.DEVICERGB) && hasColorSpace(COSName.DEFAULT_RGB)) + { + return false; + } + if (colorSpaceName.equals(COSName.DEVICEGRAY) && hasColorSpace(COSName.DEFAULT_GRAY)) + { + return false; + } + if (hasColorSpace(colorSpaceName)) + { + return false; + } + } + } + return true; + } + /** * Returns the resource with the given name and kind as an indirect object, or null. */ @@ -408,6 +477,7 @@ private COSObject getIndirect(COSName kind, COSName name) { return (COSObject)base; } + // not an indirect object. Resource may have been added at runtime. return null; } @@ -426,6 +496,8 @@ private COSBase get(COSName kind, COSName name) /** * Returns the names of the color space resources, if any. + * + * @return the names of all color space resources. */ public Iterable getColorSpaceNames() { @@ -434,6 +506,8 @@ public Iterable getColorSpaceNames() /** * Returns the names of the XObject resources, if any. + * + * @return the names of all XObject resources. 
*/ public Iterable getXObjectNames() { @@ -442,6 +516,8 @@ public Iterable getXObjectNames() /** * Returns the names of the font resources, if any. + * + * @return the names of all font resources. */ public Iterable getFontNames() { @@ -450,6 +526,8 @@ public Iterable getFontNames() /** * Returns the names of the property list resources, if any. + * + * @return the names of all property list resources. */ public Iterable getPropertiesNames() { @@ -458,6 +536,8 @@ public Iterable getPropertiesNames() /** * Returns the names of the shading resources, if any. + * + * @return the names of all shading resources. */ public Iterable getShadingNames() { @@ -466,6 +546,8 @@ public Iterable getShadingNames() /** * Returns the names of the pattern resources, if any. + * + * @return the names of all pattern resources. */ public Iterable getPatternNames() { @@ -474,6 +556,8 @@ public Iterable getPatternNames() /** * Returns the names of the extended graphics state resources, if any. + * + * @return the names of all extended graphics state resources. */ public Iterable getExtGStateNames() { @@ -482,6 +566,8 @@ public Iterable getExtGStateNames() /** * Returns the resource names of the given kind. + * + * @return the names of all resources of the given kind. */ private Iterable getNames(COSName kind) { @@ -621,6 +707,20 @@ private COSName add(COSName kind, String prefix, COSObjectable object) return dict.getKeyForValue(object.getCOSObject()); } + // PDFBOX-4509: It could exist as an indirect object, happens when a font is taken from the + // AcroForm default resources of a loaded PDF. 
+ if (dict != null && COSName.FONT.equals(kind)) + { + for (Map.Entry entry : dict.entrySet()) + { + if (entry.getValue() instanceof COSObject && + object.getCOSObject() == ((COSObject) entry.getValue()).getObject()) + { + return entry.getKey(); + } + } + } + // add the item with a new key COSName name = createKey(kind, prefix); put(kind, name, object); @@ -743,6 +843,8 @@ public void put(COSName name, PDXObject xobject) /** * Returns the resource cache associated with the Resources, or null if there is none. + * + * @return the resource cache associated with the resources. */ public ResourceCache getResourceCache() { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDStructureElementNameTreeNode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDStructureElementNameTreeNode.java index 427aaad6b0b..d4520e7e7df 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDStructureElementNameTreeNode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDStructureElementNameTreeNode.java @@ -35,7 +35,7 @@ public class PDStructureElementNameTreeNode extends PDNameTreeNode createChildNode( COSDictionary dic ) { return new PDStructureElementNameTreeNode(dic); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PageLayout.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PageLayout.java index b268eca779f..ddb79010b94 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PageLayout.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PageLayout.java @@ -44,25 +44,12 @@ public enum PageLayout public static PageLayout fromString(String value) { - if (value.equals("SinglePage")) + for (PageLayout instance : PageLayout.values()) { - return SINGLE_PAGE; - } - else if (value.equals("OneColumn")) - { - return ONE_COLUMN; - } - else if (value.equals("TwoColumnLeft")) - { - return TWO_COLUMN_LEFT; - } - else if (value.equals("TwoPageLeft")) - { - return TWO_PAGE_LEFT; - } - else if (value.equals("TwoPageRight")) - { - return 
TWO_PAGE_RIGHT; + if (instance.value.equals(value)) + { + return instance; + } } throw new IllegalArgumentException(value); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PageMode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PageMode.java index 3b0bd7fe870..5501b5d8b95 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PageMode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PageMode.java @@ -44,29 +44,12 @@ public enum PageMode public static PageMode fromString(String value) { - if (value.equals("UseNone")) + for (PageMode instance : PageMode.values()) { - return USE_NONE; - } - else if (value.equals("UseOutlines")) - { - return USE_OUTLINES; - } - else if (value.equals("UseThumbs")) - { - return USE_THUMBS; - } - else if (value.equals("FullScreen")) - { - return FULL_SCREEN; - } - else if (value.equals("UseOC")) - { - return USE_OPTIONAL_CONTENT; - } - else if (value.equals("UseAttachments")) - { - return USE_ATTACHMENTS; + if (instance.value.equals(value)) + { + return instance; + } } throw new IllegalArgumentException(value); } @@ -80,6 +63,8 @@ else if (value.equals("UseAttachments")) /** * Returns the string value, as used in a PDF file. + * + * @return the string value. */ public String stringValue() { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ResourceCache.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ResourceCache.java index 85a1aee5e31..31012dec4ff 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ResourceCache.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ResourceCache.java @@ -36,72 +36,135 @@ public interface ResourceCache { /** * Returns the font resource for the given indirect object, if it is in the cache. + * + * @param indirect the indirect object + * + * @return the font resource of the given indirect object. + * @throws IOException if something went wrong. 
*/ PDFont getFont(COSObject indirect) throws IOException; /** * Returns the color space resource for the given indirect object, if it is in the cache. + * + * @param indirect the indirect object + * + * @return the color space resource of the given indirect object. + * @throws IOException if something went wrong. */ PDColorSpace getColorSpace(COSObject indirect) throws IOException; /** - * Returns the external graphics state resource for the given indirect object, if it is in the - * cache. + * Returns the extended graphics state resource for the given indirect object, if it is in the cache. + * + * @param indirect the indirect object + * + * @return the extended graphics resource of the given indirect object. */ PDExtendedGraphicsState getExtGState(COSObject indirect); /** * Returns the shading resource for the given indirect object, if it is in the cache. + * + * @param indirect the indirect object + * + * @return the shading resource of the given indirect object. + * @throws IOException if something went wrong. */ PDShading getShading(COSObject indirect) throws IOException; /** - *Returns the pattern resource for the given indirect object, if it is in the cache. + * Returns the pattern resource for the given indirect object, if it is in the cache. + * + * @param indirect the indirect object + * + * @return the pattern resource of the given indirect object. + * @throws IOException if something went wrong. */ PDAbstractPattern getPattern(COSObject indirect) throws IOException; /** * Returns the property list resource for the given indirect object, if it is in the cache. + * + * @param indirect the indirect object + * + * @return the property list resource of the given indirect object. */ PDPropertyList getProperties(COSObject indirect); /** * Returns the XObject resource for the given indirect object, if it is in the cache. + * + * @param indirect the indirect object + * + * @return the XObject resource of the given indirect object. 
+ * @throws IOException if something went wrong. */ PDXObject getXObject(COSObject indirect) throws IOException; /** * Puts the given indirect font resource in the cache. + * + * @param indirect the indirect object of the resource. + * @param font the font resource. + * + * @throws IOException if something went wrong. */ void put(COSObject indirect, PDFont font) throws IOException; /** * Puts the given indirect color space resource in the cache. + * + * @param indirect the indirect object of the resource. + * @param colorSpace the color space resource. + * + * @throws IOException if something went wrong. */ void put(COSObject indirect, PDColorSpace colorSpace) throws IOException; /** * Puts the given indirect extended graphics state resource in the cache. + * + * @param indirect the indirect object of the resource. + * @param extGState the extended graphics state resource. */ void put(COSObject indirect, PDExtendedGraphicsState extGState); /** * Puts the given indirect shading resource in the cache. + * + * @param indirect the indirect object of the resource. + * @param shading the shading resource. + * + * @throws IOException if something went wrong. */ void put(COSObject indirect, PDShading shading) throws IOException; /** * Puts the given indirect pattern resource in the cache. + * + * @param indirect the indirect object of the resource. + * @param pattern the pattern resource. + * + * @throws IOException if something went wrong. */ void put(COSObject indirect, PDAbstractPattern pattern) throws IOException; /** * Puts the given indirect property list resource in the cache. + * + * @param indirect the indirect object of the resource. + * @param propertyList the property list resource. */ void put(COSObject indirect, PDPropertyList propertyList); /** * Puts the given indirect XObject resource in the cache. + * + * @param indirect the indirect object of the resource. + * @param xobject the XObject resource. + * + * @throws IOException if something went wrong. 
*/ void put(COSObject indirect, PDXObject xobject) throws IOException; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/COSArrayList.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/COSArrayList.java index 73c1aa5785c..4c9195069bf 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/COSArrayList.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/COSArrayList.java @@ -43,6 +43,10 @@ public class COSArrayList implements List private final COSArray array; private final List actual; + // indicates that the list has been filtered + // i.e. the number of entries in array and actual differ + private boolean isFiltered = false; + private COSDictionary parentDict; private COSName dictKey; @@ -56,7 +60,15 @@ public COSArrayList() } /** - * Constructor. + * Create the COSArrayList specifying the List and the backing COSArray. + * + *

User of this constructor need to ensure that the entries in the List and + * the backing COSArray are matching i.e. the COSObject of the List entry is + * included in the COSArray. + * + *

If the number of entries in the List and the COSArray differ + * it is assumed that the List has been filtered. In that case the COSArrayList + * shall only be used for reading purposes and no longer for updating. * * @param actualList The list of standard java objects * @param cosArray The COS array object to sync to. @@ -65,8 +77,30 @@ public COSArrayList( List actualList, COSArray cosArray ) { actual = actualList; array = cosArray; + + // if the number of entries differs this may come from a filter being + // applied at the PDModel level + if (actual.size() != array.size()) { + isFiltered = true; + } } + /** + * This constructor is to be used if the array doesn't exist, but is to be created and added to + * the parent dictionary as soon as the first element is added to the array. + * + * @param dictionary The dictionary that holds the item, and will hold the array if an item is + * added. + * @param dictionaryKey The key into the dictionary to set the item. + */ + public COSArrayList(COSDictionary dictionary, COSName dictionaryKey) + { + array = new COSArray(); + actual = new ArrayList(); + parentDict = dictionary; + dictKey = dictionaryKey; + } + /** * This is a really special constructor. Sometimes the PDF spec says * that a dictionary entry can either be a single item or an array of those @@ -184,6 +218,11 @@ public boolean add(E o) @Override public boolean remove(Object o) { + + if (isFiltered) { + throw new UnsupportedOperationException("removing entries from a filtered List is not permitted"); + } + boolean retval = true; int index = actual.indexOf( o ); if( index >= 0 ) @@ -213,6 +252,10 @@ public boolean containsAll(Collection c) @Override public boolean addAll(Collection c) { + if (isFiltered) { + throw new UnsupportedOperationException("Adding to a filtered List is not permitted"); + } + //when adding if there is a parentDict then change the item //in the dictionary from a single item to an array. 
if( parentDict != null && c.size() > 0) @@ -232,6 +275,11 @@ public boolean addAll(Collection c) @Override public boolean addAll(int index, Collection c) { + + if (isFiltered) { + throw new UnsupportedOperationException("Inserting to a filtered List is not permitted"); + } + //when adding if there is a parentDict then change the item //in the dictionary from a single item to an array. if( parentDict != null && c.size() > 0) @@ -291,10 +339,18 @@ public static List convertFloatCOSArrayToList( COSArray floatArray ) List retval = null; if( floatArray != null ) { - List numbers = new ArrayList(); + List numbers = new ArrayList(floatArray.size()); for( int i=0; i( numbers, floatArray ); } @@ -405,10 +461,8 @@ public static COSArray converterToCOSArray( List cosObjectableList ) else { array = new COSArray(); - Iterator iter = cosObjectableList.iterator(); - while( iter.hasNext() ) + for (Object next : cosObjectableList) { - Object next = iter.next(); if( next instanceof String ) { array.add( new COSString( (String)next ) ); @@ -443,11 +497,9 @@ else if( next == null ) private List toCOSObjectList( Collection list ) { - List cosObjects = new ArrayList(); - Iterator iter = list.iterator(); - while( iter.hasNext() ) + List cosObjects = new ArrayList(list.size()); + for (Object next : list) { - Object next = iter.next(); if( next instanceof String ) { cosObjects.add( new COSString( (String)next ) ); @@ -467,7 +519,19 @@ private List toCOSObjectList( Collection list ) @Override public boolean removeAll(Collection c) { - array.removeAll( toCOSObjectList( c ) ); + for (Iterator iterator = c.iterator(); iterator.hasNext();) + { + COSBase itemCOSBase = ((COSObjectable)iterator.next()).getCOSObject(); + // remove all indirect objects too by dereferencing them + // before doing the comparison + for (int i=array.size()-1; i>=0; i--) + { + if (itemCOSBase.equals(array.getObject(i))) + { + array.remove(i); + } + } + } return actual.removeAll( c ); } @@ -477,7 +541,19 @@ public boolean 
removeAll(Collection c) @Override public boolean retainAll(Collection c) { - array.retainAll( toCOSObjectList( c ) ); + for (Iterator iterator = c.iterator(); iterator.hasNext();) + { + COSBase itemCOSBase = ((COSObjectable)iterator.next()).getCOSObject(); + // remove all indirect objects too by dereferencing them + // before doing the comparison + for (int i=array.size()-1; i>=0; i--) + { + if (!itemCOSBase.equals(array.getObject(i))) + { + array.remove(i); + } + } + } return actual.retainAll( c ); } @@ -531,6 +607,11 @@ public E get(int index) @Override public E set(int index, E element) { + + if (isFiltered) { + throw new UnsupportedOperationException("Replacing an element in a filtered List is not permitted"); + } + if( element instanceof String ) { COSString item = new COSString( (String)element ); @@ -557,6 +638,10 @@ public E set(int index, E element) @Override public void add(int index, E element) { + if (isFiltered) { + throw new UnsupportedOperationException("Adding an element in a filtered List is not permitted"); + } + //when adding if there is a parentDict then change the item //in the dictionary from a single item to an array. if( parentDict != null ) @@ -583,8 +668,12 @@ public void add(int index, E element) @Override public E remove(int index) { - array.remove( index ); - return actual.remove( index ); + if (isFiltered) { + throw new UnsupportedOperationException("removing entries from a filtered List is not permitted"); + } + + array.remove(index); + return actual.remove(index); } /** @@ -602,8 +691,7 @@ public int indexOf(Object o) @Override public int lastIndexOf(Object o) { - return actual.indexOf( o ); - + return actual.lastIndexOf( o ); } /** @@ -643,10 +731,22 @@ public String toString() } /** - * This will return then underlying COSArray. + * This will return the underlying COSArray. + * + * @return the COSArray + */ + public COSArray getCOSArray() + { + return array; + } + + /** + * This will return the underlying COSArray. 
* + * @deprecated use {@link #getCOSArray()} instead. * @return the COSArray */ + @Deprecated public COSArray toList() { return array; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/COSDictionaryMap.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/COSDictionaryMap.java index 643110d985e..32ca4c20068 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/COSDictionaryMap.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/COSDictionaryMap.java @@ -78,7 +78,7 @@ public boolean isEmpty() @Override public boolean containsKey(Object key) { - return map.keySet().contains( key ); + return actuals.containsKey( key ); } /** @@ -127,7 +127,7 @@ public V remove(Object key) @Override public void putAll(Map t) { - throw new RuntimeException( "Not yet implemented" ); + throw new UnsupportedOperationException("Not yet implemented"); } /** @@ -176,7 +176,7 @@ public boolean equals(Object o) boolean retval = false; if( o instanceof COSDictionaryMap ) { - COSDictionaryMap other = (COSDictionaryMap)o; + COSDictionaryMap other = (COSDictionaryMap) o; retval = other.map.equals( this.map ); } return retval; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDNameTreeNode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDNameTreeNode.java index 0d463f30927..6289e523be5 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDNameTreeNode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDNameTreeNode.java @@ -36,13 +36,15 @@ * This class represents a node in a name tree. * * @author Ben Litchfield + * + * @param The type of the values in this name tree. */ public abstract class PDNameTreeNode implements COSObjectable { private static final Log LOG = LogFactory.getLog(PDNameTreeNode.class); private final COSDictionary node; - private PDNameTreeNode parent; + private PDNameTreeNode parent; /** * Constructor. 
@@ -78,7 +80,7 @@ public COSDictionary getCOSObject() * * @return parent node */ - public PDNameTreeNode getParent() + public PDNameTreeNode getParent() { return parent; } @@ -88,7 +90,7 @@ public PDNameTreeNode getParent() * * @param parentNode the node to be set as parent */ - public void setParent(PDNameTreeNode parentNode) + public void setParent(PDNameTreeNode parentNode) { parent = parentNode; calculateLimits(); @@ -112,7 +114,7 @@ public boolean isRootNode() public List> getKids() { List> retval = null; - COSArray kids = (COSArray)node.getDictionaryObject( COSName.KIDS ); + COSArray kids = node.getCOSArray(COSName.KIDS); if( kids != null ) { List> pdObjects = new ArrayList>(); @@ -129,13 +131,15 @@ public List> getKids() /** * Set the children of this named tree. * - * @param kids The children of this named tree. + * @param kids The children of this named tree. These have to be in sorted order. Because of + * that, it is usually easier to call {@link setNames} with a map and pass a single element list + * here. 
*/ public void setKids( List> kids ) { - if (kids != null && kids.size() > 0) + if (kids != null && !kids.isEmpty()) { - for (PDNameTreeNode kidsNode : kids) + for (PDNameTreeNode kidsNode : kids) { kidsNode.setParent(this); } @@ -165,10 +169,10 @@ private void calculateLimits() else { List> kids = getKids(); - if (kids != null && kids.size() > 0) + if (kids != null && !kids.isEmpty()) { - PDNameTreeNode firstKid = kids.get(0); - PDNameTreeNode lastKid = kids.get(kids.size() - 1); + PDNameTreeNode firstKid = kids.get(0); + PDNameTreeNode lastKid = kids.get(kids.size() - 1); String lowerLimit = firstKid.getLowerLimit(); setLowerLimit(lowerLimit); String upperLimit = lastKid.getUpperLimit(); @@ -225,8 +229,11 @@ public T getValue( String name ) throws IOException for( int i=0; i childNode = kids.get( i ); - if( childNode.getLowerLimit().compareTo( name ) <= 0 && - childNode.getUpperLimit().compareTo( name ) >= 0 ) + String upperLimit = childNode.getUpperLimit(); + String lowerLimit = childNode.getLowerLimit(); + if (upperLimit == null || lowerLimit == null || + upperLimit.compareTo(lowerLimit) < 0 || + (lowerLimit.compareTo(name) <= 0 && upperLimit.compareTo(name) >= 0)) { retval = childNode.getValue( name ); } @@ -241,22 +248,33 @@ public T getValue( String name ) throws IOException } /** - * This will return a map of names. The key will be a string, and the - * value will depend on where this class is being used. + * This will return a map of names on this level. The key will be a string, + * and the value will depend on where this class is being used. + * + * @return ordered map of COS objects or null if the dictionary + * contains no 'Names' entry on this level. * - * @return ordered map of cos objects or null if dictionary - * contains no 'Names' entry * @throws IOException If there is an error while creating the sub types. 
+ * @see #getKids() */ public Map getNames() throws IOException { - COSArray namesArray = (COSArray)node.getDictionaryObject( COSName.NAMES ); + COSArray namesArray = node.getCOSArray(COSName.NAMES); if( namesArray != null ) { Map names = new LinkedHashMap(); - for( int i=0; i getNames() throws IOException protected abstract PDNameTreeNode createChildNode( COSDictionary dic ); /** - * Set the names of for this node. The keys should be java.lang.String and the - * values must be a COSObjectable. This method will set the appropriate upper and lower - * limits based on the keys in the map. + * Set the names for this node. This method will set the appropriate upper and lower limits + * based on the keys in the map and take care of the ordering. * - * @param names map of names to objects, or null + * @param names map of names to objects, or null for nothing. */ public void setNames( Map names ) { @@ -324,7 +341,7 @@ public void setNames( Map names ) public String getUpperLimit() { String retval = null; - COSArray arr = (COSArray)node.getDictionaryObject( COSName.LIMITS ); + COSArray arr = node.getCOSArray(COSName.LIMITS); if( arr != null ) { retval = arr.getString( 1 ); @@ -339,7 +356,7 @@ public String getUpperLimit() */ private void setUpperLimit( String upper ) { - COSArray arr = (COSArray)node.getDictionaryObject( COSName.LIMITS ); + COSArray arr = node.getCOSArray(COSName.LIMITS); if( arr == null ) { arr = new COSArray(); @@ -358,7 +375,7 @@ private void setUpperLimit( String upper ) public String getLowerLimit() { String retval = null; - COSArray arr = (COSArray)node.getDictionaryObject( COSName.LIMITS ); + COSArray arr = node.getCOSArray(COSName.LIMITS); if( arr != null ) { retval = arr.getString( 0 ); @@ -373,7 +390,7 @@ public String getLowerLimit() */ private void setLowerLimit( String lower ) { - COSArray arr = (COSArray)node.getDictionaryObject( COSName.LIMITS ); + COSArray arr = node.getCOSArray(COSName.LIMITS); if( arr == null ) { arr = new COSArray(); 
diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDNumberTreeNode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDNumberTreeNode.java index 4cf0e6d2325..db438ab6241 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDNumberTreeNode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDNumberTreeNode.java @@ -17,7 +17,6 @@ package org.apache.pdfbox.pdmodel.common; import java.io.IOException; -import java.lang.reflect.Constructor; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -31,6 +30,7 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNull; /** * This class represents a PDF Number tree. See the PDF Reference 1.7 section @@ -60,7 +60,7 @@ public PDNumberTreeNode( Class valueClass ) /** * Constructor. * - * @param dict The dictionary that holds the name information. + * @param dict The dictionary that holds the number information. * @param valueClass The PD Model type of object that is the value. */ public PDNumberTreeNode( COSDictionary dict, Class valueClass ) @@ -105,11 +105,13 @@ public List getKids() /** * Set the children of this number tree. * - * @param kids The children of this number tree. + * @param kids The children of this number tree. These have to be in sorted order. Because of + * that, it is usually easier to call {@link setNumbers} with a map and pass a single element + * list here. */ public void setKids( List kids ) { - if (kids != null && kids.size() > 0) + if (kids != null && !kids.isEmpty()) { PDNumberTreeNode firstKid = kids.get(0); PDNumberTreeNode lastKid = kids.get(kids.size() - 1); @@ -135,38 +137,34 @@ else if ( node.getDictionaryObject( COSName.NUMS ) == null ) * * @throws IOException If there is a problem creating the values. 
*/ - public Object getValue( Integer index ) throws IOException + public Object getValue(Integer index) throws IOException { - Object retval = null; - Map names = getNumbers(); - if( names != null ) + Map numbers = getNumbers(); + if (numbers != null) { - retval = names.get( index ); + return numbers.get(index); } - else + Object retval = null; + List kids = getKids(); + if (kids != null) { - List kids = getKids(); - if ( kids != null ) + for (int i = 0; i < kids.size() && retval == null; i++) { - for( int i=0; i= 0) { - PDNumberTreeNode childNode = kids.get( i ); - if( childNode.getLowerLimit().compareTo( index ) <= 0 && - childNode.getUpperLimit().compareTo( index ) >= 0 ) - { - retval = childNode.getValue( index ); - } + retval = childNode.getValue(index); } } - else - { - LOG.warn("NumberTreeNode does not have \"nums\" nor \"kids\" objects."); - } + } + else + { + LOG.warn("NumberTreeNode does not have \"nums\" nor \"kids\" objects."); } return retval; } - /** * This will return a map of numbers. The key will be a java.lang.Integer, the value will * depend on where this class is being used. 
@@ -178,16 +176,26 @@ public Object getValue( Integer index ) throws IOException public Map getNumbers() throws IOException { Map indices = null; - COSArray namesArray = (COSArray)node.getDictionaryObject( COSName.NUMS ); - if( namesArray != null ) + COSBase numBase = node.getDictionaryObject(COSName.NUMS); + if (numBase instanceof COSArray) { - indices = new HashMap(); - for( int i=0; i(); + if (numbersArray.size() % 2 != 0) { - COSInteger key = (COSInteger)namesArray.getObject(i); - COSBase cosValue = namesArray.getObject( i+1 ); - COSObjectable pdValue = convertCOSToPD( cosValue ); - indices.put( key.intValue(), pdValue ); + LOG.warn("Numbers array has odd size: " + numbersArray.size()); + } + for (int i = 0; i + 1 < numbersArray.size(); i += 2) + { + COSBase base = numbersArray.getObject(i); + if (!(base instanceof COSInteger)) + { + LOG.error("page labels ignored, index " + i + " should be a number, but is " + base); + return null; + } + COSInteger key = (COSInteger) base; + COSBase cosValue = numbersArray.getObject(i + 1); + indices.put(key.intValue(), cosValue == null ? null : convertCOSToPD(cosValue)); } indices = Collections.unmodifiableMap(indices); } @@ -195,27 +203,25 @@ public Map getNumbers() throws IOException } /** - * Method to convert the COS value in the name tree to the PD Model object. The - * default implementation will simply use reflection to create the correct object - * type. Subclasses can do whatever they want. + * Method to convert the COS value in the number tree to the PD Model object. The default + * implementation will simply use reflection to create the correct object type. Subclasses can + * do whatever they want. * * @param base The COS object to convert. * @return The converted PD Model object. * @throws IOException If there is an error during creation. 
*/ - protected COSObjectable convertCOSToPD( COSBase base ) throws IOException + protected COSObjectable convertCOSToPD(COSBase base) throws IOException { - COSObjectable retval = null; + // valueType (passed in constructor here) must have a constructor of type of COSBase as parameter try { - Constructor ctor = valueType.getConstructor( new Class[] { base.getClass() } ); - retval = ctor.newInstance( new Object[] { base } ); + return valueType.getDeclaredConstructor(base.getClass()).newInstance(base); } catch( Throwable t ) { - throw new IOException( "Error while trying to create value in number tree:" + t.getMessage(), t); + throw new IOException("Error while trying to create value in number tree:" + t.getMessage(), t); } - return retval; } /** @@ -230,11 +236,10 @@ protected PDNumberTreeNode createChildNode( COSDictionary dic ) } /** - * Set the names of for this node. The keys should be java.lang.String and the - * values must be a COSObjectable. This method will set the appropriate upper and lower - * limits based on the keys in the map. + * Set the numbers for this node. This method will set the appropriate upper and lower limits + * based on the keys in the map and take care of the ordering. * - * @param numbers The map of names to objects. + * @param numbers The map of numbers to objects, or null for nothing. */ public void setNumbers( Map numbers ) { @@ -252,11 +257,11 @@ public void setNumbers( Map numbers ) { array.add( COSInteger.get( key ) ); COSObjectable obj = numbers.get( key ); - array.add( obj ); + array.add(obj == null ? COSNull.NULL : obj); } Integer lower = null; Integer upper = null; - if( keys.size() > 0 ) + if (!keys.isEmpty()) { lower = keys.get( 0 ); upper = keys.get( keys.size()-1 ); @@ -268,7 +273,7 @@ public void setNumbers( Map numbers ) } /** - * Get the highest value for a key in the name map. + * Get the highest value for a key in the number map. * * @return The highest value for a key in the map. 
*/ @@ -309,7 +314,7 @@ private void setUpperLimit( Integer upper ) } /** - * Get the lowest value for a key in the name map. + * Get the lowest value for a key in the number map. * * @return The lowest value for a key in the map. */ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDPageLabels.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDPageLabels.java index 7844250e5b1..6d4a68fbfb2 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDPageLabels.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDPageLabels.java @@ -24,6 +24,8 @@ import java.util.NoSuchElementException; import java.util.TreeMap; import java.util.Map.Entry; +import java.util.NavigableSet; +import java.util.TreeSet; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; @@ -91,28 +93,31 @@ public PDPageLabels(PDDocument document, COSDictionary dict) throws IOException { return; } - PDNumberTreeNode root = new PDNumberTreeNode(dict, COSDictionary.class); + PDNumberTreeNode root = new PDNumberTreeNode(dict, PDPageLabelRange.class); findLabels(root); } private void findLabels(PDNumberTreeNode node) throws IOException { + List kids = node.getKids(); if (node.getKids() != null) { - List kids = node.getKids(); for (PDNumberTreeNode kid : kids) { findLabels(kid); } } - else if (node.getNumbers() != null) + else { - Map numbers = node.getNumbers(); - for (Entry i : numbers.entrySet()) + Map numbers = node.getNumbers(); + if (numbers != null) { - if(i.getKey() >= 0) + for (Entry i : numbers.entrySet()) { - labels.put(i.getKey(), new PDPageLabelRange((COSDictionary)i.getValue())); + if (i.getKey() >= 0) + { + labels.put(i.getKey(), (PDPageLabelRange) i.getValue()); + } } } } @@ -224,14 +229,15 @@ public void newLabel(int pageIndex, String label) */ public String[] getLabelsByPageIndices() { - final String[] map = new String[doc.getNumberOfPages()]; + final int numberOfPages = doc.getNumberOfPages(); + final String[] map = 
new String[numberOfPages]; computeLabels(new LabelHandler() { @Override public void newLabel(int pageIndex, String label) { - if(pageIndex < doc.getNumberOfPages()) - { + if (pageIndex < numberOfPages) + { map[pageIndex] = label; } } @@ -239,6 +245,16 @@ public void newLabel(int pageIndex, String label) return map; } + /** + * Get an ordered set of page indices having a page label range. + * + * @return set of page indices. + */ + public NavigableSet getPageIndices() + { + return new TreeSet(labels.keySet()); + } + /** * Internal interface for the control flow support. * @@ -314,21 +330,22 @@ public String next() throw new NoSuchElementException(); } StringBuilder buf = new StringBuilder(); - if (labelInfo.getPrefix() != null) + String label = labelInfo.getPrefix(); + if (label != null) { - String label = labelInfo.getPrefix(); // there may be some labels with some null bytes at the end // which will lead to an incomplete output, see PDFBOX-1047 - while (label.lastIndexOf(0) != -1) + int index = label.indexOf(0); + if (index > -1) { - label = label.substring(0, label.length()-1); + label = label.substring(0, index); } buf.append(label); } - if (labelInfo.getStyle() != null) + String style = labelInfo.getStyle(); + if (style != null) { - buf.append(getNumber(labelInfo.getStart() + currentPage, - labelInfo.getStyle())); + buf.append(getNumber(labelInfo.getStart() + currentPage, style)); } currentPage++; return buf.toString(); @@ -379,7 +396,7 @@ private static String makeRomanLabel(int pageIndex) while (power < 3 && pageIndex > 0) { buf.insert(0, ROMANS[power][pageIndex % 10]); - pageIndex = pageIndex / 10; + pageIndex /= 10; power++; } // Prepend as many m as there are thousands (which is @@ -397,14 +414,14 @@ private static String makeRomanLabel(int pageIndex) } /** - * A..Z, AA..ZZ, AAA..ZZZ ... labeling as described in PDF32000-1:2008, + * a..z, aa..zz, aaa..zzz ... labeling as described in PDF32000-1:2008, * Table 159, Page 375. 
*/ private static String makeLetterLabel(int num) { StringBuilder buf = new StringBuilder(); int numLetters = num / 26 + Integer.signum(num % 26); - int letter = num % 26 + 26 * (1 - Integer.signum(num % 26)) + 64; + int letter = num % 26 + 26 * (1 - Integer.signum(num % 26)) + 'a' - 1; for (int i = 0; i < numLetters; i++) { buf.appendCodePoint(letter); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDRectangle.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDRectangle.java index d0ee11bc5cb..ba6b9cf8c2e 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDRectangle.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDRectangle.java @@ -18,6 +18,7 @@ import java.awt.geom.GeneralPath; import java.awt.geom.Point2D; +import java.util.Arrays; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSFloat; @@ -34,8 +35,11 @@ */ public class PDRectangle implements COSObjectable { + /** user space units per inch */ private static final float POINTS_PER_INCH = 72; - private static final float MM_PER_INCH = 1 / (10 * 2.54f) * POINTS_PER_INCH; + + /** user space units per millimeter */ + private static final float POINTS_PER_MM = 1 / (10 * 2.54f) * POINTS_PER_INCH; /** A rectangle the size of U.S. Letter, 8.5" x 11". */ public static final PDRectangle LETTER = new PDRectangle(8.5f * POINTS_PER_INCH, @@ -44,25 +48,25 @@ public class PDRectangle implements COSObjectable public static final PDRectangle LEGAL = new PDRectangle(8.5f * POINTS_PER_INCH, 14f * POINTS_PER_INCH); /** A rectangle the size of A0 Paper. */ - public static final PDRectangle A0 = new PDRectangle(841 * MM_PER_INCH, 1189 * MM_PER_INCH); + public static final PDRectangle A0 = new PDRectangle(841 * POINTS_PER_MM, 1189 * POINTS_PER_MM); /** A rectangle the size of A1 Paper. 
*/ - public static final PDRectangle A1 = new PDRectangle(594 * MM_PER_INCH, 841 * MM_PER_INCH); + public static final PDRectangle A1 = new PDRectangle(594 * POINTS_PER_MM, 841 * POINTS_PER_MM); /** A rectangle the size of A2 Paper. */ - public static final PDRectangle A2 = new PDRectangle(420 * MM_PER_INCH, 594 * MM_PER_INCH); + public static final PDRectangle A2 = new PDRectangle(420 * POINTS_PER_MM, 594 * POINTS_PER_MM); /** A rectangle the size of A3 Paper. */ - public static final PDRectangle A3 = new PDRectangle(297 * MM_PER_INCH, 420 * MM_PER_INCH); + public static final PDRectangle A3 = new PDRectangle(297 * POINTS_PER_MM, 420 * POINTS_PER_MM); /** A rectangle the size of A4 Paper. */ - public static final PDRectangle A4 = new PDRectangle(210 * MM_PER_INCH, 297 * MM_PER_INCH); + public static final PDRectangle A4 = new PDRectangle(210 * POINTS_PER_MM, 297 * POINTS_PER_MM); /** A rectangle the size of A5 Paper. */ - public static final PDRectangle A5 = new PDRectangle(148 * MM_PER_INCH, 210 * MM_PER_INCH); + public static final PDRectangle A5 = new PDRectangle(148 * POINTS_PER_MM, 210 * POINTS_PER_MM); /** A rectangle the size of A6 Paper. */ - public static final PDRectangle A6 = new PDRectangle(105 * MM_PER_INCH, 148 * MM_PER_INCH); + public static final PDRectangle A6 = new PDRectangle(105 * POINTS_PER_MM, 148 * POINTS_PER_MM); private final COSArray rectArray; @@ -125,7 +129,7 @@ public PDRectangle( BoundingBox box ) */ public PDRectangle( COSArray array ) { - float[] values = array.toFloatArray(); + float[] values = Arrays.copyOf(array.toFloatArray(), 4); rectArray = new COSArray(); // we have to start with the lower left corner rectArray.add( new COSFloat( Math.min(values[0],values[2] )) ); @@ -153,8 +157,8 @@ public boolean contains( float x, float y ) /** * This will create a translated rectangle based off of this rectangle, such * that the new rectangle retains the same dimensions(height/width), but the - * lower left x,y values are zero.
- * 100, 100, 400, 400 (llx, lly, urx, ury )
+ * lower left x,y values are zero.
+ * 100, 100, 400, 400 (llx, lly, urx, ury )
* will be translated to 0,0,300,300 * * @return A new rectangle that has been translated back to the origin. @@ -280,8 +284,12 @@ public float getHeight() } /** - * Returns a path which represents this rectangle having been transformed by the given matrix. - * Note that the resulting path need not be rectangular. + * Returns a path which represents this rectangle having been transformed by the given matrix. Note that the + * resulting path need not be rectangular. + * + * @param matrix the matrix to be used for the transformation. + * + * @return the resulting path. */ public GeneralPath transform(Matrix matrix) { @@ -316,8 +324,10 @@ public COSBase getCOSObject() } /** - * Returns a general path equivalent to this rectangle. This method avoids the problems - * caused by Rectangle2D not working well with -ve rectangles. + * Returns a general path equivalent to this rectangle. This method avoids the problems caused by Rectangle2D not + * working well with -ve rectangles. + * + * @return the general path. 
*/ public GeneralPath toGeneralPath() { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDStream.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDStream.java index 9fe7687e3c9..56d988f348f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDStream.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/PDStream.java @@ -24,6 +24,9 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; @@ -32,6 +35,7 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSNull; import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.filter.DecodeOptions; import org.apache.pdfbox.filter.Filter; import org.apache.pdfbox.filter.FilterFactory; import org.apache.pdfbox.io.IOUtils; @@ -45,10 +49,12 @@ */ public class PDStream implements COSObjectable { + private static final Log LOG = LogFactory.getLog(PDStream.class); + private final COSStream stream; /** - * Creates a new PDStream object. + * Creates a new empty PDStream object. * * @param document The document that the stream will be part of. */ @@ -58,7 +64,7 @@ public PDStream(PDDocument document) } /** - * Creates a new PDStream object. + * Creates a new empty PDStream object. * * @param document The document that the stream will be part of. 
*/ @@ -137,10 +143,7 @@ private PDStream(PDDocument doc, InputStream input, COSBase filters) throws IOEx { output.close(); } - if (input != null) - { - input.close(); - } + input.close(); } } @@ -182,7 +185,7 @@ public void addCompression() } else { - filters = new ArrayList(); + filters = new ArrayList(1); filters.add(COSName.FLATE_DECODE); setFilters(filters); } @@ -235,6 +238,11 @@ public COSInputStream createInputStream() throws IOException return stream.createInputStream(); } + public COSInputStream createInputStream(DecodeOptions options) throws IOException + { + return stream.createInputStream(options); + } + /** * This will get a stream with some filters applied but not others. This is * useful when doing images, ie filters = [flate,dct], we want to remove @@ -261,8 +269,14 @@ public InputStream createInputStream(List stopFilters) throws IOExceptio else { Filter filter = FilterFactory.INSTANCE.getFilter(nextFilter); - filter.decode(is, os, stream, i); - IOUtils.closeQuietly(is); + try + { + filter.decode(is, os, stream, i); + } + finally + { + IOUtils.closeQuietly(is); + } is = new ByteArrayInputStream(os.toByteArray()); os.reset(); } @@ -337,42 +351,57 @@ public void setFilters(List filters) */ public List getDecodeParms() throws IOException { - List retval = null; + // See PDF Ref 1.5 implementation note 7, the DP is sometimes used instead. + return internalGetDecodeParams(COSName.DECODE_PARMS, COSName.DP); + } - COSBase dp = stream.getDictionaryObject(COSName.DECODE_PARMS); - if (dp == null) - { - // See PDF Ref 1.5 implementation note 7, the DP is sometimes used - // instead. - dp = stream.getDictionaryObject(COSName.DP); - } + /** + * Get the list of decode parameters. Each entry in the list will refer to + * an entry in the filters list. + * + * @return The list of decode parameters. + * @throws IOException if there is an error retrieving the parameters. 
+ */ + public List getFileDecodeParams() throws IOException + { + return internalGetDecodeParams(COSName.F_DECODE_PARMS, null); + } + + private List internalGetDecodeParams(COSName name1, COSName name2) throws IOException + { + COSBase dp = stream.getDictionaryObject(name1, name2); if (dp instanceof COSDictionary) { - Map map = COSDictionaryMap - .convertBasicTypesToMap((COSDictionary) dp); - retval = new COSArrayList(map, dp, stream, - COSName.DECODE_PARMS); - } - else if (dp instanceof COSArray) + Map map = COSDictionaryMap.convertBasicTypesToMap((COSDictionary) dp); + return new COSArrayList(map, dp, stream, name1); + } + + if (dp instanceof COSArray) { COSArray array = (COSArray) dp; - List actuals = new ArrayList(); + List actuals = new ArrayList(array.size()); for (int i = 0; i < array.size(); i++) { - actuals.add(COSDictionaryMap - .convertBasicTypesToMap((COSDictionary) array - .getObject(i))); + COSBase base = array.getObject(i); + if (base instanceof COSDictionary) + { + actuals.add(COSDictionaryMap.convertBasicTypesToMap((COSDictionary) base)); + } + else + { + LOG.warn("Expected COSDictionary, got " + base + ", ignored"); + } } - retval = new COSArrayList(actuals, array); + return new COSArrayList(actuals, array); } - return retval; + return null; } /** - * This will set the list of decode parameterss. + * This will set the list of decode parameters. * - * @param decodeParams The list of decode parameterss. + * @param decodeParams The list of decode parameters. */ public void setDecodeParms(List decodeParams) { @@ -438,41 +467,6 @@ public void setFileFilters(List filters) stream.setItem(COSName.F_FILTER, obj); } - /** - * Get the list of decode parameters. Each entry in the list will refer to - * an entry in the filters list. - * - * @return The list of decode parameters. - * @throws IOException if there is an error retrieving the parameters. 
- */ - public List getFileDecodeParams() throws IOException - { - List retval = null; - - COSBase dp = stream.getDictionaryObject(COSName.F_DECODE_PARMS); - if (dp instanceof COSDictionary) - { - Map map = COSDictionaryMap - .convertBasicTypesToMap((COSDictionary) dp); - retval = new COSArrayList(map, dp, stream, - COSName.F_DECODE_PARMS); - } - else if (dp instanceof COSArray) - { - COSArray array = (COSArray) dp; - List actuals = new ArrayList(); - for (int i = 0; i < array.size(); i++) - { - actuals.add(COSDictionaryMap - .convertBasicTypesToMap((COSDictionary) array - .getObject(i))); - } - retval = new COSArrayList(actuals, array); - } - - return retval; - } - /** * This will set the list of decode params. * @@ -480,8 +474,7 @@ else if (dp instanceof COSArray) */ public void setFileDecodeParams(List decodeParams) { - stream.setItem("FDecodeParams", - COSArrayList.converterToCOSArray(decodeParams)); + stream.setItem(COSName.F_DECODE_PARMS, COSArrayList.converterToCOSArray(decodeParams)); } /** @@ -492,17 +485,11 @@ public void setFileDecodeParams(List decodeParams) */ public byte[] toByteArray() throws IOException { - ByteArrayOutputStream output = new ByteArrayOutputStream(); - byte[] buf = new byte[1024]; InputStream is = null; try { is = createInputStream(); - int amountRead; - while ((amountRead = is.read(buf)) != -1) - { - output.write(buf, 0, amountRead); - } + return IOUtils.toByteArray(is); } finally { @@ -511,7 +498,6 @@ public byte[] toByteArray() throws IOException is.close(); } } - return output.toByteArray(); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/PDComplexFileSpecification.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/PDComplexFileSpecification.java index a3e16466340..a9f4842349a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/PDComplexFileSpecification.java +++ 
b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/PDComplexFileSpecification.java @@ -74,7 +74,7 @@ private COSDictionary getEFDictionary() { if (efDictionary == null && fs != null) { - efDictionary = (COSDictionary)fs.getDictionaryObject( COSName.EF ); + efDictionary = fs.getCOSDictionary(COSName.EF); } return efDictionary; } @@ -131,7 +131,9 @@ public String getFileUnicode() } /** - * This will set unicode file name. + * This will set the unicode file name. If you call this, then do not forget to also call + * {@link #setFile(java.lang.String) setFile(String)} or the attachment will not be visible on + * some viewers. * * @param file The name of the file. */ @@ -152,7 +154,9 @@ public String getFile() } /** - * This will set the file name. + * This will set the file name. You should also call + * {@link #setFileUnicode(java.lang.String) setFileUnicode(String)} for cross-platform and + * cross-language compatibility. * * @param file The name of the file. */ @@ -176,7 +180,9 @@ public String getFileDos() * This will set name representing a dos file. * * @param file The name of the file. + * @deprecated This method is obsolescent and should not be used by conforming writers. */ + @Deprecated public void setFileDos( String file ) { fs.setString( COSName.DOS, file ); @@ -196,7 +202,9 @@ public String getFileMac() * This will set name representing a Mac file. * * @param file The name of the file. + * @deprecated This method is obsolescent and should not be used by conforming writers. */ + @Deprecated public void setFileMac( String file ) { fs.setString( COSName.MAC, file ); @@ -216,7 +224,9 @@ public String getFileUnix() * This will set name representing a Unix file. * * @param file The name of the file. + * @deprecated This method is obsolescent and should not be used by conforming writers. 
*/ + @Deprecated public void setFileUnix( String file ) { fs.setString( COSName.UNIX, file ); @@ -250,17 +260,18 @@ public boolean isVolatile() */ public PDEmbeddedFile getEmbeddedFile() { - PDEmbeddedFile file = null; - COSStream stream = (COSStream)getObjectFromEFDictionary(COSName.F); - if( stream != null ) + COSBase base = getObjectFromEFDictionary(COSName.F); + if (base instanceof COSStream) { - file = new PDEmbeddedFile( stream ); + return new PDEmbeddedFile((COSStream) base); } - return file; + return null; } /** - * Set the embedded file for this spec. + * Set the embedded file for this spec. You should also call + * {@link #setEmbeddedFileUnicode(org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile) setEmbeddedFileUnicode(PDEmbeddedFile)} + * for cross-platform and cross-language compatibility. * * @param file The file to be embedded. */ @@ -284,21 +295,22 @@ public void setEmbeddedFile( PDEmbeddedFile file ) * @return The embedded dos file for this file spec. */ public PDEmbeddedFile getEmbeddedFileDos() - { - PDEmbeddedFile file = null; - COSStream stream = (COSStream)getObjectFromEFDictionary( COSName.DOS ); - if( stream != null ) + { + COSBase base = getObjectFromEFDictionary( COSName.DOS ); + if (base instanceof COSStream) { - file = new PDEmbeddedFile( stream ); + return new PDEmbeddedFile((COSStream) base); } - return file; + return null; } /** * Set the embedded dos file for this spec. * * @param file The dos file to be embedded. + * @deprecated This method is obsolescent and should not be used by conforming writers. */ + @Deprecated public void setEmbeddedFileDos( PDEmbeddedFile file ) { COSDictionary ef = getEFDictionary(); @@ -319,21 +331,22 @@ public void setEmbeddedFileDos( PDEmbeddedFile file ) * @return The embedded Mac file for this file spec. 
*/ public PDEmbeddedFile getEmbeddedFileMac() - { - PDEmbeddedFile file = null; - COSStream stream = (COSStream)getObjectFromEFDictionary( COSName.MAC ); - if( stream != null ) + { + COSBase base = getObjectFromEFDictionary( COSName.MAC ); + if (base instanceof COSStream) { - file = new PDEmbeddedFile( stream ); + return new PDEmbeddedFile((COSStream) base); } - return file; + return null; } /** * Set the embedded Mac file for this spec. * * @param file The Mac file to be embedded. + * @deprecated This method is obsolescent and should not be used by conforming writers. */ + @Deprecated public void setEmbeddedFileMac( PDEmbeddedFile file ) { COSDictionary ef = getEFDictionary(); @@ -354,21 +367,22 @@ public void setEmbeddedFileMac( PDEmbeddedFile file ) * @return The embedded file for this file spec. */ public PDEmbeddedFile getEmbeddedFileUnix() - { - PDEmbeddedFile file = null; - COSStream stream = (COSStream)getObjectFromEFDictionary( COSName.UNIX ); - if( stream != null ) + { + COSBase base = getObjectFromEFDictionary( COSName.UNIX ); + if (base instanceof COSStream) { - file = new PDEmbeddedFile( stream ); + return new PDEmbeddedFile((COSStream) base); } - return file; + return null; } /** * Set the embedded Unix file for this spec. * * @param file The Unix file to be embedded. + * @deprecated This method is obsolescent and should not be used by conforming writers. */ + @Deprecated public void setEmbeddedFileUnix( PDEmbeddedFile file ) { COSDictionary ef = getEFDictionary(); @@ -389,18 +403,20 @@ public void setEmbeddedFileUnix( PDEmbeddedFile file ) * @return The embedded unicode file for this file spec. 
*/ public PDEmbeddedFile getEmbeddedFileUnicode() - { - PDEmbeddedFile file = null; - COSStream stream = (COSStream)getObjectFromEFDictionary( COSName.UF ); - if( stream != null ) + { + COSBase base = getObjectFromEFDictionary( COSName.UF ); + if (base instanceof COSStream) { - file = new PDEmbeddedFile( stream ); + return new PDEmbeddedFile((COSStream) base); } - return file; + return null; } /** - * Set the embedded Unicode file for this spec. + * Set the embedded Unicode file for this spec. If you call this, then do not forget to also + * call + * {@link #setEmbeddedFile(org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile) setEmbeddedFile(PDEmbeddedFile)} + * or the attachment will not be visible on some viewers. * * @param file The Unicode file to be embedded. */ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/PDEmbeddedFile.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/PDEmbeddedFile.java index 250a77d3189..7ed09b51e65 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/PDEmbeddedFile.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/PDEmbeddedFile.java @@ -35,21 +35,16 @@ public class PDEmbeddedFile extends PDStream { /** - * Constructor. - * - * @param document {@inheritDoc} + * {@inheritDoc} */ public PDEmbeddedFile( PDDocument document ) { super( document ); getCOSObject().setName(COSName.TYPE, "EmbeddedFile" ); - } /** - * Constructor. - * - * @param str The stream parameter. + * {@inheritDoc} */ public PDEmbeddedFile( COSStream str ) { @@ -57,12 +52,7 @@ public PDEmbeddedFile( COSStream str ) } /** - * Constructor. 
- * - * @param doc {@inheritDoc} - * @param str {@inheritDoc} - * - * @throws IOException {@inheritDoc} + * {@inheritDoc} */ public PDEmbeddedFile( PDDocument doc, InputStream str ) throws IOException { @@ -71,10 +61,7 @@ public PDEmbeddedFile( PDDocument doc, InputStream str ) throws IOException } /** - * Constructor. - * - * @param doc {@inheritDoc} - * @param input {@inheritDoc} + * {@inheritDoc} * @param filter Filter to apply to the stream. * * @throws IOException {@inheritDoc} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/PDFileSpecification.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/PDFileSpecification.java index efb3ff5340c..3af4b070a81 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/PDFileSpecification.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/PDFileSpecification.java @@ -33,7 +33,7 @@ public abstract class PDFileSpecification implements COSObjectable { /** - * A file specfication can either be a COSString or a COSDictionary. This + * A file specification can either be a COSString or a COSDictionary. This * will create the file specification either way. * * @param base The cos object that describes the fs. diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/package.html index 0b42edc6d7c..9d03297a345 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/filespecification/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunction.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunction.java index b188d2614c1..882a24aa5d2 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunction.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunction.java @@ -102,12 +102,13 @@ protected PDStream getPDStream() { return functionStream; } + /** * Create the correct PD Model function based on the COS base function. * * @param function The COS function dictionary. * - * @return The PDModel Function object. + * @return The PDModel Function object, never null. * * @throws IOException If we are unable to create the PDFunction object. */ @@ -117,35 +118,32 @@ public static PDFunction create( COSBase function ) throws IOException { return new PDFunctionTypeIdentity(null); } - - PDFunction retval = null; - if( function instanceof COSObject ) - { - function = ((COSObject)function).getObject(); - } - COSDictionary functionDictionary = (COSDictionary)function; - int functionType = functionDictionary.getInt( COSName.FUNCTION_TYPE ); - if( functionType == 0 ) - { - retval = new PDFunctionType0(functionDictionary); - } - else if( functionType == 2 ) + + COSBase base = function; + if (function instanceof COSObject) { - retval = new PDFunctionType2(functionDictionary); + base = ((COSObject) function).getObject(); } - else if( functionType == 3 ) + if (!(base instanceof COSDictionary)) { - retval = new PDFunctionType3(functionDictionary); + throw new IOException("Error: Function must be a Dictionary, but is " + + (base == null ? 
"(null)" : base.getClass().getSimpleName())); } - else if( functionType == 4 ) + COSDictionary functionDictionary = (COSDictionary) base; + int functionType = functionDictionary.getInt(COSName.FUNCTION_TYPE); + switch (functionType) { - retval = new PDFunctionType4(functionDictionary); + case 0: + return new PDFunctionType0(functionDictionary); + case 2: + return new PDFunctionType2(functionDictionary); + case 3: + return new PDFunctionType3(functionDictionary); + case 4: + return new PDFunctionType4(functionDictionary); + default: + throw new IOException("Error: Unknown function type " + functionType); } - else - { - throw new IOException( "Error: Unknown function type " + functionType ); - } - return retval; } /** @@ -153,9 +151,9 @@ else if( functionType == 4 ) * have a range specified. A range for output parameters * is optional so this may return zero for a function * that does have output parameters, this will simply return the - * number that have the rnage specified. + * number that have the range specified. * - * @return The number of input parameters that have a range + * @return The number of output parameters that have a range * specified. */ public int getNumberOfOutputParameters() @@ -238,8 +236,14 @@ public void setDomainValues(COSArray domainValues) } /** + * @param input The array of input values for the function. + * + * @return The of outputs the function returns based on those inputs. + * @throws IOException if something went wrong. + * * @deprecated Replaced by {@link #eval(float[] input)} */ + @Deprecated public COSArray eval(COSArray input) throws IOException { float[] outputValues = eval(input.toFloatArray()); @@ -258,7 +262,7 @@ public COSArray eval(COSArray input) throws IOException * @return The of outputs the function returns based on those inputs. * In many cases will be an array of a single value, but not always. * - * @throws IOException an IOExcpetion is thrown if something went wrong processing the function. 
+ * @throws IOException if something went wrong processing the function. */ public abstract float[] eval(float[] input) throws IOException; @@ -300,7 +304,7 @@ protected float[] clipToRange(float[] inputValues) { COSArray rangesArray = getRangeValues(); float[] result; - if (rangesArray != null) + if (rangesArray != null && rangesArray.size() > 0) { float[] rangeValues = rangesArray.toFloatArray(); int numberOfRanges = rangeValues.length/2; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionType0.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionType0.java index d77e5788159..b73ec34c17b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionType0.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionType0.java @@ -17,6 +17,8 @@ package org.apache.pdfbox.pdmodel.common.function; import java.io.IOException; +import java.io.InputStream; + import javax.imageio.stream.ImageInputStream; import javax.imageio.stream.MemoryCacheImageInputStream; @@ -99,51 +101,6 @@ public COSArray getSize() return size; } - /** - * Get all sample values of this function. - * - * @return an array with all samples. - */ - private int[][] getSamples() - { - if (samples == null) - { - int arraySize = 1; - int numberOfInputValues = getNumberOfInputParameters(); - int numberOfOutputValues = getNumberOfOutputParameters(); - COSArray sizes = getSize(); - for (int i = 0; i < numberOfInputValues; i++) - { - arraySize *= sizes.getInt(i); - } - samples = new int[arraySize][numberOfOutputValues]; - int bitsPerSample = getBitsPerSample(); - int index = 0; - try - { - // PDF spec 1.7 p.171: - // Each sample value is represented as a sequence of BitsPerSample bits. - // Successive values are adjacent in the bit stream; there is no padding at byte boundaries. 
- ImageInputStream mciis = new MemoryCacheImageInputStream(getPDStream().createInputStream()); - for (int i = 0; i < arraySize; i++) - { - for (int k = 0; k < numberOfOutputValues; k++) - { - // TODO will this cast work properly for 32 bitsPerSample or should we use long[]? - samples[index][k] = (int) mciis.readBits(bitsPerSample); - } - index++; - } - mciis.close(); - } - catch (IOException exception) - { - LOG.error("IOException while reading the sample values of this function.", exception); - } - } - return samples; - } - /** * Get the number of bits that the output value will take up. * @@ -282,36 +239,6 @@ public void setDecodeValues(COSArray decodeValues) getCOSObject().setItem(COSName.DECODE, decodeValues); } - /** - * calculate array index (structure described in p.171 PDF spec 1.7) in - * multiple dimensions. - * - * @param vector with coordinates - * @return index in flat array - */ - private int calcSampleIndex(int[] vector) - { - // inspiration: http://stackoverflow.com/a/12113479/535646 - // but used in reverse - float[] sizeValues = getSize().toFloatArray(); - int index = 0; - int sizeProduct = 1; - int dimension = vector.length; - for (int i = dimension - 2; i >= 0; --i) - { - sizeProduct *= sizeValues[i]; - } - for (int i = dimension - 1; i >= 0; --i) - { - index += sizeProduct * vector[i]; - if (i - 1 >= 0) - { - sizeProduct /= sizeValues[i - 1]; - } - } - return index; - } - /** * Inner class do to an interpolation in the Nth dimension by comparing the * content size of N-1 dimensional objects. This is done with the help of @@ -414,6 +341,82 @@ private float[] rinterpol(int[] coord, int step) return resultSample; } } + + /** + * calculate array index (structure described in p.171 PDF spec 1.7) in multiple dimensions. 
+ * + * @param vector with coordinates + * @return index in flat array + */ + private int calcSampleIndex(int[] vector) + { + // inspiration: http://stackoverflow.com/a/12113479/535646 + // but used in reverse + float[] sizeValues = getSize().toFloatArray(); + int index = 0; + int sizeProduct = 1; + int dimension = vector.length; + for (int i = dimension - 2; i >= 0; --i) + { + sizeProduct *= sizeValues[i]; + } + for (int i = dimension - 1; i >= 0; --i) + { + index += sizeProduct * vector[i]; + if (i - 1 >= 0) + { + sizeProduct /= sizeValues[i - 1]; + } + } + return index; + } + + /** + * Get all sample values of this function. + * + * @return an array with all samples. + */ + private int[][] getSamples() + { + if (samples == null) + { + int arraySize = 1; + int nIn = getNumberOfInputParameters(); + int nOut = getNumberOfOutputParameters(); + COSArray sizes = getSize(); + for (int i = 0; i < nIn; i++) + { + arraySize *= sizes.getInt(i); + } + samples = new int[arraySize][nOut]; + int bitsPerSample = getBitsPerSample(); + int index = 0; + try + { + // PDF spec 1.7 p.171: + // Each sample value is represented as a sequence of BitsPerSample bits. + // Successive values are adjacent in the bit stream; there is no padding at byte boundaries. + InputStream inputStream = getPDStream().createInputStream(); + ImageInputStream mciis = new MemoryCacheImageInputStream(inputStream); + for (int i = 0; i < arraySize; i++) + { + for (int k = 0; k < nOut; k++) + { + // TODO will this cast work properly for 32 bitsPerSample or should we use long[]? 
+ samples[index][k] = (int) mciis.readBits(bitsPerSample); + } + index++; + } + mciis.close(); + inputStream.close(); + } + catch (IOException exception) + { + LOG.error("IOException while reading the sample values of this function.", exception); + } + } + return samples; + } } /** @@ -433,6 +436,7 @@ public float[] eval(float[] input) throws IOException int[] inputPrev = new int[numberOfInputValues]; int[] inputNext = new int[numberOfInputValues]; + input = input.clone(); // PDFBOX-4461 for (int i = 0; i < numberOfInputValues; i++) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionType2.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionType2.java index a25ea5073f7..f01a15c0192 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionType2.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionType2.java @@ -54,24 +54,30 @@ public PDFunctionType2(COSBase function) { super(function); - if (getCOSObject().getDictionaryObject(COSName.C0) == null) + if (getCOSObject().getDictionaryObject(COSName.C0) instanceof COSArray) { - c0 = new COSArray(); - c0.add(new COSFloat(0)); + c0 = (COSArray) getCOSObject().getDictionaryObject(COSName.C0); } else { - c0 = (COSArray) getCOSObject().getDictionaryObject(COSName.C0); + c0 = new COSArray(); + } + if (c0.size() == 0) + { + c0.add(new COSFloat(0)); } - if (getCOSObject().getDictionaryObject(COSName.C1) == null) + if (getCOSObject().getDictionaryObject(COSName.C1) instanceof COSArray) { - c1 = new COSArray(); - c1.add(new COSFloat(1)); + c1 = (COSArray) getCOSObject().getDictionaryObject(COSName.C1); } else { - c1 = (COSArray) getCOSObject().getDictionaryObject(COSName.C1); + c1 = new COSArray(); + } + if (c1.size() == 0) + { + c1.add(new COSFloat(1)); } exponent = getCOSObject().getFloat(COSName.N); @@ -97,7 +103,7 @@ public float[] eval(float[] input) throws IOException // exponential interpolation float 
xToN = (float) Math.pow(input[0], exponent); // x^exponent - float[] result = new float[c0.size()]; + float[] result = new float[Math.min(c0.size(),c1.size())]; for (int j = 0; j < result.length; j++) { float c0j = ((COSNumber) c0.get(j)).floatValue(); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionType3.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionType3.java index f79ca60b04c..24e8e39ab48 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionType3.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionType3.java @@ -30,10 +30,11 @@ */ public class PDFunctionType3 extends PDFunction { - private COSArray functions = null; private COSArray encode = null; private COSArray bounds = null; + private PDFunction[] functionsArray = null; + private float[] boundsValues = null; /** * Constructor. @@ -68,19 +69,30 @@ public float[] eval(float[] input) throws IOException PDRange domain = getDomainForInput(0); // clip input value to domain x = clipToRange(x, domain.getMin(), domain.getMax()); + + if (functionsArray == null) + { + COSArray ar = getFunctions(); + functionsArray = new PDFunction[ar.size()]; + for (int i = 0; i < ar.size(); ++i) + { + functionsArray[i] = PDFunction.create(ar.getObject(i)); + } + } - COSArray functionsArray = getFunctions(); - int numberOfFunctions = functionsArray.size(); - // This doesn't make sense but it may happen ... - if (numberOfFunctions == 1) + if (functionsArray.length == 1) { - function = PDFunction.create(functionsArray.get(0)); + // This doesn't make sense but it may happen ... 
+ function = functionsArray[0]; PDRange encRange = getEncodeForParameter(0); x = interpolate(x, domain.getMin(), domain.getMax(), encRange.getMin(), encRange.getMax()); } else { - float[] boundsValues = getBounds().toFloatArray(); + if (boundsValues == null) + { + boundsValues = getBounds().toFloatArray(); + } int boundsSize = boundsValues.length; // create a combined array containing the domain and the bounds values // domain.min, bounds[0], bounds[1], ...., bounds[boundsSize-1], domain.max @@ -95,7 +107,7 @@ public float[] eval(float[] input) throws IOException if ( x >= partitionValues[i] && (x < partitionValues[i+1] || (i == partitionValuesSize - 2 && x == partitionValues[i+1]))) { - function = PDFunction.create(functionsArray.get(i)); + function = functionsArray[i]; PDRange encRange = getEncodeForParameter(i); x = interpolate(x, partitionValues[i], partitionValues[i+1], encRange.getMin(), encRange.getMax()); break; @@ -158,7 +170,7 @@ public COSArray getEncode() /** * Get the encode for the input parameter. * - * @param paramNum The function parameter number. + * @param n The function parameter number. * * @return The encode parameter range or null if none is set. 
*/ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionTypeIdentity.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionTypeIdentity.java index 51e10419285..4e87742200d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionTypeIdentity.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/PDFunctionTypeIdentity.java @@ -36,6 +36,7 @@ public int getFunctionType() { // shouldn't be called throw new UnsupportedOperationException(); + //TODO this is a violation of the interface segregation principle } @Override diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/package.html index a36a97640a7..5b0a626780e 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/ArithmeticOperators.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/ArithmeticOperators.java index cf2716ee4b1..86728eab818 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/ArithmeticOperators.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/ArithmeticOperators.java @@ -25,6 +25,11 @@ class ArithmeticOperators { + private ArithmeticOperators() + { + // Private constructor. + } + /** Implements the "abs" operator. 
*/ static class Abs implements Operator { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/BitwiseOperators.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/BitwiseOperators.java index 88911527de2..ee4caaa5838 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/BitwiseOperators.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/BitwiseOperators.java @@ -25,6 +25,11 @@ class BitwiseOperators { + private BitwiseOperators() + { + // Private constructor. + } + /** Abstract base class for logical operators. */ private abstract static class AbstractLogicalOperator implements Operator { @@ -68,7 +73,7 @@ static class And extends AbstractLogicalOperator @Override protected boolean applyForBoolean(boolean bool1, boolean bool2) { - return bool1 & bool2; + return bool1 && bool2; } @Override @@ -148,7 +153,7 @@ static class Or extends AbstractLogicalOperator @Override protected boolean applyForBoolean(boolean bool1, boolean bool2) { - return bool1 | bool2; + return bool1 || bool2; } @Override diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/ConditionalOperators.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/ConditionalOperators.java index a419182f29b..1333fdef5bf 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/ConditionalOperators.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/ConditionalOperators.java @@ -25,6 +25,11 @@ class ConditionalOperators { + private ConditionalOperators() + { + // Private constructor. + } + /** Implements the "if" operator. 
*/ static class If implements Operator { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/ExecutionContext.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/ExecutionContext.java index 135d299ef84..8b5dd4e1f01 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/ExecutionContext.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/ExecutionContext.java @@ -72,7 +72,7 @@ public Number popNumber() */ public int popInt() { - return ((Integer)stack.pop()); + return (Integer) stack.pop(); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/InstructionSequenceBuilder.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/InstructionSequenceBuilder.java index 1244eacef36..9bdb2189fe0 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/InstructionSequenceBuilder.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/InstructionSequenceBuilder.java @@ -26,6 +26,8 @@ */ public final class InstructionSequenceBuilder extends Parser.AbstractSyntaxHandler { + private static final Pattern INTEGER_PATTERN = Pattern.compile("[\\+\\-]?\\d+"); + private static final Pattern REAL_PATTERN = Pattern.compile("[\\-]?\\d*\\.\\d*([Ee]\\-?\\d+)?"); private final InstructionSequence mainSequence = new InstructionSequence(); private final Stack seqStack = new Stack(); @@ -62,9 +64,6 @@ private InstructionSequence getCurrentSequence() return this.seqStack.peek(); } - private static final Pattern INTEGER_PATTERN = Pattern.compile("[\\+\\-]?\\d+"); - private static final Pattern REAL_PATTERN = Pattern.compile("[\\-]?\\d*\\.\\d*([Ee]\\-?\\d+)?"); - /** {@inheritDoc} */ @Override public void token(CharSequence text) @@ -114,11 +113,8 @@ else if ("}".equals(token)) */ public static int parseInt(String token) { - if (token.startsWith("+")) - { - token = token.substring(1); - } 
- return Integer.parseInt(token); + //TODO Beginning with JDK7 Integer.parseInt accepts leading +'s + return Integer.parseInt(token.startsWith("+") ? token.substring(1) : token); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/Parser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/Parser.java index f29828e9220..da828844ca9 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/Parser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/Parser.java @@ -25,7 +25,7 @@ public final class Parser { /** Used to indicate the parsers current state. */ - private static enum State + private enum State { NEWLINE, WHITESPACE, COMMENT, TOKEN } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/RelationalOperators.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/RelationalOperators.java index facf0d166e9..de418915a5d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/RelationalOperators.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/RelationalOperators.java @@ -25,6 +25,11 @@ class RelationalOperators { + private RelationalOperators() + { + // Private constructor. + } + /** Implements the "eq" operator. */ static class Eq implements Operator { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/StackOperators.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/StackOperators.java index f4331a30d4e..ad1d3b31e67 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/StackOperators.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/StackOperators.java @@ -27,6 +27,11 @@ class StackOperators { + private StackOperators() + { + // Private constructor. + } + /** Implements the "copy" operator. 
*/ static class Copy implements Operator { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/package.html index 4c25940a656..2237b60e994 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/function/type4/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/package.html index 4d3bd103c00..4766b2dfbbd 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDAttributeObject.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDAttributeObject.java index 80e960f1bdb..24dad708b32 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDAttributeObject.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDAttributeObject.java @@ -35,6 +35,23 @@ public abstract class PDAttributeObject extends PDDictionaryWrapper { + /** + * Default constructor. + */ + public PDAttributeObject() + { + } + + /** + * Creates a new attribute object with a given dictionary. + * + * @param dictionary the dictionary + */ + public PDAttributeObject(COSDictionary dictionary) + { + super(dictionary); + } + /** * Creates an attribute object. * @@ -99,25 +116,6 @@ protected void setStructureElement(PDStructureElement structureElement) this.structureElement = structureElement; } - - /** - * Default constructor. 
- */ - public PDAttributeObject() - { - } - - /** - * Creates a new attribute object with a given dictionary. - * - * @param dictionary the dictionary - */ - public PDAttributeObject(COSDictionary dictionary) - { - super(dictionary); - } - - /** * Returns the owner of the attributes. * @@ -197,7 +195,7 @@ protected void notifyChanged() @Override public String toString() { - return new StringBuilder("O=").append(this.getOwner()).toString(); + return "O=" + this.getOwner(); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDMarkedContentReference.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDMarkedContentReference.java index f3c8bb0361c..14687fef901 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDMarkedContentReference.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDMarkedContentReference.java @@ -32,15 +32,6 @@ public class PDMarkedContentReference implements COSObjectable private final COSDictionary dictionary; - /** - * {@inheritDoc} - */ - @Override - public COSDictionary getCOSObject() - { - return this.dictionary; - } - /** * Default constructor */ @@ -60,6 +51,15 @@ public PDMarkedContentReference(COSDictionary dictionary) this.dictionary = dictionary; } + /** + * {@inheritDoc} + */ + @Override + public COSDictionary getCOSObject() + { + return this.dictionary; + } + /** * Gets the page. 
* @@ -109,7 +109,7 @@ public void setMCID(int mcid) @Override public String toString() { - return new StringBuilder().append("mcid=").append(this.getMCID()).toString(); + return "mcid=" + this.getMCID(); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDObjectReference.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDObjectReference.java index 19862bff9c6..694e2c55fb0 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDObjectReference.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDObjectReference.java @@ -21,6 +21,7 @@ import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.common.COSObjectable; import org.apache.pdfbox.pdmodel.graphics.PDXObject; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; @@ -28,6 +29,8 @@ /** * An object reference. + *

+ * This is described as "Entries in an object reference dictionary" in the PDF specification. * * @author Johannes Koch */ @@ -41,17 +44,6 @@ public class PDObjectReference implements COSObjectable private final COSDictionary dictionary; - /** - * Returns the underlying dictionary. - * - * @return the dictionary - */ - @Override - public COSDictionary getCOSObject() - { - return this.dictionary; - } - /** * Default Constructor. * @@ -72,6 +64,17 @@ public PDObjectReference(COSDictionary theDictionary) dictionary = theDictionary; } + /** + * Returns the underlying dictionary. + * + * @return the dictionary + */ + @Override + public COSDictionary getCOSObject() + { + return this.dictionary; + } + /** * Gets a higher-level object for the referenced object. * Currently this method may return a {@link PDAnnotation}, @@ -88,10 +91,13 @@ public COSObjectable getReferencedObject() } try { - PDXObject xobject = PDXObject.createXObject(obj, null); // <-- TODO: valid? - if (xobject != null) + if (obj instanceof COSStream) { - return xobject; + PDXObject xobject = PDXObject.createXObject(obj, null); // <-- TODO: valid? + if (xobject != null) + { + return xobject; + } } COSDictionary objDictionary = (COSDictionary)obj; PDAnnotation annotation = PDAnnotation.createAnnotation(obj); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDParentTreeValue.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDParentTreeValue.java new file mode 100644 index 00000000000..914c932861c --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDParentTreeValue.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.pdmodel.common.COSObjectable; + +/** + * Described in the PDF specification in the ParentTree segment of the table "Entries in the + * structure tree root". This is either a dictionary or an array. 
+ * + * @author Tilman Hausherr + */ +public class PDParentTreeValue implements COSObjectable +{ + COSObjectable obj; + + public PDParentTreeValue(COSArray obj) + { + this.obj = obj; + } + + public PDParentTreeValue(COSDictionary obj) + { + this.obj = obj; + } + + @Override + public COSBase getCOSObject() + { + return obj.getCOSObject(); + } + + @Override + public String toString() + { + return obj.toString(); + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java index c7e0a1ab055..abe1e8a61c0 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java @@ -24,6 +24,7 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent; @@ -90,12 +91,12 @@ public final void setStructureType(String structureType) */ public PDStructureNode getParent() { - COSDictionary p = (COSDictionary) this.getCOSObject().getDictionaryObject(COSName.P); - if (p == null) + COSBase base = this.getCOSObject().getDictionaryObject(COSName.P); + if (base instanceof COSDictionary) { - return null; + return PDStructureNode.create((COSDictionary) base); } - return PDStructureNode.create(p); + return null; } /** @@ -137,12 +138,12 @@ public void setElementIdentifier(String id) */ public PDPage getPage() { - COSDictionary pageDic = (COSDictionary) this.getCOSObject().getDictionaryObject(COSName.PG); - if (pageDic == null) + COSBase base = 
this.getCOSObject().getDictionaryObject(COSName.PG); + if (base instanceof COSDictionary) { - return null; + return new PDPage((COSDictionary) base); } - return new PDPage(pageDic); + return null; } /** @@ -159,12 +160,11 @@ public void setPage(PDPage page) /** * Returns the attributes together with their revision numbers (A). * - * @return the attributes + * @return the attributes as a list, never null. */ public Revisions getAttributes() { - Revisions attributes = - new Revisions(); + Revisions attributes = new Revisions(); COSBase a = this.getCOSObject().getDictionaryObject(COSName.A); if (a instanceof COSArray) { @@ -174,6 +174,10 @@ public Revisions getAttributes() while (it.hasNext()) { COSBase item = it.next(); + if (item instanceof COSObject) + { + item = ((COSObject) item).getObject(); + } if (item instanceof COSDictionary) { ao = PDAttributeObject.create((COSDictionary) item); @@ -182,8 +186,7 @@ public Revisions getAttributes() } else if (item instanceof COSInteger) { - attributes.setRevisionNumber(ao, - ((COSInteger) item).intValue()); + attributes.setRevisionNumber(ao, ((COSNumber) item).intValue()); } } } @@ -326,7 +329,7 @@ public void attributeChanged(PDAttributeObject attributeObject) /** * Returns the class names together with their revision numbers (C). * - * @return the class names + * @return the class names as a list, never null. 
*/ public Revisions getClassNames() { @@ -345,6 +348,10 @@ public Revisions getClassNames() while (it.hasNext()) { COSBase item = it.next(); + if (item instanceof COSObject) + { + item = ((COSObject) item).getObject(); + } if (item instanceof COSName) { className = ((COSName) item).getName(); @@ -352,8 +359,7 @@ public Revisions getClassNames() } else if (item instanceof COSInteger) { - classNames.setRevisionNumber(className, - ((COSInteger) item).intValue()); + classNames.setRevisionNumber(className, ((COSInteger) item).intValue()); } } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureNode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureNode.java index 0d4b521fec9..4ab765e4e77 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureNode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureNode.java @@ -17,7 +17,6 @@ package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import org.apache.pdfbox.cos.COSArray; @@ -37,6 +36,27 @@ public abstract class PDStructureNode implements COSObjectable { + /** + * Constructor. + * + * @param type the type + */ + protected PDStructureNode(String type) + { + this.dictionary = new COSDictionary(); + this.dictionary.setName(COSName.TYPE, type); + } + + /** + * Constructor for an existing structure node. + * + * @param dictionary The existing dictionary. + */ + protected PDStructureNode(COSDictionary dictionary) + { + this.dictionary = dictionary; + } + /** * Creates a node in the structure tree. Can be either a structure tree root, * or a structure element. 
@@ -51,7 +71,7 @@ public static PDStructureNode create(COSDictionary node) { return new PDStructureTreeRoot(node); } - if ((type == null) || "StructElem".equals(type)) + if (type == null || "StructElem".equals(type)) { return new PDStructureElement(node); } @@ -69,27 +89,6 @@ public COSDictionary getCOSObject() return dictionary; } - /** - * Constructor. - * - * @param type the type - */ - protected PDStructureNode(String type) - { - this.dictionary = new COSDictionary(); - this.dictionary.setName(COSName.TYPE, type); - } - - /** - * Constructor for an existing structure node. - * - * @param dictionary The existing dictionary. - */ - protected PDStructureNode(COSDictionary dictionary) - { - this.dictionary = dictionary; - } - /** * Returns the type. * @@ -103,7 +102,7 @@ public String getType() /** * Returns a list of objects for the kids (K). * - * @return a list of objects for the kids + * @return a list of objects for the kids, never null. */ public List getKids() { @@ -111,10 +110,8 @@ public List getKids() COSBase k = this.getCOSObject().getDictionaryObject(COSName.K); if (k instanceof COSArray) { - Iterator kids = ((COSArray) k).iterator(); - while (kids.hasNext()) + for (COSBase kid : (COSArray) k) { - COSBase kid = kids.next(); Object kidObject = this.createObject(kid); if (kidObject != null) { @@ -362,10 +359,9 @@ else if (k instanceof COSArray) * The type of object depends on the type of the kid. It can be *
    *
  • a {@link PDStructureElement},
  • - *
  • a {@link org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation PDAnnotation},
  • - *
  • a {@link org.apache.pdfbox.pdmodel.graphics.PDXObject PDXObject},
  • - *
  • a {@link PDMarkedContentReference}
  • - *
  • a {@link Integer}
  • + *
  • a {@link PDObjectReference},
  • + *
  • a {@link PDMarkedContentReference},
  • + *
  • an {@link Integer}
  • *
* * @param kid the kid @@ -388,33 +384,35 @@ else if (kid instanceof COSObject) } if (kidDic != null) { - String type = kidDic.getNameAsString(COSName.TYPE); - if ((type == null) || PDStructureElement.TYPE.equals(type)) - { - // A structure element dictionary denoting another structure - // element - return new PDStructureElement(kidDic); - } - else if (PDObjectReference.TYPE.equals(type)) - { - // An object reference dictionary denoting a PDF object - return new PDObjectReference(kidDic); - } - else if (PDMarkedContentReference.TYPE.equals(type)) - { - // A marked-content reference dictionary denoting a - // marked-content sequence - return new PDMarkedContentReference(kidDic); - } + return createObjectFromDic(kidDic); } else if (kid instanceof COSInteger) { - // An integer marked-content identifier denoting a - // marked-content sequence + // An integer marked-content identifier denoting a marked-content sequence COSInteger mcid = (COSInteger) kid; return mcid.intValue(); } return null; } + private COSObjectable createObjectFromDic(COSDictionary kidDic) + { + String type = kidDic.getNameAsString(COSName.TYPE); + if ((type == null) || PDStructureElement.TYPE.equals(type)) + { + // A structure element dictionary denoting another structure element + return new PDStructureElement(kidDic); + } + else if (PDObjectReference.TYPE.equals(type)) + { + // An object reference dictionary denoting a PDF object + return new PDObjectReference(kidDic); + } + else if (PDMarkedContentReference.TYPE.equals(type)) + { + // A marked-content reference dictionary denoting a marked-content sequence + return new PDMarkedContentReference(kidDic); + } + return null; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java index ae76591bf71..5926a5283d4 100644 --- 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java @@ -17,7 +17,7 @@ package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure; import java.io.IOException; -import java.util.Hashtable; +import java.util.HashMap; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -70,33 +70,37 @@ public PDStructureTreeRoot(COSDictionary dic) * Returns the K array entry. * * @return the K array entry + * + * @deprecated use {@link #getK()} only. /K can be a dictionary or an array, and the next level + * can also be a dictionary. See file 054080.pdf in PDFBOX-4417 and read "Entries in the + * structure tree root" in the PDF specification. */ + @Deprecated public COSArray getKArray() { COSBase k = this.getCOSObject().getDictionaryObject(COSName.K); - if (k != null) + if (k instanceof COSDictionary) { - if (k instanceof COSDictionary) - { - COSDictionary kdict = (COSDictionary) k; - k = kdict.getDictionaryObject(COSName.K); - if (k instanceof COSArray) - { - return (COSArray) k; - } - } - else + COSDictionary kdict = (COSDictionary) k; + k = kdict.getDictionaryObject(COSName.K); + if (k instanceof COSArray) { return (COSArray) k; } } + else if (k instanceof COSArray) + { + return (COSArray) k; + } + return null; } /** - * Returns the K entry. - * - * @return the K entry + * Returns the K entry. This can be a dictionary representing a structure element, or an array + * of them. + * + * @return the K entry. 
*/ public COSBase getK() { @@ -118,12 +122,12 @@ public void setK(COSBase k) * * @return the ID tree */ - public PDNameTreeNode getIDTree() + public PDNameTreeNode getIDTree() { - COSDictionary idTreeDic = (COSDictionary) this.getCOSObject().getDictionaryObject(COSName.ID_TREE); - if (idTreeDic != null) + COSBase base = this.getCOSObject().getDictionaryObject(COSName.ID_TREE); + if (base instanceof COSDictionary) { - return new PDStructureElementNameTreeNode(idTreeDic); + return new PDStructureElementNameTreeNode((COSDictionary) base); } return null; } @@ -133,7 +137,7 @@ public PDNameTreeNode getIDTree() * * @param idTree the ID tree */ - public void setIDTree(PDNameTreeNode idTree) + public void setIDTree(PDNameTreeNode idTree) { this.getCOSObject().setItem(COSName.ID_TREE, idTree); } @@ -145,10 +149,10 @@ public void setIDTree(PDNameTreeNode idTree) */ public PDNumberTreeNode getParentTree() { - COSDictionary parentTreeDic = (COSDictionary) this.getCOSObject().getDictionaryObject(COSName.PARENT_TREE); - if (parentTreeDic != null) + COSBase base = getCOSObject().getDictionaryObject(COSName.PARENT_TREE); + if (base instanceof COSDictionary) { - return new PDNumberTreeNode(parentTreeDic, COSBase.class); + return new PDNumberTreeNode((COSDictionary) base, PDParentTreeValue.class); } return null; } @@ -202,7 +206,7 @@ public Map getRoleMap() LOG.error(e,e); } } - return new Hashtable(); + return new HashMap(); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDUserAttributeObject.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDUserAttributeObject.java index 4b8195c64bb..40c2cd0a5f2 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDUserAttributeObject.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDUserAttributeObject.java @@ -117,7 +117,7 @@ public void 
removeUserProperty(PDUserProperty userProperty) } /** - * @param userProperty + * @param userProperty the changed user property. */ public void userPropertyChanged(PDUserProperty userProperty) { @@ -127,9 +127,9 @@ public void userPropertyChanged(PDUserProperty userProperty) @Override public String toString() { - return new StringBuilder().append(super.toString()) - .append(", userProperties=") - .append(this.getOwnerUserProperties()).toString(); + return super.toString() + + ", userProperties=" + + this.getOwnerUserProperties(); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDUserProperty.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDUserProperty.java index 17947fd420a..ea9692fbc4d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDUserProperty.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDUserProperty.java @@ -145,10 +145,10 @@ public void setHidden(boolean hidden) @Override public String toString() { - return new StringBuilder("Name=").append(this.getName()) - .append(", Value=").append(this.getValue()) - .append(", FormattedValue=").append(this.getFormattedValue()) - .append(", Hidden=").append(this.isHidden()).toString(); + return "Name=" + this.getName() + + ", Value=" + this.getValue() + + ", FormattedValue=" + this.getFormattedValue() + + ", Hidden=" + this.isHidden(); } @@ -223,4 +223,4 @@ else if (!userAttributeObject.equals(other.userAttributeObject)) return true; } -} \ No newline at end of file +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/Revisions.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/Revisions.java index 65889e584f4..da796722859 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/Revisions.java +++ 
b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/Revisions.java @@ -30,6 +30,13 @@ public class Revisions private List objects; private List revisionNumbers; + /** + * Constructor. + */ + public Revisions() + { + } + private List getObjects() { if (this.objects == null) @@ -48,13 +55,6 @@ private List getRevisionNumbers() return this.revisionNumbers; } - /** - * - */ - public Revisions() - { - } - /** * Returns the object at the specified position. * diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/package.html index a7d32e7c21c..c83ace8694e 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java index 98dd773e2d9..300524d4d0e 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDMarkedContent.java @@ -1,202 +1,201 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.pdmodel.documentinterchange.markedcontent; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.PDArtifactMarkedContent; -import org.apache.pdfbox.pdmodel.graphics.PDXObject; -import org.apache.pdfbox.text.TextPosition; - -/** - * A marked content. - * - * @author Johannes Koch - */ -public class PDMarkedContent -{ - - /** - * Creates a marked-content sequence. - * - * @param tag the tag - * @param properties the properties - * @return the marked-content sequence - */ - public static PDMarkedContent create(COSName tag, COSDictionary properties) - { - if (COSName.ARTIFACT.equals(tag)) - { - return new PDArtifactMarkedContent(properties); - } - return new PDMarkedContent(tag, properties); - } - - - private final String tag; - private final COSDictionary properties; - private final List contents; - - - /** - * Creates a new marked content object. - * - * @param tag the tag - * @param properties the properties - */ - public PDMarkedContent(COSName tag, COSDictionary properties) - { - this.tag = tag == null ? null : tag.getName(); - this.properties = properties; - this.contents = new ArrayList(); - } - - - /** - * Gets the tag. - * - * @return the tag - */ - public String getTag() - { - return this.tag; - } - - /** - * Gets the properties. 
- * - * @return the properties - */ - public COSDictionary getProperties() - { - return this.properties; - } - - /** - * Gets the marked-content identifier. - * - * @return the marked-content identifier, or -1 if it doesn't exist. - */ - public int getMCID() - { - return this.getProperties() == null ? -1 : - this.getProperties().getInt(COSName.MCID); - } - - /** - * Gets the language (Lang). - * - * @return the language - */ - public String getLanguage() - { - return this.getProperties() == null ? null : - this.getProperties().getNameAsString(COSName.LANG); - } - - /** - * Gets the actual text (ActualText). - * - * @return the actual text - */ - public String getActualText() - { - return this.getProperties() == null ? null : - this.getProperties().getString(COSName.ACTUAL_TEXT); - } - - /** - * Gets the alternate description (Alt). - * - * @return the alternate description - */ - public String getAlternateDescription() - { - return this.getProperties() == null ? null : - this.getProperties().getString(COSName.ALT); - } - - /** - * Gets the expanded form (E). - * - * @return the expanded form - */ - public String getExpandedForm() - { - return this.getProperties() == null ? null : - this.getProperties().getString(COSName.E); - } - - /** - * Gets the contents of the marked content sequence. Can be - *
    - *
  • {@link TextPosition},
  • - *
  • {@link PDMarkedContent}, or
  • - *
  • {@link PDXObject}.
  • - *
- * - * @return the contents of the marked content sequence - */ - public List getContents() - { - return this.contents; - } - - /** - * Adds a text position to the contents. - * - * @param text the text position - */ - public void addText(TextPosition text) - { - this.getContents().add(text); - } - - /** - * Adds a marked content to the contents. - * - * @param markedContent the marked content - */ - public void addMarkedContent(PDMarkedContent markedContent) - { - this.getContents().add(markedContent); - } - - /** - * Adds an XObject to the contents. - * - * @param xobject the XObject - */ - public void addXObject(PDXObject xobject) - { - this.getContents().add(xobject); - } - - - @Override - public String toString() - { - StringBuilder sb = new StringBuilder("tag=").append(this.tag) - .append(", properties=").append(this.properties); - sb.append(", contents=").append(this.contents); - return sb.toString(); - } - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.documentinterchange.markedcontent; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.PDArtifactMarkedContent; +import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import org.apache.pdfbox.text.TextPosition; + +/** + * A marked content. + * + * @author Johannes Koch + */ +public class PDMarkedContent +{ + + /** + * Creates a marked-content sequence. + * + * @param tag the tag + * @param properties the properties + * @return the marked-content sequence + */ + public static PDMarkedContent create(COSName tag, COSDictionary properties) + { + if (COSName.ARTIFACT.equals(tag)) + { + return new PDArtifactMarkedContent(properties); + } + return new PDMarkedContent(tag, properties); + } + + + private final String tag; + private final COSDictionary properties; + private final List contents; + + + /** + * Creates a new marked content object. + * + * @param tag the tag + * @param properties the properties + */ + public PDMarkedContent(COSName tag, COSDictionary properties) + { + this.tag = tag == null ? null : tag.getName(); + this.properties = properties; + this.contents = new ArrayList(); + } + + + /** + * Gets the tag. + * + * @return the tag + */ + public String getTag() + { + return this.tag; + } + + /** + * Gets the properties. + * + * @return the properties + */ + public COSDictionary getProperties() + { + return this.properties; + } + + /** + * Gets the marked-content identifier. + * + * @return the marked-content identifier, or -1 if it doesn't exist. + */ + public int getMCID() + { + return this.getProperties() == null ? -1 : + this.getProperties().getInt(COSName.MCID); + } + + /** + * Gets the language (Lang). + * + * @return the language + */ + public String getLanguage() + { + return this.getProperties() == null ? 
null : + this.getProperties().getNameAsString(COSName.LANG); + } + + /** + * Gets the actual text (ActualText). + * + * @return the actual text + */ + public String getActualText() + { + return this.getProperties() == null ? null : + this.getProperties().getString(COSName.ACTUAL_TEXT); + } + + /** + * Gets the alternate description (Alt). + * + * @return the alternate description + */ + public String getAlternateDescription() + { + return this.getProperties() == null ? null : + this.getProperties().getString(COSName.ALT); + } + + /** + * Gets the expanded form (E). + * + * @return the expanded form + */ + public String getExpandedForm() + { + return this.getProperties() == null ? null : + this.getProperties().getString(COSName.E); + } + + /** + * Gets the contents of the marked content sequence. Can be + *
    + *
  • {@link TextPosition},
  • + *
  • {@link PDMarkedContent}, or
  • + *
  • {@link PDXObject}.
  • + *
+ * + * @return the contents of the marked content sequence + */ + public List getContents() + { + return this.contents; + } + + /** + * Adds a text position to the contents. + * + * @param text the text position + */ + public void addText(TextPosition text) + { + this.getContents().add(text); + } + + /** + * Adds a marked content to the contents. + * + * @param markedContent the marked content + */ + public void addMarkedContent(PDMarkedContent markedContent) + { + this.getContents().add(markedContent); + } + + /** + * Adds an XObject to the contents. + * + * @param xobject the XObject + */ + public void addXObject(PDXObject xobject) + { + this.getContents().add(xobject); + } + + + @Override + public String toString() + { + return "tag=" + this.tag + + ", properties=" + this.properties + + ", contents=" + this.contents; + } + +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDPropertyList.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDPropertyList.java index 307840ab8d3..b9f1bd05446 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDPropertyList.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/PDPropertyList.java @@ -20,6 +20,7 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.COSObjectable; import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentGroup; +import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentMembershipDictionary; /** * A property list is a dictionary containing private information meaningful to the conforming @@ -31,7 +32,10 @@ public class PDPropertyList implements COSObjectable /** * Creates a property list from the given dictionary. 
+ * * @param dict COS dictionary + * + * @return the property list */ public static PDPropertyList create(COSDictionary dict) { @@ -39,6 +43,10 @@ public static PDPropertyList create(COSDictionary dict) { return new PDOptionalContentGroup(dict); } + else if (COSName.OCMD.equals(dict.getItem(COSName.TYPE))) + { + return new PDOptionalContentMembershipDictionary(dict); + } else { // todo: more types @@ -56,6 +64,8 @@ protected PDPropertyList() /** * Constructor for subclasses. + * + * @param dict the dictionary to be used. */ protected PDPropertyList(COSDictionary dict) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/package.html index 35ba6751f10..5b3c7dab07e 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/markedcontent/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/prepress/PDBoxStyle.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/prepress/PDBoxStyle.java index 2ea176c27bd..00b876f466d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/prepress/PDBoxStyle.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/prepress/PDBoxStyle.java @@ -90,8 +90,7 @@ public PDColor getGuidelineColor() colorValues.add( COSInteger.ZERO ); dictionary.setItem(COSName.C, colorValues); } - PDColor color = new PDColor(colorValues.toFloatArray(), PDDeviceRGB.INSTANCE); - return color; + return new PDColor(colorValues.toFloatArray(), PDDeviceRGB.INSTANCE); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/prepress/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/prepress/package.html index 879f0d560ae..a83427e77b0 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/prepress/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/prepress/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDExportFormatAttributeObject.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDExportFormatAttributeObject.java index cf1343902c0..0ddb8219ffb 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDExportFormatAttributeObject.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDExportFormatAttributeObject.java @@ -58,6 +58,8 @@ public class PDExportFormatAttributeObject extends PDLayoutAttributeObject /** * Default constructor. + * + * @param owner the owner string. 
 */ public PDExportFormatAttributeObject(String owner) { @@ -240,11 +242,11 @@ public String toString() } if (this.isSpecified(PDTableAttributeObject.ROW_SPAN)) { - sb.append(", RowSpan=").append(String.valueOf(this.getRowSpan())); + sb.append(", RowSpan=").append(this.getRowSpan()); } if (this.isSpecified(PDTableAttributeObject.COL_SPAN)) { - sb.append(", ColSpan=").append(String.valueOf(this.getColSpan())); + sb.append(", ColSpan=").append(this.getColSpan()); } if (this.isSpecified(PDTableAttributeObject.HEADERS)) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDLayoutAttributeObject.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDLayoutAttributeObject.java index 21ca37755dc..3b12045d29c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDLayoutAttributeObject.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDLayoutAttributeObject.java @@ -1041,22 +1041,22 @@ public void setAllTBorderStyles(String tBorderStyle) } /** - * Sets the style of the border drawn on each edge of a table cell - * (TBorderStyle). The values should be of: + * Sets the style of the border drawn on each edge of a table cell (TBorderStyle). Each value should be one of: *
    - *
  • {@link #BORDER_STYLE_NONE},
  • - *
  • {@link #BORDER_STYLE_HIDDEN},
  • - *
  • {@link #BORDER_STYLE_DOTTED},
  • - *
  • {@link #BORDER_STYLE_DASHED},
  • - *
  • {@link #BORDER_STYLE_SOLID},
  • - *
  • {@link #BORDER_STYLE_DOUBLE},
  • - *
  • {@link #BORDER_STYLE_GROOVE},
  • - *
  • {@link #BORDER_STYLE_RIDGE},
  • - *
  • {@link #BORDER_STYLE_INSET},
  • - *
  • {@link #BORDER_STYLE_OUTSET}.
  • + *
  • {@link #BORDER_STYLE_NONE},
  • + *
  • {@link #BORDER_STYLE_HIDDEN},
  • + *
  • {@link #BORDER_STYLE_DOTTED},
  • + *
  • {@link #BORDER_STYLE_DASHED},
  • + *
  • {@link #BORDER_STYLE_SOLID},
  • + *
  • {@link #BORDER_STYLE_DOUBLE},
  • + *
  • {@link #BORDER_STYLE_GROOVE},
  • + *
  • {@link #BORDER_STYLE_RIDGE},
  • + *
  • {@link #BORDER_STYLE_INSET},
  • + *
  • {@link #BORDER_STYLE_OUTSET}.
  • *
* - * @param tBorderStyles + * @param tBorderStyles an array of border styles. + * */ public void setTBorderStyles(String[] tBorderStyles) { @@ -1518,7 +1518,7 @@ public String toString() } else { - sb.append(String.valueOf(borderThickness)); + sb.append(borderThickness); } } if (this.isSpecified(PADDING)) @@ -1531,7 +1531,7 @@ public String toString() } else { - sb.append(String.valueOf(padding)); + sb.append(padding); } } if (this.isSpecified(COLOR)) @@ -1541,27 +1541,27 @@ public String toString() if (this.isSpecified(SPACE_BEFORE)) { sb.append(", SpaceBefore=") - .append(String.valueOf(this.getSpaceBefore())); + .append(this.getSpaceBefore()); } if (this.isSpecified(SPACE_AFTER)) { sb.append(", SpaceAfter=") - .append(String.valueOf(this.getSpaceAfter())); + .append(this.getSpaceAfter()); } if (this.isSpecified(START_INDENT)) { sb.append(", StartIndent=") - .append(String.valueOf(this.getStartIndent())); + .append(this.getStartIndent()); } if (this.isSpecified(END_INDENT)) { sb.append(", EndIndent=") - .append(String.valueOf(this.getEndIndent())); + .append(this.getEndIndent()); } if (this.isSpecified(TEXT_INDENT)) { sb.append(", TextIndent=") - .append(String.valueOf(this.getTextIndent())); + .append(this.getTextIndent()); } if (this.isSpecified(TEXT_ALIGN)) { @@ -1573,29 +1573,11 @@ public String toString() } if (this.isSpecified(WIDTH)) { - Object width = this.getWidth(); - sb.append(", Width="); - if (width instanceof Float) - { - sb.append(String.valueOf(width)); - } - else - { - sb.append(width); - } + sb.append(", Width=").append(this.getWidth()); } if (this.isSpecified(HEIGHT)) { - Object height = this.getHeight(); - sb.append(", Height="); - if (height instanceof Float) - { - sb.append(String.valueOf(height)); - } - else - { - sb.append(height); - } + sb.append(", Height=").append(this.getHeight()); } if (this.isSpecified(BLOCK_ALIGN)) { @@ -1628,26 +1610,16 @@ public String toString() } else { - sb.append(String.valueOf(tPadding)); + 
sb.append(tPadding); } } if (this.isSpecified(BASELINE_SHIFT)) { - sb.append(", BaselineShift=") - .append(String.valueOf(this.getBaselineShift())); + sb.append(", BaselineShift=").append(this.getBaselineShift()); } if (this.isSpecified(LINE_HEIGHT)) { - Object lineHeight = this.getLineHeight(); - sb.append(", LineHeight="); - if (lineHeight instanceof Float) - { - sb.append(String.valueOf(lineHeight)); - } - else - { - sb.append(lineHeight); - } + sb.append(", LineHeight=").append(this.getLineHeight()); } if (this.isSpecified(TEXT_DECORATION_COLOR)) { @@ -1657,7 +1629,7 @@ public String toString() if (this.isSpecified(TEXT_DECORATION_THICKNESS)) { sb.append(", TextDecorationThickness=") - .append(String.valueOf(this.getTextDecorationThickness())); + .append(this.getTextDecorationThickness()); } if (this.isSpecified(TEXT_DECORATION_TYPE)) { @@ -1680,7 +1652,7 @@ public String toString() if (this.isSpecified(COLUMN_COUNT)) { sb.append(", ColumnCount=") - .append(String.valueOf(this.getColumnCount())); + .append(this.getColumnCount()); } if (this.isSpecified(COLUMN_GAP)) { @@ -1692,7 +1664,7 @@ public String toString() } else { - sb.append(String.valueOf(columnGap)); + sb.append(columnGap); } } if (this.isSpecified(COLUMN_WIDTHS)) @@ -1705,7 +1677,7 @@ public String toString() } else { - sb.append(String.valueOf(columnWidth)); + sb.append(columnWidth); } } return sb.toString(); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDStandardAttributeObject.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDStandardAttributeObject.java index 15f1c6c2bcf..37080e43a81 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDStandardAttributeObject.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDStandardAttributeObject.java @@ -365,9 +365,9 @@ protected void setNumber(String name, int value) protected void 
setArrayOfNumber(String name, float[] values) { COSArray array = new COSArray(); - for (int i = 0; i < values.length; i++) + for (float value : values) { - array.add(new COSFloat(values[i])); + array.add(new COSFloat(value)); } COSBase oldBase = this.getCOSObject().getDictionaryObject(name); this.getCOSObject().setItem(name, array); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDTableAttributeObject.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDTableAttributeObject.java index 5250e228012..d8b90809ba6 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDTableAttributeObject.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/PDTableAttributeObject.java @@ -196,11 +196,11 @@ public String toString() StringBuilder sb = new StringBuilder().append(super.toString()); if (this.isSpecified(ROW_SPAN)) { - sb.append(", RowSpan=").append(String.valueOf(this.getRowSpan())); + sb.append(", RowSpan=").append(this.getRowSpan()); } if (this.isSpecified(COL_SPAN)) { - sb.append(", ColSpan=").append(String.valueOf(this.getColSpan())); + sb.append(", ColSpan=").append(this.getColSpan()); } if (this.isSpecified(HEADERS)) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/StandardStructureTypes.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/StandardStructureTypes.java index 9eab839d82a..0adb640d326 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/StandardStructureTypes.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/StandardStructureTypes.java @@ -37,11 +37,6 @@ public class StandardStructureTypes */ private static final Log LOG = LogFactory.getLog(StandardStructureTypes.class); - private StandardStructureTypes() - { - } - - // Grouping Elements /** * Document @@ -323,4 
+318,7 @@ private StandardStructureTypes() Collections.sort(types); } + private StandardStructureTypes() + { + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/package.html index ca5daa9e7a4..efed32d7d4c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/documentinterchange/taggedpdf/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/AccessPermission.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/AccessPermission.java index 2672ac94876..3be685201bf 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/AccessPermission.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/AccessPermission.java @@ -49,7 +49,7 @@ public class AccessPermission { - private static final int DEFAULT_PERMISSIONS = 0xFFFFFFFF ^ 3;//bits 0 & 1 need to be zero + private static final int DEFAULT_PERMISSIONS = ~3; //bits 0 & 1 need to be zero private static final int PRINT_BIT = 3; private static final int MODIFICATION_BIT = 4; private static final int EXTRACT_BIT = 5; @@ -59,7 +59,7 @@ public class AccessPermission private static final int ASSEMBLE_DOCUMENT_BIT = 11; private static final int DEGRADED_PRINT_BIT = 12; - private int bytes = DEFAULT_PERMISSIONS; + private int bytes; private boolean readOnly = false; @@ -115,7 +115,7 @@ private boolean setPermissionBit( int bit, boolean value ) } else { - permissions = permissions & (0xFFFFFFFF ^ (1 << (bit-1))); + permissions = permissions & (~(1 << (bit - 1))); } bytes = permissions; @@ -209,7 +209,8 @@ public boolean canPrint() /** * Set if the user can print. - * This method will have no effect if the object is in read only mode + *

+ * This method will have no effect if the object is in read only mode. * * @param allowPrinting A boolean determining if the user can print. */ @@ -233,7 +234,8 @@ public boolean canModify() /** * Set if the user can modify the document. - * This method will have no effect if the object is in read only mode + *

+ * This method will have no effect if the object is in read only mode. * * @param allowModifications A boolean determining if the user can modify the document. */ @@ -258,7 +260,8 @@ public boolean canExtractContent() /** * Set if the user can extract content from the document. - * This method will have no effect if the object is in read only mode + *

+ * This method will have no effect if the object is in read only mode. * * @param allowExtraction A boolean determining if the user can extract content * from the document. @@ -272,7 +275,11 @@ public void setCanExtractContent( boolean allowExtraction ) } /** - * This will tell if the user can add/modify text annotations, fill in interactive forms fields. + * This will tell if the user can add or modify text annotations and fill in interactive form + * fields and, if {@link #canModify() canModify()} returns true, create or modify interactive + * form fields (including signature fields). Note that if + * {@link #canFillInForm() canFillInForm()} returns true, it is still possible to fill in + * interactive forms (including signature fields) even if this method here returns false. * * @return true If supplied with the user password they are allowed to modify annotations. */ @@ -282,10 +289,15 @@ public boolean canModifyAnnotations() } /** - * Set if the user can modify annotations. - * This method will have no effect if the object is in read only mode + * Set if the user can add or modify text annotations and fill in interactive form fields and, + * if {@link #canModify() canModify()} returns true, create or modify interactive form fields + * (including signature fields). Note that if {@link #canFillInForm() canFillInForm()} returns + * true, it is still possible to fill in interactive forms (including signature fields) even if the + * parameter here is false. + *

+ * This method will have no effect if the object is in read only mode. * - * @param allowAnnotationModification A boolean determining if the user can modify annotations. + * @param allowAnnotationModification A boolean determining the new setting. */ public void setCanModifyAnnotations( boolean allowAnnotationModification ) { @@ -296,7 +308,8 @@ public void setCanModifyAnnotations( boolean allowAnnotationModification ) } /** - * This will tell if the user can fill in interactive forms. + * This will tell if the user can fill in interactive form fields (including signature fields) + * even if {@link #canModifyAnnotations() canModifyAnnotations()} returns false. * * @return true If supplied with the user password they are allowed to fill in form fields. */ @@ -306,8 +319,12 @@ public boolean canFillInForm() } /** - * Set if the user can fill in interactive forms. - * This method will have no effect if the object is in read only mode + * Set if the user can fill in interactive form fields (including signature fields) even if + * {@link #canModifyAnnotations() canModifyAnnotations()} returns false. Therefore, if you want + * to prevent a user from filling in interactive form fields, you need to call + * {@link #setCanModifyAnnotations(boolean) setCanModifyAnnotations(false)} as well. + *

+ * This method will have no effect if the object is in read only mode. * * @param allowFillingInForm A boolean determining if the user can fill in interactive forms. */ @@ -333,7 +350,8 @@ public boolean canExtractForAccessibility() /** * Set if the user can extract content from the document for accessibility purposes. - * This method will have no effect if the object is in read only mode + *

+ * This method will have no effect if the object is in read only mode. * * @param allowExtraction A boolean determining if the user can extract content * from the document. @@ -349,8 +367,7 @@ public void setCanExtractForAccessibility( boolean allowExtraction ) /** * This will tell if the user can insert/rotate/delete pages. * - * @return true If supplied with the user password they are allowed to extract content - * from the PDF document + * @return true If supplied with the user password they are allowed to assemble the document. */ public boolean canAssembleDocument() { @@ -359,7 +376,8 @@ public boolean canAssembleDocument() /** * Set if the user can insert/rotate/delete pages. - * This method will have no effect if the object is in read only mode + *

+ * This method will have no effect if the object is in read only mode. * * @param allowAssembly A boolean determining if the user can assemble the document. */ @@ -384,16 +402,17 @@ public boolean canPrintDegraded() /** * Set if the user can print the document in a degraded format. - * This method will have no effect if the object is in read only mode + *

+ * This method will have no effect if the object is in read only mode. * - * @param allowAssembly A boolean determining if the user can print the + * @param canPrintDegraded A boolean determining if the user can print the * document in a degraded format. */ - public void setCanPrintDegraded( boolean allowAssembly ) + public void setCanPrintDegraded( boolean canPrintDegraded ) { if(!readOnly) { - setPermissionBit( DEGRADED_PRINT_BIT, allowAssembly ); + setPermissionBit( DEGRADED_PRINT_BIT, canPrintDegraded ); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/MessageDigests.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/MessageDigests.java index 44632280fff..cf9ca28be09 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/MessageDigests.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/MessageDigests.java @@ -60,4 +60,20 @@ static MessageDigest getSHA1() throw new RuntimeException(e); } } + + /** + * @return SHA-256 message digest + */ + static MessageDigest getSHA256() + { + try + { + return MessageDigest.getInstance("SHA-256"); + } + catch (NoSuchAlgorithmException e) + { + // should never happen + throw new RuntimeException(e); + } + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDCryptFilterDictionary.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDCryptFilterDictionary.java index d82ad663f0d..6f25c4e08ce 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDCryptFilterDictionary.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDCryptFilterDictionary.java @@ -19,6 +19,7 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.common.COSObjectable; /** * This class is a specialized view of the crypt filter dictionary of a PDF document. @@ -26,7 +27,7 @@ * manage its fields. 
* */ -public class PDCryptFilterDictionary +public class PDCryptFilterDictionary implements COSObjectable { /** @@ -55,12 +56,25 @@ public PDCryptFilterDictionary(COSDictionary d) * This will get the dictionary associated with this crypt filter dictionary. * * @return The COS dictionary that this object wraps. + * @deprecated use {@link #getCOSObject()} */ + @Deprecated public COSDictionary getCOSDictionary() { return cryptFilterDictionary; } + /** + * This will get the dictionary associated with this crypt filter dictionary. + * + * @return The COS dictionary that this object wraps. + */ + @Override + public COSDictionary getCOSObject() + { + return cryptFilterDictionary; + } + /** * This will set the number of bits to use for the crypt filter algorithm. * @@ -72,7 +86,7 @@ public void setLength(int length) } /** - * This will return the Length entry of the crypt filter dictionary.

+ * This will return the Length entry of the crypt filter dictionary.

* The length in bits for the crypt filter algorithm. This will return a multiple of 8. * * @return The length in bits for the encryption algorithm diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java index 67422cd735e..84719b55285 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java @@ -25,6 +25,7 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.pdmodel.common.COSObjectable; /** * This class is a specialized view of the encryption dictionary of a PDF document. @@ -37,7 +38,7 @@ * @author Ben Litchfield * @author Benoit Guillon */ -public class PDEncryption +public class PDEncryption implements COSObjectable { /** * See PDF Reference 1.4 Table 3.13. @@ -133,12 +134,25 @@ public boolean hasSecurityHandler() * This will get the dictionary associated with this encryption dictionary. * * @return The COS dictionary that this object wraps. + * @deprecated use {@link #getCOSObject() } */ + @Deprecated public COSDictionary getCOSDictionary() { return dictionary; } + /** + * This will get the dictionary associated with this encryption dictionary. + * + * @return The COS dictionary that this object wraps. + */ + @Override + public COSDictionary getCOSObject() + { + return dictionary; + } + /** * Sets the filter entry of the encryption dictionary. * @@ -180,8 +194,8 @@ public void setSubFilter(String subfilter) } /** - * This will set the V entry of the encryption dictionary.

- * See PDF Reference 1.4 Table 3.13.

+ * This will set the V entry of the encryption dictionary.

+ * See PDF Reference 1.4 Table 3.13.

* Note: This value is used to decrypt the pdf document. If you change this when * the document is encrypted then decryption will fail!. * @@ -193,7 +207,7 @@ public void setVersion(int version) } /** - * This will return the V entry of the encryption dictionary.

+ * This will return the V entry of the encryption dictionary.

* See PDF Reference 1.4 Table 3.13. * * @return The encryption version to use. @@ -214,7 +228,7 @@ public void setLength(int length) } /** - * This will return the Length entry of the encryption dictionary.

+ * This will return the Length entry of the encryption dictionary.

* The length in bits for the encryption algorithm. This will return a multiple of 8. * * @return The length in bits for the encryption algorithm @@ -225,11 +239,13 @@ public int getLength() } /** - * This will set the R entry of the encryption dictionary.

- * See PDF Reference 1.4 Table 3.14.

+ * This will set the R entry of the encryption dictionary.
+ *
+ * See PDF Reference 1.4 Table 3.14.
+ *
* - * Note: This value is used to decrypt the pdf document. If you change this when - * the document is encrypted then decryption will fail!. + * Note: This value is used to decrypt the pdf document. If you change this when the document is encrypted then + * decryption will fail! * * @param revision The new encryption version. */ @@ -239,7 +255,7 @@ public void setRevision(int revision) } /** - * This will return the R entry of the encryption dictionary.

+ * This will return the R entry of the encryption dictionary.

* See PDF Reference 1.4 Table 3.14. * * @return The encryption revision to use. @@ -424,6 +440,7 @@ public void setRecipients(byte[][] recipients) throws IOException array.add(recip); } dictionary.setItem(COSName.RECIPIENTS, array); + array.setDirect(true); } /** @@ -460,6 +477,16 @@ public PDCryptFilterDictionary getStdCryptFilterDictionary() return getCryptFilterDictionary(COSName.STD_CF); } + /** + * Returns the default crypt filter (for public-key security handler). + * + * @return the default crypt filter if available. + */ + public PDCryptFilterDictionary getDefaultCryptFilterDictionary() + { + return getCryptFilterDictionary(COSName.DEFAULT_CRYPT_FILTER); + } + /** * Returns the crypt filter with the given name. * @@ -467,15 +494,16 @@ public PDCryptFilterDictionary getStdCryptFilterDictionary() * * @return the crypt filter with the given name if available */ - public PDCryptFilterDictionary getCryptFilterDictionary(COSName cryptFilterName) + public PDCryptFilterDictionary getCryptFilterDictionary(COSName cryptFilterName) { - COSDictionary cryptFilterDictionary = (COSDictionary) dictionary.getDictionaryObject( COSName.CF ); - if (cryptFilterDictionary != null) + // See CF in "Table 20 – Entries common to all encryption dictionaries" + COSBase base = dictionary.getDictionaryObject(COSName.CF); + if (base instanceof COSDictionary) { - COSDictionary stdCryptFilterDictionary = (COSDictionary)cryptFilterDictionary.getDictionaryObject(cryptFilterName); - if (stdCryptFilterDictionary != null) + COSBase base2 = ((COSDictionary) base).getDictionaryObject(cryptFilterName); + if (base2 instanceof COSDictionary) { - return new PDCryptFilterDictionary(stdCryptFilterDictionary); + return new PDCryptFilterDictionary((COSDictionary) base2); } } return null; @@ -489,14 +517,14 @@ public PDCryptFilterDictionary getCryptFilterDictionary(COSName cryptFilterName) */ public void setCryptFilterDictionary(COSName cryptFilterName, PDCryptFilterDictionary cryptFilterDictionary) { - 
COSDictionary cfDictionary = (COSDictionary)dictionary.getDictionaryObject( COSName.CF ); + COSDictionary cfDictionary = dictionary.getCOSDictionary(COSName.CF); if (cfDictionary == null) { cfDictionary = new COSDictionary(); dictionary.setItem(COSName.CF, cfDictionary); } - - cfDictionary.setItem(cryptFilterName, cryptFilterDictionary.getCOSDictionary()); + cfDictionary.setDirect(true); // PDFBOX-4436 direct obj needed for Adobe Reader on Android + cfDictionary.setItem(cryptFilterName, cryptFilterDictionary.getCOSObject()); } /** @@ -506,9 +534,21 @@ public void setCryptFilterDictionary(COSName cryptFilterName, PDCryptFilterDicti */ public void setStdCryptFilterDictionary(PDCryptFilterDictionary cryptFilterDictionary) { + cryptFilterDictionary.getCOSObject().setDirect(true); // PDFBOX-4436 setCryptFilterDictionary(COSName.STD_CF, cryptFilterDictionary); } - + + /** + * Sets the default crypt filter (for public-key security handler). + * + * @param defaultFilterDictionary the standard crypt filter to set + */ + public void setDefaultCryptFilterDictionary(PDCryptFilterDictionary defaultFilterDictionary) + { + defaultFilterDictionary.getCOSObject().setDirect(true); // PDFBOX-4436 + setCryptFilterDictionary(COSName.DEFAULT_CRYPT_FILTER, defaultFilterDictionary); + } + /** * Returns the name of the filter which is used for de/encrypting streams. * Default value is "Identity". 
diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/ProtectionPolicy.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/ProtectionPolicy.java index a4f4d82a7ed..a886f15ed0c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/ProtectionPolicy.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/ProtectionPolicy.java @@ -33,6 +33,7 @@ public abstract class ProtectionPolicy private static final int DEFAULT_KEY_LENGTH = 40; private int encryptionKeyLength = DEFAULT_KEY_LENGTH; + private boolean preferAES = false; /** * set the length in (bits) of the secret key that will be @@ -61,4 +62,28 @@ public int getEncryptionKeyLength() { return encryptionKeyLength; } + + /** + * Tell whether AES encryption is preferred when several encryption methods are available for + * the chosen key length. The default is false. This setting is only relevant if the key length + * is 128 bits. + * + * @return true if AES encryption is preferred + */ + public boolean isPreferAES() + { + return this.preferAES; + } + + /** + * Set whether AES encryption is preferred when several encryption methods are available for the + * chosen key length. The default is false. This setting is only relevant if the key length is + * 128 bits. 
+ * + * @param preferAES true if AES encryption is preferred + */ + public void setPreferAES(boolean preferAES) + { + this.preferAES = preferAES; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PublicKeySecurityHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PublicKeySecurityHandler.java index 2fdf56aeb03..33badbca48f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PublicKeySecurityHandler.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PublicKeySecurityHandler.java @@ -25,12 +25,12 @@ import java.security.GeneralSecurityException; import java.security.InvalidKeyException; import java.security.KeyStoreException; -import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.security.PrivateKey; import java.security.SecureRandom; import java.security.cert.CertificateEncodingException; import java.security.cert.X509Certificate; +import java.util.Collection; import java.util.Iterator; import javax.crypto.BadPaddingException; @@ -41,14 +41,15 @@ import javax.crypto.SecretKey; import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.pdmodel.PDDocument; +import org.bouncycastle.asn1.ASN1Encoding; import org.bouncycastle.asn1.ASN1InputStream; import org.bouncycastle.asn1.ASN1ObjectIdentifier; import org.bouncycastle.asn1.ASN1Primitive; import org.bouncycastle.asn1.ASN1Set; import org.bouncycastle.asn1.DEROctetString; -import org.bouncycastle.asn1.DEROutputStream; import org.bouncycastle.asn1.DERSet; import org.bouncycastle.asn1.cms.ContentInfo; import org.bouncycastle.asn1.cms.EncryptedContentInfo; @@ -59,7 +60,7 @@ import org.bouncycastle.asn1.cms.RecipientInfo; import org.bouncycastle.asn1.pkcs.PKCSObjectIdentifiers; import org.bouncycastle.asn1.x509.AlgorithmIdentifier; -import org.bouncycastle.asn1.x509.TBSCertificateStructure; +import org.bouncycastle.asn1.x509.TBSCertificate; import 
org.bouncycastle.cert.X509CertificateHolder; import org.bouncycastle.cms.CMSEnvelopedData; import org.bouncycastle.cms.CMSException; @@ -79,10 +80,9 @@ public final class PublicKeySecurityHandler extends SecurityHandler /** The filter name. */ public static final String FILTER = "Adobe.PubSec"; - private static final String SUBFILTER = "adbe.pkcs7.s4"; + private static final String SUBFILTER4 = "adbe.pkcs7.s4"; + private static final String SUBFILTER5 = "adbe.pkcs7.s5"; - private PublicKeyProtectionPolicy policy = null; - /** * Constructor. */ @@ -93,12 +93,12 @@ public PublicKeySecurityHandler() /** * Constructor used for encryption. * - * @param p The protection policy. + * @param publicKeyProtectionPolicy The protection policy. */ - public PublicKeySecurityHandler(PublicKeyProtectionPolicy p) + public PublicKeySecurityHandler(PublicKeyProtectionPolicy publicKeyProtectionPolicy) { - policy = p; - this.keyLength = policy.getEncryptionKeyLength(); + setProtectionPolicy(publicKeyProtectionPolicy); + setKeyLength(publicKeyProtectionPolicy.getEncryptionKeyLength()); } /** @@ -123,13 +123,19 @@ public void prepareForDecryption(PDEncryption encryption, COSArray documentIDArr if (!(decryptionMaterial instanceof PublicKeyDecryptionMaterial)) { throw new IOException( - "Provided decryption material is not compatible with the document"); + "Provided decryption material is not compatible with the document - " + + "did you pass a null keyStore?"); } setDecryptMetadata(encryption.isEncryptMetaData()); - if (encryption.getLength() != 0) + PDCryptFilterDictionary defaultCryptFilterDictionary = encryption.getDefaultCryptFilterDictionary(); + if (defaultCryptFilterDictionary != null && defaultCryptFilterDictionary.getLength() != 0) + { + setKeyLength(defaultCryptFilterDictionary.getLength()); + } + else if (encryption.getLength() != 0) { - this.keyLength = encryption.getLength(); + setKeyLength(encryption.getLength()); } PublicKeyDecryptionMaterial material = 
(PublicKeyDecryptionMaterial) decryptionMaterial; @@ -138,40 +144,52 @@ public void prepareForDecryption(PDEncryption encryption, COSArray documentIDArr { boolean foundRecipient = false; + X509Certificate certificate = material.getCertificate(); + X509CertificateHolder materialCert = null; + if (certificate != null) + { + materialCert = new X509CertificateHolder(certificate.getEncoded()); + } + // the decrypted content of the enveloped data that match // the certificate in the decryption material provided byte[] envelopedData = null; // the bytes of each recipient in the recipients array - byte[][] recipientFieldsBytes = new byte[encryption.getRecipientsLength()][]; + COSArray array = encryption.getCOSObject().getCOSArray(COSName.RECIPIENTS); + if (array == null && defaultCryptFilterDictionary != null) + { + array = defaultCryptFilterDictionary.getCOSObject().getCOSArray(COSName.RECIPIENTS); + } + if (array == null) + { + throw new IOException("/Recipients entry is missing in encryption dictionary"); + } + byte[][] recipientFieldsBytes = new byte[array.size()][]; + //TODO encryption.getRecipientsLength() and getRecipientStringAt() should be deprecated int recipientFieldsLength = 0; - int i = 0; StringBuilder extraInfo = new StringBuilder(); - for (; i < encryption.getRecipientsLength(); i++) + for (int i = 0; i < array.size(); i++) { - COSString recipientFieldString = encryption.getRecipientStringAt(i); + COSString recipientFieldString = (COSString) array.getObject(i); byte[] recipientBytes = recipientFieldString.getBytes(); CMSEnvelopedData data = new CMSEnvelopedData(recipientBytes); - Iterator recipCertificatesIt = data.getRecipientInfos().getRecipients().iterator(); + Collection recipCertificatesIt = data.getRecipientInfos() + .getRecipients(); int j = 0; - while (recipCertificatesIt.hasNext()) + for (RecipientInformation ri : recipCertificatesIt) { - RecipientInformation ri = (RecipientInformation) recipCertificatesIt.next(); // Impl: if a matching certificate 
was previously found it is an error, // here we just don't care about it - X509Certificate certificate = material.getCertificate(); - X509CertificateHolder materialCert = null; - if (null != certificate) - { - materialCert = new X509CertificateHolder(certificate.getEncoded()); - } RecipientId rid = ri.getRID(); - if (rid.match(materialCert) && !foundRecipient) + if (!foundRecipient && rid.match(materialCert)) { foundRecipient = true; PrivateKey privateKey = (PrivateKey) material.getPrivateKey(); - envelopedData = ri.getContent(new JceKeyTransEnvelopedRecipient(privateKey).setProvider("BC")); + // might need to call setContentProvider() if we use PKI token, see + // http://bouncy-castle.1462172.n4.nabble.com/CMSException-exception-unwrapping-key-key-invalid-unknown-key-type-passed-to-RSA-td4658109.html + envelopedData = ri.getContent(new JceKeyTransEnvelopedRecipient(privateKey)); break; } j++; @@ -191,7 +209,7 @@ public void prepareForDecryption(PDEncryption encryption, COSArray documentIDArr } if (!foundRecipient || envelopedData == null) { - throw new IOException("The certificate matches none of " + i + throw new IOException("The certificate matches none of " + array.size() + " recipient entries" + extraInfo.toString()); } if (envelopedData.length != 24) @@ -224,12 +242,36 @@ public void prepareForDecryption(PDEncryption encryption, COSArray documentIDArr sha1InputOffset += recipientFieldsByte.length; } - MessageDigest md = MessageDigests.getSHA1(); - byte[] mdResult = md.digest(sha1Input); + byte[] mdResult; + if (encryption.getVersion() == 4 || encryption.getVersion() == 5) + { + if (encryption.getVersion() == 4) + { + mdResult = MessageDigests.getSHA1().digest(sha1Input); + } + else + { + mdResult = MessageDigests.getSHA256().digest(sha1Input); + } + + // detect whether AES encryption is used. This assumes that the encryption algo is + // stored in the PDCryptFilterDictionary + // However, crypt filters are used only when V is 4 or 5. 
+ if (defaultCryptFilterDictionary != null) + { + COSName cryptFilterMethod = defaultCryptFilterDictionary.getCryptFilterMethod(); + setAES(COSName.AESV2.equals(cryptFilterMethod) || + COSName.AESV3.equals(cryptFilterMethod)); + } + } + else + { + mdResult = MessageDigests.getSHA1().digest(sha1Input); + } // we have the encryption key ... - encryptionKey = new byte[this.keyLength / 8]; - System.arraycopy(mdResult, 0, encryptionKey, 0, this.keyLength / 8); + setEncryptionKey(new byte[getKeyLength() / 8]); + System.arraycopy(mdResult, 0, getEncryptionKey(), 0, getKeyLength() / 8); } catch (CMSException e) { @@ -279,10 +321,6 @@ private void appendCertInfo(StringBuilder extraInfo, KeyTransRecipientId ktRid, @Override public void prepareDocumentForEncryption(PDDocument doc) throws IOException { - if (keyLength == 256) - { - throw new IOException("256 bit key length is not supported yet for public key security"); - } try { PDEncryption dictionary = doc.getEncryption(); @@ -292,16 +330,14 @@ public void prepareDocumentForEncryption(PDDocument doc) throws IOException } dictionary.setFilter(FILTER); - dictionary.setLength(this.keyLength); - dictionary.setVersion(2); - + dictionary.setLength(getKeyLength()); + int version = computeVersionNumber(); + dictionary.setVersion(version); + // remove CF, StmF, and StrF entries that may be left from a previous encryption dictionary.removeV45filters(); - - dictionary.setSubFilter(SUBFILTER); // create the 20 bytes seed - byte[] seed = new byte[20]; KeyGenerator key; @@ -317,42 +353,56 @@ public void prepareDocumentForEncryption(PDDocument doc) throws IOException key.init(192, new SecureRandom()); SecretKey sk = key.generateKey(); - System.arraycopy(sk.getEncoded(), 0, seed, 0, 20); // create the 20 bytes seed - - byte[][] recipientsField = computeRecipientsField(seed); - dictionary.setRecipients(recipientsField); - int sha1InputLength = seed.length; + // create the 20 bytes seed + System.arraycopy(sk.getEncoded(), 0, seed, 0, 20); + 
+ byte[][] recipientsFields = computeRecipientsField(seed); + + int shaInputLength = seed.length; - for(int j=0; j it = policy.getRecipientsIterator(); + PublicKeyProtectionPolicy protectionPolicy = (PublicKeyProtectionPolicy) getProtectionPolicy(); + byte[][] recipientsField = new byte[protectionPolicy.getNumberOfRecipients()][]; + Iterator it = protectionPolicy.getRecipientsIterator(); int i = 0; while(it.hasNext()) @@ -384,17 +454,13 @@ private byte[][] computeRecipientsField(byte[] seed) throws GeneralSecurityExcep pkcs7input[21] = three; pkcs7input[22] = two; pkcs7input[23] = one; - + ASN1Primitive obj = createDERForRecipient(pkcs7input, certificate); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - - DEROutputStream k = new DEROutputStream(baos); - - k.writeObject(obj); - + obj.encodeTo(baos, ASN1Encoding.DER); + recipientsField[i] = baos.toByteArray(); - + i++; } return recipientsField; @@ -403,20 +469,21 @@ private byte[][] computeRecipientsField(byte[] seed) throws GeneralSecurityExcep private ASN1Primitive createDERForRecipient(byte[] in, X509Certificate cert) throws IOException, GeneralSecurityException { - String algorithm = "1.2.840.113549.3.2"; + String algorithm = PKCSObjectIdentifiers.RC2_CBC.getId(); AlgorithmParameterGenerator apg; KeyGenerator keygen; Cipher cipher; try { - apg = AlgorithmParameterGenerator.getInstance(algorithm); - keygen = KeyGenerator.getInstance(algorithm); - cipher = Cipher.getInstance(algorithm); + apg = AlgorithmParameterGenerator.getInstance(algorithm, SecurityProvider.getProvider()); + keygen = KeyGenerator.getInstance(algorithm, SecurityProvider.getProvider()); + cipher = Cipher.getInstance(algorithm, SecurityProvider.getProvider()); } catch (NoSuchAlgorithmException e) { - // should never happen, if this happens throw IOException instead - throw new RuntimeException("Could not find a suitable javax.crypto provider", e); + // happens when using the command line app .jar file + throw new IOException("Could 
not find a suitable javax.crypto provider for algorithm " + + algorithm + "; possible reason: using an unsigned .jar file", e); } catch (NoSuchPaddingException e) { @@ -453,7 +520,7 @@ private KeyTransRecipientInfo computeRecipientInfo(X509Certificate x509certifica BadPaddingException, IllegalBlockSizeException { ASN1InputStream input = new ASN1InputStream(x509certificate.getTBSCertificate()); - TBSCertificateStructure certificate = TBSCertificateStructure.getInstance(input.readObject()); + TBSCertificate certificate = TBSCertificate.getInstance(input.readObject()); input.close(); AlgorithmIdentifier algorithmId = certificate.getSubjectPublicKeyInfo().getAlgorithm(); @@ -465,7 +532,8 @@ private KeyTransRecipientInfo computeRecipientInfo(X509Certificate x509certifica Cipher cipher; try { - cipher = Cipher.getInstance(algorithmId.getAlgorithm().getId()); + cipher = Cipher.getInstance(algorithmId.getAlgorithm().getId(), + SecurityProvider.getProvider()); } catch (NoSuchAlgorithmException e) { @@ -484,13 +552,4 @@ private KeyTransRecipientInfo computeRecipientInfo(X509Certificate x509certifica RecipientIdentifier recipientId = new RecipientIdentifier(serial); return new KeyTransRecipientInfo(recipientId, algorithmId, octets); } - - /** - * {@inheritDoc} - */ - @Override - public boolean hasProtectionPolicy() - { - return policy != null; - } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/RC4Cipher.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/RC4Cipher.java index 6d4c310df79..a1c077caf1a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/RC4Cipher.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/RC4Cipher.java @@ -70,7 +70,7 @@ public void setKey( byte[] key ) } /** - * Thie will ensure that the value for a byte >=0. + * This will ensure that the value for a byte >=0. * * @param aByte The byte to test against. 
* @@ -122,9 +122,9 @@ public void write( byte aByte, OutputStream output ) throws IOException */ public void write( byte[] data, OutputStream output ) throws IOException { - for( int i = 0; i < data.length; i++ ) + for (byte aData : data) { - write( data[i], output ); + write(aData, output); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SaslPrep.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SaslPrep.java new file mode 100644 index 00000000000..72fa7119166 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SaslPrep.java @@ -0,0 +1,344 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.pdmodel.encryption; + +import java.nio.CharBuffer; +import java.text.Normalizer; + +/** + * Copied from https://github.com/tombentley/saslprep/blob/master/src/main/java/SaslPrep.java on + * 30.5.2019, commit 2e30daa. + * + * @author Tom Bentley + */ +class SaslPrep +{ + + private SaslPrep() + { + } + + /** + * Return the {@code SASLPrep}-canonicalised version of the given {@code str} for use as a query + * string. This implements the {@code SASLPrep} algorithm defined in + * RFC 4013. + * + * @param str The string to canonicalise. + * @return The canonicalised string. + * @throws IllegalArgumentException if the string contained prohibited codepoints, or broke the + * requirements for bidirectional character handling. 
+ * @see RFC 3454, Section 7 for + * discussion of what a query string is. + */ + static String saslPrepQuery(String str) + { + return saslPrep(str, true); + } + + /** + * Return the {@code SASLPrep}-canonicalised version of the given + * {@code str} for use as a stored string. This implements the {@code SASLPrep} algorithm defined + * in + * RFC 4013. + * + * @param str The string to canonicalise. + * @return The canonicalised string. + * @throws IllegalArgumentException if the string contained prohibited codepoints, or broke the + * requirements for bidirectional character handling. + * @see RFC 3454, Section 7 for + * discussion of what a stored string is. + */ + static String saslPrepStored(String str) + { + return saslPrep(str, false); + } + + private static String saslPrep(String str, boolean allowUnassigned) + { + char[] chars = str.toCharArray(); + + // 1. Map + // non-ASCII space chars mapped to space + for (int i = 0; i < str.length(); i++) + { + char ch = str.charAt(i); + if (nonAsciiSpace(ch)) + { + chars[i] = ' '; + } + } + + int length = 0; + for (int i = 0; i < str.length(); i++) + { + char ch = chars[i]; + if (!mappedToNothing(ch)) + { + chars[length++] = ch; + } + } + + // 2. Normalize + String normalized = Normalizer.normalize(CharBuffer.wrap(chars, 0, length), Normalizer.Form.NFKC); + + boolean containsRandALCat = false; + boolean containsLCat = false; + boolean initialRandALCat = false; + int i = 0; + while (i < normalized.length()) + { + final int codepoint = normalized.codePointAt(i); + // 3. Prohibit + if (prohibited(codepoint)) + { + throw new IllegalArgumentException("Prohibited character " + + codepoint + " at position " + i); + } + + // 4. 
Check bidi + final byte directionality = Character.getDirectionality(codepoint); + final boolean isRandALcat = directionality == Character.DIRECTIONALITY_RIGHT_TO_LEFT + || directionality == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC; + containsRandALCat |= isRandALcat; + containsLCat |= directionality == Character.DIRECTIONALITY_LEFT_TO_RIGHT; + + initialRandALCat |= i == 0 && isRandALcat; + if (!allowUnassigned && !Character.isDefined(codepoint)) + { + throw new IllegalArgumentException("Character at position " + i + " is unassigned"); + } + + i += Character.charCount(codepoint); + + if (initialRandALCat && i >= normalized.length() && !isRandALcat) + { + throw new IllegalArgumentException("First character is RandALCat, but last character is not"); + } + } + if (containsRandALCat && containsLCat) + { + throw new IllegalArgumentException("Contains both RandALCat characters and LCat characters"); + } + return normalized; + } + + /** + * Return true if the given {@code codepoint} is a prohibited character + * as defined by + * RFC 4013, + * Section 2.3. + */ + static boolean prohibited(int codepoint) + { + return nonAsciiSpace((char)codepoint) + || asciiControl((char)codepoint) + || nonAsciiControl(codepoint) + || privateUse(codepoint) + || nonCharacterCodePoint(codepoint) + || surrogateCodePoint(codepoint) + || inappropriateForPlainText(codepoint) + || inappropriateForCanonical(codepoint) + || changeDisplayProperties(codepoint) + || tagging(codepoint); + } + + /** + * Return true if the given {@code codepoint} is a tagging character + * as defined by + * RFC 3454, + * Appendix C.9. + */ + private static boolean tagging(int codepoint) + { + return codepoint == 0xE0001 + || 0xE0020 <= codepoint && codepoint <= 0xE007F; + } + + /** + * Return true if the given {@code codepoint} is change display properties + * or deprecated characters as defined by + * RFC 3454, + * Appendix C.8. 
+ */ + private static boolean changeDisplayProperties(int codepoint) + { + return codepoint == 0x0340 + || codepoint == 0x0341 + || codepoint == 0x200E + || codepoint == 0x200F + || codepoint == 0x202A + || codepoint == 0x202B + || codepoint == 0x202C + || codepoint == 0x202D + || codepoint == 0x202E + || codepoint == 0x206A + || codepoint == 0x206B + || codepoint == 0x206C + || codepoint == 0x206D + || codepoint == 0x206E + || codepoint == 0x206F + ; + } + + /** + * Return true if the given {@code codepoint} is inappropriate for + * canonical representation characters as defined by + * RFC 3454, + * Appendix C.7. + */ + private static boolean inappropriateForCanonical(int codepoint) + { + return 0x2FF0 <= codepoint && codepoint <= 0x2FFB; + } + + /** + * Return true if the given {@code codepoint} is inappropriate for plain + * text characters as defined by + * RFC 3454, + * Appendix C.6. + */ + private static boolean inappropriateForPlainText(int codepoint) + { + return codepoint == 0xFFF9 + || codepoint == 0xFFFA + || codepoint == 0xFFFB + || codepoint == 0xFFFC + || codepoint == 0xFFFD + ; + } + + /** + * Return true if the given {@code codepoint} is a surrogate + * code point as defined by + * RFC 3454, + * Appendix C.5. + */ + private static boolean surrogateCodePoint(int codepoint) + { + return 0xD800 <= codepoint && codepoint <= 0xDFFF; + } + + /** + * Return true if the given {@code codepoint} is a non-character + * code point as defined by + * RFC 3454, + * Appendix C.4. 
+ */ + private static boolean nonCharacterCodePoint(int codepoint) + { + return 0xFDD0 <= codepoint && codepoint <= 0xFDEF + || 0xFFFE <= codepoint && codepoint <= 0xFFFF + || 0x1FFFE <= codepoint && codepoint <= 0x1FFFF + || 0x2FFFE <= codepoint && codepoint <= 0x2FFFF + || 0x3FFFE <= codepoint && codepoint <= 0x3FFFF + || 0x4FFFE <= codepoint && codepoint <= 0x4FFFF + || 0x5FFFE <= codepoint && codepoint <= 0x5FFFF + || 0x6FFFE <= codepoint && codepoint <= 0x6FFFF + || 0x7FFFE <= codepoint && codepoint <= 0x7FFFF + || 0x8FFFE <= codepoint && codepoint <= 0x8FFFF + || 0x9FFFE <= codepoint && codepoint <= 0x9FFFF + || 0xAFFFE <= codepoint && codepoint <= 0xAFFFF + || 0xBFFFE <= codepoint && codepoint <= 0xBFFFF + || 0xCFFFE <= codepoint && codepoint <= 0xCFFFF + || 0xDFFFE <= codepoint && codepoint <= 0xDFFFF + || 0xEFFFE <= codepoint && codepoint <= 0xEFFFF + || 0xFFFFE <= codepoint && codepoint <= 0xFFFFF + || 0x10FFFE <= codepoint && codepoint <= 0x10FFFF + ; + } + + /** + * Return true if the given {@code codepoint} is a private use character + * as defined by RFC 3454, + * Appendix C.3. + */ + private static boolean privateUse(int codepoint) + { + return 0xE000 <= codepoint && codepoint <= 0xF8FF + || 0xF0000 <= codepoint && codepoint <= 0xFFFFD + || 0x100000 <= codepoint && codepoint <= 0x10FFFD; + } + + /** + * Return true if the given {@code codepoint} is a non-ASCII control character + * as defined by RFC 3454, + * Appendix C.2.2. 
+ */ + private static boolean nonAsciiControl(int codepoint) + { + return 0x0080 <= codepoint && codepoint <= 0x009F + || codepoint == 0x06DD + || codepoint == 0x070F + || codepoint == 0x180E + || codepoint == 0x200C + || codepoint == 0x200D + || codepoint == 0x2028 + || codepoint == 0x2029 + || codepoint == 0x2060 + || codepoint == 0x2061 + || codepoint == 0x2062 + || codepoint == 0x2063 + || 0x206A <= codepoint && codepoint <= 0x206F + || codepoint == 0xFEFF + || 0xFFF9 <= codepoint && codepoint <= 0xFFFC + || 0x1D173 <= codepoint && codepoint <= 0x1D17A; + } + + /** + * Return true if the given {@code ch} is an ASCII control character + * as defined by RFC 3454, + * Appendix C.2.1. + */ + private static boolean asciiControl(char ch) + { + return '\u0000' <= ch && ch <= '\u001F' || ch == '\u007F'; + } + + /** + * Return true if the given {@code ch} is a non-ASCII space character + * as defined by RFC 3454, + * Appendix C.1.2. + */ + private static boolean nonAsciiSpace(char ch) + { + return ch == '\u00A0' + || ch == '\u1680' + || '\u2000' <= ch && ch <= '\u200B' + || ch == '\u202F' + || ch == '\u205F' + || ch == '\u3000'; + } + + /** + * Return true if the given {@code ch} is a "commonly mapped to nothing" character + * as defined by RFC 3454, + * Appendix B.1. 
+ */ + private static boolean mappedToNothing(char ch) + { + return ch == '\u00AD' + || ch == '\u034F' + || ch == '\u1806' + || ch == '\u180B' + || ch == '\u180C' + || ch == '\u180D' + || ch == '\u200B' + || ch == '\u200C' + || ch == '\u200D' + || ch == '\u2060' + || '\uFE00' <= ch && ch <= '\uFE0F' + || ch == '\uFEFF'; + } +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java index 3e8002dfd9b..b89d917f250 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java @@ -23,22 +23,17 @@ import java.io.InputStream; import java.io.OutputStream; import java.security.GeneralSecurityException; -import java.security.InvalidAlgorithmParameterException; -import java.security.InvalidKeyException; +import java.security.Key; import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; import java.security.SecureRandom; import java.util.Arrays; -import java.util.HashSet; +import java.util.Collections; +import java.util.IdentityHashMap; import java.util.Map; import java.util.Set; -import javax.crypto.BadPaddingException; import javax.crypto.Cipher; import javax.crypto.CipherInputStream; -import javax.crypto.IllegalBlockSizeException; -import javax.crypto.NoSuchPaddingException; -import javax.crypto.SecretKey; import javax.crypto.spec.IvParameterSpec; import javax.crypto.spec.SecretKeySpec; @@ -71,10 +66,12 @@ public abstract class SecurityHandler // see 7.6.2, page 58, PDF 32000-1:2008 private static final byte[] AES_SALT = { (byte) 0x73, (byte) 0x41, (byte) 0x6c, (byte) 0x54 }; - /** The length in bits of the secret key used to encrypt the document. */ + /** + * The length in bits of the secret key used to encrypt the document. Will become private in 3.0. 
+ */ protected int keyLength = DEFAULT_KEY_LENGTH; - /** The encryption key that will used to encrypt / decrypt.*/ + /** The encryption key that will be used to encrypt / decrypt. Will become private in 3.0. */ protected byte[] encryptionKey; /** The RC4 implementation used for cryptographic functions. */ @@ -83,10 +80,21 @@ public abstract class SecurityHandler /** indicates if the Metadata have to be decrypted of not. */ private boolean decryptMetadata; - private final Set objects = new HashSet(); + /** Can be used to allow stateless AES encryption */ + private SecureRandom customSecureRandom; + + // PDFBOX-4453, PDFBOX-4477: Originally this was just a Set. This failed in rare cases + // when a decrypted string was identical to an encrypted string. + // Because COSString.equals() checks the contents, decryption was then skipped. + // This solution keeps all different "equal" objects. + // IdentityHashMap solves this problem and is also faster than a HashMap + private final Set objects = + Collections.newSetFromMap(new IdentityHashMap()); private boolean useAES; + private ProtectionPolicy protectionPolicy = null; + /** * The access permission granted to the current user for the document. These * permissions are computed during decryption and are in read only mode. @@ -94,7 +102,17 @@ public abstract class SecurityHandler private AccessPermission currentAccessPermission = null; /** - * Set wether to decrypt meta data. + * The stream filter name. + */ + private COSName streamFilterName; + + /** + * The string filter name. + */ + private COSName stringFilterName; + + /** + * Set whether to decrypt meta data. * * @param decryptMetadata true if meta data has to be decrypted. */ @@ -102,6 +120,36 @@ protected void setDecryptMetadata(boolean decryptMetadata) { this.decryptMetadata = decryptMetadata; } + + /** + * Set the string filter name. + * + * @param stringFilterName the string filter name. 
+ */ + protected void setStringFilterName(COSName stringFilterName) + { + this.stringFilterName = stringFilterName; + } + + /** + * Set the stream filter name. + * + * @param streamFilterName the stream filter name. + */ + protected void setStreamFilterName(COSName streamFilterName) + { + this.streamFilterName = streamFilterName; + } + + /** + * Set the custom SecureRandom. + * + * @param customSecureRandom the custom SecureRandom for AES encryption + */ + public void setCustomSecureRandom(SecureRandom customSecureRandom) + { + this.customSecureRandom = customSecureRandom; + } /** * Prepare the document for encryption. @@ -119,6 +167,7 @@ protected void setDecryptMetadata(boolean decryptMetadata) * @param documentIDArray document id which is returned via {@link org.apache.pdfbox.cos.COSDocument#getDocumentID()} * @param decryptionMaterial Information used to decrypt the document. * + * @throws InvalidPasswordException If the password is incorrect. * @throws IOException If there is an error accessing data. */ public abstract void prepareForDecryption(PDEncryption encryption, COSArray documentIDArray, @@ -229,7 +278,7 @@ protected void encryptDataRC4(byte[] finalKey, byte[] input, OutputStream output /** * Encrypt or decrypt data with AES with key length other than 256 bits. * - * @param finalKey The final key obtained with via {@link #calcFinalKey()}. + * @param finalKey The final key obtained with via {@link #calcFinalKey(long, long)}. * @param data The data to encrypt. * @param output The output to write the encrypted data to. * @param decrypt true to decrypt the data, false to encrypt it. 
@@ -248,45 +297,20 @@ private void encryptDataAESother(byte[] finalKey, InputStream data, OutputStream try { - Cipher decryptCipher; - try - { - decryptCipher = Cipher.getInstance("AES/CBC/PKCS5Padding"); - } - catch (NoSuchAlgorithmException e) - { - // should never happen - throw new RuntimeException(e); - } - - SecretKey aesKey = new SecretKeySpec(finalKey, "AES"); - IvParameterSpec ips = new IvParameterSpec(iv); - decryptCipher.init(decrypt ? Cipher.DECRYPT_MODE : Cipher.ENCRYPT_MODE, aesKey, ips); + Cipher decryptCipher = createCipher(finalKey, iv, decrypt); byte[] buffer = new byte[256]; int n; while ((n = data.read(buffer)) != -1) { - output.write(decryptCipher.update(buffer, 0, n)); + byte[] dst = decryptCipher.update(buffer, 0, n); + if (dst != null) + { + output.write(dst); + } } output.write(decryptCipher.doFinal()); } - catch (InvalidKeyException e) - { - throw new IOException(e); - } - catch (InvalidAlgorithmParameterException e) - { - throw new IOException(e); - } - catch (NoSuchPaddingException e) - { - throw new IOException(e); - } - catch (IllegalBlockSizeException e) - { - throw new IOException(e); - } - catch (BadPaddingException e) + catch (GeneralSecurityException e) { throw new IOException(e); } @@ -313,10 +337,7 @@ private void encryptDataAES256(InputStream data, OutputStream output, boolean de Cipher cipher; try { - cipher = Cipher.getInstance("AES/CBC/PKCS5Padding"); - SecretKeySpec keySpec = new SecretKeySpec(encryptionKey, "AES"); - IvParameterSpec ivSpec = new IvParameterSpec(iv); - cipher.init(decrypt ? 
Cipher.DECRYPT_MODE : Cipher.ENCRYPT_MODE, keySpec, ivSpec); + cipher = createCipher(this.encryptionKey, iv, decrypt); } catch (GeneralSecurityException e) { @@ -328,7 +349,7 @@ private void encryptDataAES256(InputStream data, OutputStream output, boolean de { IOUtils.copy(cis, output); } - catch(IOException exception) + catch (IOException exception) { // starting with java 8 the JVM wraps an IOException around a GeneralSecurityException // it should be safe to swallow a GeneralSecurityException @@ -336,7 +357,7 @@ private void encryptDataAES256(InputStream data, OutputStream output, boolean de { throw exception; } - LOG.debug("A GeneralSecurityException occured when decrypting some stream data", exception); + LOG.debug("A GeneralSecurityException occurred when decrypting some stream data", exception); } finally { @@ -344,13 +365,23 @@ private void encryptDataAES256(InputStream data, OutputStream output, boolean de } } + private Cipher createCipher(byte[] key, byte[] iv, boolean decrypt) throws GeneralSecurityException + { + @SuppressWarnings({"squid:S4432"}) // PKCS#5 padding is requested by PDF specification + Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding"); + Key keySpec = new SecretKeySpec(key, "AES"); + IvParameterSpec ips = new IvParameterSpec(iv); + cipher.init(decrypt ? 
Cipher.DECRYPT_MODE : Cipher.ENCRYPT_MODE, keySpec, ips); + return cipher; + } + private boolean prepareAESInitializationVector(boolean decrypt, byte[] iv, InputStream data, OutputStream output) throws IOException { if (decrypt) { // read IV from stream - int ivSize = data.read(iv); - if (ivSize == -1) + int ivSize = (int) IOUtils.populateBuffer(data, iv); + if (ivSize == 0) { return false; } @@ -364,17 +395,31 @@ private boolean prepareAESInitializationVector(boolean decrypt, byte[] iv, Input else { // generate random IV and write to stream - SecureRandom rnd = new SecureRandom(); + SecureRandom rnd = getSecureRandom(); rnd.nextBytes(iv); output.write(iv); } return true; } + /** + * Returns the SecureRandom used to generate the AES initialization vector: the custom one if it has been set, otherwise a newly instantiated SecureRandom. + * + * @return the custom SecureRandom if set, otherwise a new SecureRandom instance + */ + private SecureRandom getSecureRandom() + { + if (customSecureRandom != null) + { + return customSecureRandom; + } + return new SecureRandom(); + } + /** * This will dispatch to the correct method. * - * @param obj The object to decrypt. + * @param obj The object to decrypt. * @param objNum The object number. * @param genNum The object generation Number. 
* @@ -382,26 +427,36 @@ private boolean prepareAESInitializationVector(boolean decrypt, byte[] iv, Input */ public void decrypt(COSBase obj, long objNum, long genNum) throws IOException { - if (!objects.contains(obj)) + if (!(obj instanceof COSString || obj instanceof COSDictionary || obj instanceof COSArray)) { - objects.add(obj); - - if (obj instanceof COSString) - { - decryptString((COSString) obj, objNum, genNum); - } - else if (obj instanceof COSStream) - { - decryptStream((COSStream) obj, objNum, genNum); - } - else if (obj instanceof COSDictionary) + return; + } + // PDFBOX-4477: only cache strings and streams, this improves speed and memory footprint + if (obj instanceof COSString) + { + if (objects.contains(obj)) { - decryptDictionary((COSDictionary) obj, objNum, genNum); + return; } - else if (obj instanceof COSArray) + objects.add(obj); + decryptString((COSString) obj, objNum, genNum); + } + else if (obj instanceof COSStream) + { + if (objects.contains(obj)) { - decryptArray((COSArray) obj, objNum, genNum); + return; } + objects.add(obj); + decryptStream((COSStream) obj, objNum, genNum); + } + else if (obj instanceof COSDictionary) + { + decryptDictionary((COSDictionary) obj, objNum, genNum); + } + else if (obj instanceof COSArray) + { + decryptArray((COSArray) obj, objNum, genNum); } } @@ -416,6 +471,12 @@ else if (obj instanceof COSArray) */ public void decryptStream(COSStream stream, long objNum, long genNum) throws IOException { + // Stream encrypted with identity filter + if (COSName.IDENTITY.equals(streamFilterName)) + { + return; + } + COSBase type = stream.getCOSName(COSName.TYPE); if (!decryptMetadata && COSName.METADATA.equals(type)) { @@ -431,7 +492,7 @@ public void decryptStream(COSStream stream, long objNum, long genNum) throws IOE // PDFBOX-3229 check case where metadata is not encrypted despite /EncryptMetadata missing InputStream is = stream.createRawInputStream(); byte buf[] = new byte[10]; - is.read(buf); + IOUtils.populateBuffer(is, 
buf); is.close(); if (Arrays.equals(buf, " entry : dictionary.entrySet()) { if (isSignature && COSName.CONTENTS.equals(entry.getKey())) @@ -525,6 +596,12 @@ private void decryptDictionary(COSDictionary dictionary, long objNum, long genNu */ private void decryptString(COSString string, long objNum, long genNum) throws IOException { + // String encrypted with identity filter + if (COSName.IDENTITY.equals(stringFilterName)) + { + return; + } + ByteArrayInputStream data = new ByteArrayInputStream(string.getBytes()); ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); try @@ -552,7 +629,7 @@ public void encryptString(COSString string, long objNum, int genNum) throws IOEx { ByteArrayInputStream data = new ByteArrayInputStream(string.getBytes()); ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - encryptData(objNum, genNum, data, buffer, false /* decrypt */); + encryptData(objNum, genNum, data, buffer, false /* encrypt */); string.setValue(buffer.toByteArray()); } @@ -639,5 +716,74 @@ public void setAES(boolean aesValue) * * @return true if a protection policy has been set. */ - public abstract boolean hasProtectionPolicy(); + public boolean hasProtectionPolicy() + { + return protectionPolicy != null; + } + + /** + * Returns the set {@link ProtectionPolicy} or null. + * + * @return The set {@link ProtectionPolicy}. + */ + protected ProtectionPolicy getProtectionPolicy() + { + return protectionPolicy; + } + + /** + * Sets the {@link ProtectionPolicy} to the given value. + * @param protectionPolicy The {@link ProtectionPolicy}, that shall be set. + */ + protected void setProtectionPolicy(ProtectionPolicy protectionPolicy) + { + this.protectionPolicy = protectionPolicy; + } + + /** + * Returns the current encryption key data. + * + * @return The current encryption key data. + */ + public byte[] getEncryptionKey() + { + return encryptionKey; + } + + /** + * Sets the current encryption key data. 
+ * + * @param encryptionKey The encryption key data to set. + */ + public void setEncryptionKey(byte[] encryptionKey) + { + this.encryptionKey = encryptionKey; + } + + /** + * Computes the version number of the {@link SecurityHandler} based on the encryption key + * length. See PDF Spec 1.6 p 93 and + * PDF + * 1.7 Supplement ExtensionLevel: 3 and + * PDF + * Spec 2.0. + * + * @return The computed version number. + */ + protected int computeVersionNumber() + { + if (keyLength == 40) + { + return 1; + } + else if (keyLength == 128 && protectionPolicy.isPreferAES()) + { + return 4; + } + else if (keyLength == 256) + { + return 5; + } + return 2; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandlerFactory.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandlerFactory.java index bb3d493c855..0a456123ae8 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandlerFactory.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandlerFactory.java @@ -23,8 +23,6 @@ import java.util.HashMap; import java.util.Map; -import org.bouncycastle.jce.provider.BouncyCastleProvider; - /** * Manages security handlers for the application. * It follows the singleton pattern. 
@@ -39,11 +37,6 @@ public final class SecurityHandlerFactory /** Singleton instance */ public static final SecurityHandlerFactory INSTANCE = new SecurityHandlerFactory(); - static - { - Security.addProvider(new BouncyCastleProvider()); - } - private final Map> nameToHandler = new HashMap>(); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityProvider.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityProvider.java new file mode 100644 index 00000000000..3ee2d2e9f10 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityProvider.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.encryption; + +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.security.Provider; + +/** + * Singleton which provides a security provider. + * + */ +public class SecurityProvider +{ + private static Provider provider = null; + + private SecurityProvider() + { + } + + /** + * Returns the provider to be used for advanced encrypting/decrypting. Default is the BouncyCastleProvider. 
+ * + * @return the security provider + * + * @throws IOException if the default provider can't be instantiated + */ + public static Provider getProvider() throws IOException + { + // TODO synchronize access + if (provider == null) + { + try + { + Class providerClass = (Class) Class + .forName("org.bouncycastle.jce.provider.BouncyCastleProvider"); + provider = providerClass.getDeclaredConstructor().newInstance(); + } + catch (ClassNotFoundException ex) + { + throw new IOException(ex); + } + catch (InstantiationException ex) + { + throw new IOException(ex); + } + catch (IllegalAccessException ex) + { + throw new IOException(ex); + } + catch (NoSuchMethodException ex) + { + throw new IOException(ex); + } + catch (InvocationTargetException ex) + { + throw new IOException(ex); + } + } + return provider; + } + + /** + * Set the provider to be used for advanced encrypting/decrypting. + * + * @param provider the security provider + */ + public static void setProvider(Provider provider) + { + SecurityProvider.provider = provider; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/StandardProtectionPolicy.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/StandardProtectionPolicy.java index 7a687705b8f..31dd6147206 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/StandardProtectionPolicy.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/StandardProtectionPolicy.java @@ -36,15 +36,17 @@ public final class StandardProtectionPolicy extends ProtectionPolicy { private AccessPermission permissions; + @SuppressWarnings({"squid:S2068"}) private String ownerPassword = ""; + @SuppressWarnings({"squid:S2068"}) private String userPassword = ""; /** * Creates an new instance of the standard protection policy * in order to protect a PDF document with passwords. * - * @param ownerPassword The owner's password. - * @param userPassword The users's password. + * @param ownerPassword The owner password. 
+ * @param userPassword The user password. * @param permissions The access permissions given to the user. */ public StandardProtectionPolicy(String ownerPassword, String userPassword, diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/StandardSecurityHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/StandardSecurityHandler.java index 4a1203f25c4..3129fb1e17c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/StandardSecurityHandler.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/StandardSecurityHandler.java @@ -74,10 +74,6 @@ public final class StandardSecurityHandler extends SecurityHandler // hashes used for Algorithm 2.B, depending on remainder from E modulo 3 private static final String[] HASHES_2B = new String[] {"SHA-256", "SHA-384", "SHA-512"}; - private static final int DEFAULT_VERSION = 1; - - private StandardProtectionPolicy policy; - /** * Constructor. */ @@ -88,34 +84,12 @@ public StandardSecurityHandler() /** * Constructor used for encryption. * - * @param p The protection policy. + * @param standardProtectionPolicy The protection policy. */ - public StandardSecurityHandler(StandardProtectionPolicy p) + public StandardSecurityHandler(StandardProtectionPolicy standardProtectionPolicy) { - policy = p; - keyLength = policy.getEncryptionKeyLength(); - } - - /** - * Computes the version number of the StandardSecurityHandler - * regarding the encryption key length. - * See PDF Spec 1.6 p 93 and PDF 1.7 AEL3 - * - * @return The computed version number. 
- */ - private int computeVersionNumber() - { - if(keyLength == 40) - { - return DEFAULT_VERSION; - } - //TODO return 4 if keyLength is 128 to enable AES128 functionality - else if(keyLength == 256) - { - return 5; - } - - return 2; + setProtectionPolicy(standardProtectionPolicy); + setKeyLength(standardProtectionPolicy.getEncryptionKeyLength()); } /** @@ -129,7 +103,9 @@ else if(keyLength == 256) */ private int computeRevisionNumber(int version) { - if(version < 2 && !policy.getPermissions().hasAnyRevision3PermissionSet()) + StandardProtectionPolicy protectionPolicy = (StandardProtectionPolicy) getProtectionPolicy(); + AccessPermission permissions = protectionPolicy.getPermissions(); + if (version < 2 && !permissions.hasAnyRevision3PermissionSet()) { return 2; } @@ -142,7 +118,7 @@ private int computeRevisionNumber(int version) { return 4; } - if ( version == 2 || version == 3 || policy.getPermissions().hasAnyRevision3PermissionSet()) + if (version == 2 || version == 3 || permissions.hasAnyRevision3PermissionSet()) { return 3; } @@ -158,6 +134,7 @@ private int computeRevisionNumber(int version) * @param documentIDArray document id * @param decryptionMaterial Information used to decrypt the document. * + * @throws InvalidPasswordException If the password is incorrect. * @throws IOException If there is an error accessing data. */ @Override @@ -169,6 +146,12 @@ public void prepareForDecryption(PDEncryption encryption, COSArray documentIDArr { throw new IOException("Decryption material is not compatible with the document"); } + + // This is only used with security version 4 and 5. 
+ if (encryption.getVersion() >= 4) { + setStreamFilterName(encryption.getStreamFilterName()); + setStringFilterName(encryption.getStringFilterName()); // strings are governed by the string filter (/StrF), not the stream filter (/StmF) + } setDecryptMetadata(encryption.isEncryptMetaData()); StandardDecryptionMaterial material = (StandardDecryptionMaterial)decryptionMaterial; @@ -198,7 +181,12 @@ public void prepareForDecryption(PDEncryption encryption, COSArray documentIDArr ue = encryption.getUserEncryptionKey(); oe = encryption.getOwnerEncryptionKey(); } - + + if (dicRevision == 6) + { + password = SaslPrep.saslPrepQuery(password); // PDFBOX-4155 + } + AccessPermission currentAccessPermission; if( isOwnerPassword(password.getBytes(passwordCharset), userKey, ownerKey, @@ -219,7 +207,7 @@ public void prepareForDecryption(PDEncryption encryption, COSArray documentIDArr ownerKey, dicRevision, dicLength ); } - encryptionKey = + setEncryptionKey( computeEncryptedKey( computedPassword, ownerKey, userKey, oe, ue, @@ -227,16 +215,17 @@ public void prepareForDecryption(PDEncryption encryption, COSArray documentIDArr documentIDBytes, dicRevision, dicLength, - encryptMetadata, true ); + encryptMetadata, true)); } else if( isUserPassword(password.getBytes(passwordCharset), userKey, ownerKey, dicPermissions, documentIDBytes, dicRevision, dicLength, encryptMetadata) ) { currentAccessPermission = new AccessPermission(dicPermissions); + currentAccessPermission.setReadOnly(); setCurrentAccessPermission(currentAccessPermission); - encryptionKey = + setEncryptionKey( computeEncryptedKey( password.getBytes(passwordCharset), ownerKey, userKey, oe, ue, @@ -244,7 +233,7 @@ else if( isUserPassword(password.getBytes(passwordCharset), userKey, ownerKey, documentIDBytes, dicRevision, dicLength, - encryptMetadata, false ); + encryptMetadata, false)); } else { @@ -289,7 +278,7 @@ private byte[] getDocumentIDBytes(COSArray documentIDArray) return documentIDBytes; } - // Algorithm 13: validate permissions ("Perms" field). 
Relaxed to accomodate buggy encoders + // Algorithm 13: validate permissions ("Perms" field). Relaxed to accommodate buggy encoders // https://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/adobe_supplement_iso32000.pdf private void validatePerms(PDEncryption encryption, int dicPermissions, boolean encryptMetadata) throws IOException { @@ -298,7 +287,7 @@ private void validatePerms(PDEncryption encryption, int dicPermissions, boolean // "Decrypt the 16-byte Perms string using AES-256 in ECB mode with an // initialization vector of zero and the file encryption key as the key." Cipher cipher = Cipher.getInstance("AES/ECB/NoPadding"); - cipher.init(Cipher.DECRYPT_MODE, new SecretKeySpec(encryptionKey, "AES")); + cipher.init(Cipher.DECRYPT_MODE, new SecretKeySpec(getEncryptionKey(), "AES")); byte[] perms = cipher.doFinal(encryption.getPerms()); // "Verify that bytes 9-11 of the result are the characters ‘a’, ‘d’, ‘b’." @@ -333,7 +322,7 @@ private void validatePerms(PDEncryption encryption, int dicPermissions, boolean /** * Prepare document for encryption. * - * @param document The documeent to encrypt. + * @param document The document to encrypt. * * @throws IOException If there is an error accessing data. 
*/ @@ -355,10 +344,11 @@ public void prepareDocumentForEncryption(PDDocument document) throws IOException encryptionDictionary.removeV45filters(); } encryptionDictionary.setRevision(revision); - encryptionDictionary.setLength(keyLength); + encryptionDictionary.setLength(getKeyLength()); - String ownerPassword = policy.getOwnerPassword(); - String userPassword = policy.getUserPassword(); + StandardProtectionPolicy protectionPolicy = (StandardProtectionPolicy) getProtectionPolicy(); + String ownerPassword = protectionPolicy.getOwnerPassword(); + String userPassword = protectionPolicy.getUserPassword(); if( ownerPassword == null ) { ownerPassword = ""; @@ -367,21 +357,24 @@ public void prepareDocumentForEncryption(PDDocument document) throws IOException { userPassword = ""; } - + // If no owner password is set, use the user password instead. if (ownerPassword.isEmpty()) { ownerPassword = userPassword; } - int permissionInt = policy.getPermissions().getPermissionBytes(); + int permissionInt = protectionPolicy.getPermissions().getPermissionBytes(); encryptionDictionary.setPermissions(permissionInt); - int length = keyLength/8; + int length = getKeyLength()/8; if (revision == 6) { + // PDFBOX-4155 + ownerPassword = SaslPrep.saslPrepStored(ownerPassword); + userPassword = SaslPrep.saslPrepStored(userPassword); prepareEncryptionDictRev6(ownerPassword, userPassword, encryptionDictionary, permissionInt); } else @@ -391,7 +384,7 @@ public void prepareDocumentForEncryption(PDDocument document) throws IOException } document.setEncryptionDictionary( encryptionDictionary ); - document.getDocument().setEncryptionDictionary(encryptionDictionary.getCOSDictionary()); + document.getDocument().setEncryptionDictionary(encryptionDictionary.getCOSObject()); } private void prepareEncryptionDictRev6(String ownerPassword, String userPassword, @@ -404,8 +397,8 @@ private void prepareEncryptionDictRev6(String ownerPassword, String userPassword Cipher cipher = 
Cipher.getInstance("AES/CBC/NoPadding"); // make a random 256-bit file encryption key - encryptionKey = new byte[32]; - rnd.nextBytes(encryptionKey); + setEncryptionKey(new byte[32]); + rnd.nextBytes(getEncryptionKey()); // Algorithm 8a: Compute U byte[] userPasswordBytes = truncate127(userPassword.getBytes(Charsets.UTF_8)); @@ -422,7 +415,7 @@ private void prepareEncryptionDictRev6(String ownerPassword, String userPassword userPasswordBytes, null); cipher.init(Cipher.ENCRYPT_MODE, new SecretKeySpec(hashUE, "AES"), new IvParameterSpec(new byte[16])); - byte[] ue = cipher.doFinal(encryptionKey); + byte[] ue = cipher.doFinal(getEncryptionKey()); // Algorithm 9a: Compute O byte[] ownerPasswordBytes = truncate127(ownerPassword.getBytes(Charsets.UTF_8)); @@ -439,7 +432,7 @@ private void prepareEncryptionDictRev6(String ownerPassword, String userPassword ownerPasswordBytes, u); cipher.init(Cipher.ENCRYPT_MODE, new SecretKeySpec(hashOE, "AES"), new IvParameterSpec(new byte[16])); - byte[] oe = cipher.doFinal(encryptionKey); + byte[] oe = cipher.doFinal(getEncryptionKey()); // Set keys and other required constants in encryption dictionary encryptionDictionary.setUserKey(u); @@ -468,7 +461,7 @@ private void prepareEncryptionDictRev6(String ownerPassword, String userPassword perms[i] = (byte) rnd.nextInt(); } - cipher.init(Cipher.ENCRYPT_MODE, new SecretKeySpec(encryptionKey, "AES"), + cipher.init(Cipher.ENCRYPT_MODE, new SecretKeySpec(getEncryptionKey(), "AES"), new IvParameterSpec(new byte[16])); byte[] permsEnc = cipher.doFinal(perms); @@ -518,8 +511,8 @@ private void prepareEncryptionDictRev2345(String ownerPassword, String userPassw userPassword.getBytes(Charsets.ISO_8859_1), ownerBytes, permissionInt, id.getBytes(), revision, length, true); - encryptionKey = computeEncryptedKey(userPassword.getBytes(Charsets.ISO_8859_1), ownerBytes, - null, null, null, permissionInt, id.getBytes(), revision, length, true, false); + 
setEncryptionKey(computeEncryptedKey(userPassword.getBytes(Charsets.ISO_8859_1), ownerBytes, + null, null, null, permissionInt, id.getBytes(), revision, length, true, false)); encryptionDictionary.setOwnerKey(ownerBytes); encryptionDictionary.setUserKey(userBytes); @@ -534,7 +527,7 @@ private void prepareEncryptionDictAES(PDEncryption encryptionDictionary, COSName { PDCryptFilterDictionary cryptFilterDictionary = new PDCryptFilterDictionary(); cryptFilterDictionary.setCryptFilterMethod(aesVName); - cryptFilterDictionary.setLength(keyLength); + cryptFilterDictionary.setLength(getKeyLength()); encryptionDictionary.setStdCryptFilterDictionary(cryptFilterDictionary); encryptionDictionary.setStreamFilterName(COSName.STD_CF); encryptionDictionary.setStringFilterName(COSName.STD_CF); @@ -550,7 +543,7 @@ private void prepareEncryptionDictAES(PDEncryption encryptionDictionary, COSName * @param permissions The set of permissions on the document. * @param id The document id. * @param encRevision The encryption algorithm revision. - * @param length The encryption key length. + * @param keyLengthInBytes The encryption key length in bytes. * @param encryptMetadata The encryption metadata * * @return True If the ownerPassword param is the owner password. @@ -558,7 +551,7 @@ private void prepareEncryptionDictAES(PDEncryption encryptionDictionary, COSName * @throws IOException If there is an error accessing data. 
*/ public boolean isOwnerPassword(byte[] ownerPassword, byte[] user, byte[] owner, - int permissions, byte[] id, int encRevision, int length, + int permissions, byte[] id, int encRevision, int keyLengthInBytes, boolean encryptMetadata) throws IOException { if (encRevision == 6 || encRevision == 5) @@ -567,6 +560,11 @@ public boolean isOwnerPassword(byte[] ownerPassword, byte[] user, byte[] owner, byte[] oHash = new byte[32]; byte[] oValidationSalt = new byte[8]; + if (owner.length < 40) + { + // PDFBOX-5104 + throw new IOException("Owner password is too short"); + } System.arraycopy(owner, 0, oHash, 0, 32); System.arraycopy(owner, 32, oValidationSalt, 0, 8); @@ -584,8 +582,8 @@ public boolean isOwnerPassword(byte[] ownerPassword, byte[] user, byte[] owner, } else { - byte[] userPassword = getUserPassword( ownerPassword, owner, encRevision, length ); - return isUserPassword( userPassword, user, owner, permissions, id, encRevision, length, + byte[] userPassword = getUserPassword( ownerPassword, owner, encRevision, keyLengthInBytes ); + return isUserPassword( userPassword, user, owner, permissions, id, encRevision, keyLengthInBytes, encryptMetadata ); } } @@ -644,7 +642,7 @@ else if( encRevision == 3 || encRevision == 4) * @param permissions The permissions for the document. * @param id The document id. * @param encRevision The revision of the encryption algorithm. - * @param length The length of the encryption key. + * @param keyLengthInBytes The length of the encryption key in bytes. * @param encryptMetadata The encryption metadata * @param isOwnerPassword whether the password given is the owner password (for revision 6) * @@ -653,7 +651,7 @@ else if( encRevision == 3 || encRevision == 4) * @throws IOException If there is an error with encryption. 
*/ public byte[] computeEncryptedKey(byte[] password, byte[] o, byte[] u, byte[] oe, byte[] ue, - int permissions, byte[] id, int encRevision, int length, + int permissions, byte[] id, int encRevision, int keyLengthInBytes, boolean encryptMetadata, boolean isOwnerPassword) throws IOException { @@ -663,7 +661,7 @@ public byte[] computeEncryptedKey(byte[] password, byte[] o, byte[] u, byte[] oe } else { - return computeEncryptedKeyRev234(password, o, permissions, id, encryptMetadata, length, encRevision); + return computeEncryptedKeyRev234(password, o, permissions, id, encryptMetadata, keyLengthInBytes, encRevision); } } @@ -718,6 +716,10 @@ private byte[] computeEncryptedKeyRev56(byte[] password, boolean isOwnerPassword if (isOwnerPassword) { + if (oe == null) + { + throw new IOException("/Encrypt/OE entry is missing"); + } byte[] oKeySalt = new byte[8]; System.arraycopy(o, 40, oKeySalt, 0, 8); @@ -734,6 +736,10 @@ private byte[] computeEncryptedKeyRev56(byte[] password, boolean isOwnerPassword } else { + if (ue == null) + { + throw new IOException("/Encrypt/UE entry is missing"); + } byte[] uKeySalt = new byte[8]; System.arraycopy(u, 40, uKeySalt, 0, 8); @@ -769,7 +775,7 @@ private byte[] computeEncryptedKeyRev56(byte[] password, boolean isOwnerPassword * @param permissions The document permissions. * @param id The document id. * @param encRevision The revision of the encryption. - * @param length The length of the encryption key. + * @param keyLengthInBytes The length of the encryption key in bytes. * @param encryptMetadata The encryption metadata * * @return The user password. 
@@ -777,12 +783,12 @@ private byte[] computeEncryptedKeyRev56(byte[] password, boolean isOwnerPassword * @throws IOException if the password could not be computed */ public byte[] computeUserPassword(byte[] password, byte[] owner, int permissions, - byte[] id, int encRevision, int length, + byte[] id, int encRevision, int keyLengthInBytes, boolean encryptMetadata) throws IOException { ByteArrayOutputStream result = new ByteArrayOutputStream(); byte[] encKey = computeEncryptedKey( password, owner, null, null, null, permissions, - id, encRevision, length, encryptMetadata, true ); + id, encRevision, keyLengthInBytes, encryptMetadata, true ); if( encRevision == 2 ) { @@ -863,7 +869,7 @@ public byte[] computeOwnerPassword(byte[] ownerPassword, byte[] userPassword, return encrypted.toByteArray(); } - // steps (a) to (d) of "Algorithm 3: Computing the encryption dictionary?s O (owner password) value". + // steps (a) to (d) of "Algorithm 3: Computing the encryption dictionary’s O (owner password) value". private byte[] computeRC4key(byte[] ownerPassword, int encRevision, int length) { MessageDigest md = MessageDigests.getMD5(); @@ -911,55 +917,68 @@ private byte[] truncateOrPad( byte[] password ) * @param permissions The permissions set in the PDF. * @param id The document id used for encryption. * @param encRevision The revision of the encryption algorithm. - * @param length The length of the encryption key. - * @param encryptMetadata The encryption metadata + * @param keyLengthInBytes The length of the encryption key in bytes. + * @param encryptMetadata The encryption metadata. * * @return true If the plaintext password is the user password. * * @throws IOException If there is an error accessing data. 
*/ public boolean isUserPassword(byte[] password, byte[] user, byte[] owner, int permissions, - byte[] id, int encRevision, int length, boolean encryptMetadata) + byte[] id, int encRevision, int keyLengthInBytes, boolean encryptMetadata) throws IOException { - if( encRevision == 2 ) + switch (encRevision) + { + case 2: + case 3: + case 4: + return isUserPassword234(password, user, owner, permissions, id, encRevision, + keyLengthInBytes, encryptMetadata); + case 5: + case 6: + return isUserPassword56(password, user, encRevision); + default: + throw new IOException("Unknown Encryption Revision " + encRevision); + } + } + + private boolean isUserPassword234(byte[] password, byte[] user, byte[] owner, int permissions, + byte[] id, int encRevision, int length, boolean encryptMetadata) + throws IOException + { + byte[] passwordBytes = computeUserPassword(password, owner, permissions, id, encRevision, + length, encryptMetadata); + if (encRevision == 2) { - byte[] passwordBytes = computeUserPassword( password, owner, permissions, id, encRevision, - length, encryptMetadata ); return Arrays.equals(user, passwordBytes); } - else if( encRevision == 3 || encRevision == 4 ) + else { - byte[] passwordBytes = computeUserPassword( password, owner, permissions, id, encRevision, - length, encryptMetadata ); // compare first 16 bytes only return Arrays.equals(Arrays.copyOf(user, 16), Arrays.copyOf(passwordBytes, 16)); } - else if (encRevision == 6 || encRevision == 5) - { - byte[] truncatedPassword = truncate127(password); - - byte[] uHash = new byte[32]; - byte[] uValidationSalt = new byte[8]; - System.arraycopy(user, 0, uHash, 0, 32); - System.arraycopy(user, 32, uValidationSalt, 0, 8); + } - byte[] hash; - if (encRevision == 5) - { - hash = computeSHA256(truncatedPassword, uValidationSalt, null); - } - else - { - hash = computeHash2A(truncatedPassword, uValidationSalt, null); - } + private boolean isUserPassword56(byte[] password, byte[] user, int encRevision) throws IOException + 
{ + byte[] truncatedPassword = truncate127(password); + byte[] uHash = new byte[32]; + byte[] uValidationSalt = new byte[8]; + System.arraycopy(user, 0, uHash, 0, 32); + System.arraycopy(user, 32, uValidationSalt, 0, 8); - return Arrays.equals(hash, uHash); + byte[] hash; + if (encRevision == 5) + { + hash = computeSHA256(truncatedPassword, uValidationSalt, null); } else { - throw new IOException( "Unknown Encryption Revision " + encRevision ); + hash = computeHash2A(truncatedPassword, uValidationSalt, null); } + + return Arrays.equals(hash, uHash); } /** @@ -971,7 +990,7 @@ else if (encRevision == 6 || encRevision == 5) * @param permissions The permissions set in the PDF. * @param id The document id used for encryption. * @param encRevision The revision of the encryption algorithm. - * @param length The length of the encryption key. + * @param keyLengthInBytes The length of the encryption key in bytes. * @param encryptMetadata The encryption metadata * * @return true If the plaintext password is the user password. @@ -979,18 +998,18 @@ else if (encRevision == 6 || encRevision == 5) * @throws IOException If there is an error accessing data. 
*/ public boolean isUserPassword(String password, byte[] user, byte[] owner, int permissions, - byte[] id, int encRevision, int length, boolean encryptMetadata) + byte[] id, int encRevision, int keyLengthInBytes, boolean encryptMetadata) throws IOException { if (encRevision == 6 || encRevision == 5) { return isUserPassword(password.getBytes(Charsets.UTF_8), user, owner, permissions, id, - encRevision, length, encryptMetadata); + encRevision, keyLengthInBytes, encryptMetadata); } else { return isUserPassword(password.getBytes(Charsets.ISO_8859_1), user, owner, permissions, id, - encRevision, length, encryptMetadata); + encRevision, keyLengthInBytes, encryptMetadata); } } @@ -1003,7 +1022,7 @@ public boolean isUserPassword(String password, byte[] user, byte[] owner, int pe * @param permissions The set of permissions on the document. * @param id The document id. * @param encRevision The encryption algorithm revision. - * @param length The encryption key length. + * @param keyLengthInBytes The encryption key length in bytes. * @param encryptMetadata The encryption metadata * * @return True If the ownerPassword param is the owner password. @@ -1011,11 +1030,11 @@ public boolean isUserPassword(String password, byte[] user, byte[] owner, int pe * @throws IOException If there is an error accessing data. 
*/ public boolean isOwnerPassword(String password, byte[] user, byte[] owner, int permissions, - byte[] id, int encRevision, int length, boolean encryptMetadata) + byte[] id, int encRevision, int keyLengthInBytes, boolean encryptMetadata) throws IOException { return isOwnerPassword(password.getBytes(Charsets.ISO_8859_1), user,owner,permissions, id, - encRevision, length, encryptMetadata); + encRevision, keyLengthInBytes, encryptMetadata); } // Algorithm 2.A from ISO 32000-1 @@ -1052,11 +1071,11 @@ private static byte[] computeHash2B(byte[] input, byte[] password, byte[] userKe { try { - MessageDigest md = MessageDigest.getInstance("SHA-256"); + MessageDigest md = MessageDigests.getSHA256(); byte[] k = md.digest(input); byte[] e = null; - for (int round = 0; round < 64 || ((int)e[e.length-1] & 0xFF) > round - 32; round++) + for (int round = 0; round < 64 || (e[e.length-1] & 0xFF) > round - 32; round++) { byte[] k1; if (userKey != null && userKey.length >= 48) @@ -1121,22 +1140,14 @@ private static byte[] computeHash2B(byte[] input, byte[] password, byte[] userKe } } - private static byte[] computeSHA256(byte[] input, byte[] password, byte[] userKey) - throws IOException + private static byte[] computeSHA256(byte[] input, byte[] password, byte[] userKey) { - try - { - MessageDigest md = MessageDigest.getInstance("SHA-256"); - md.update(input); - md.update(password); - return userKey == null ? md.digest() : md.digest(userKey); - } - catch (NoSuchAlgorithmException e) - { - throw new IOException(e); - } + MessageDigest md = MessageDigests.getSHA256(); + md.update(input); + md.update(password); + return userKey == null ? 
md.digest() : md.digest(userKey); } - + private static byte[] concat(byte[] a, byte[] b) { byte[] o = new byte[a.length + b.length]; @@ -1178,13 +1189,4 @@ private static void logIfStrongEncryptionMissing() { } } - - /** - * {@inheritDoc} - */ - @Override - public boolean hasProtectionPolicy() - { - return policy != null; - } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/package.html index 5f0862e43f5..99cf5689a33 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotation.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotation.java index b2c9f773afa..ef24a7c2fae 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotation.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotation.java @@ -37,10 +37,12 @@ import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderEffectDictionary; import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; import org.apache.pdfbox.util.DateConverter; +import org.w3c.dom.CDATASection; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; +import org.w3c.dom.Text; /** * This represents an FDF annotation that is part of the FDF document. @@ -56,7 +58,7 @@ public abstract class FDFAnnotation implements COSObjectable /** * An annotation flag. */ - private static final int FLAG_INVISIBLE = 1 << 0; + private static final int FLAG_INVISIBLE = 1; /** * An annotation flag. */ @@ -89,7 +91,11 @@ public abstract class FDFAnnotation implements COSObjectable * An annotation flag. */ private static final int FLAG_TOGGLE_NO_VIEW = 1 << 8; - + /** + * An annotation flag. 
+ */ + private static final int FLAG_LOCKED_CONTENTS = 1 << 9; + /** * Annotation dictionary. */ @@ -327,79 +333,79 @@ public static FDFAnnotation create(COSDictionary fdfDic) throws IOException FDFAnnotation retval = null; if (fdfDic != null) { - if (FDFAnnotationText.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + String fdfDicName = fdfDic.getNameAsString(COSName.SUBTYPE); + if (FDFAnnotationText.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationText(fdfDic); } - else if (FDFAnnotationCaret.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationCaret.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationCaret(fdfDic); } - else if (FDFAnnotationFreeText.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationFreeText.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationFreeText(fdfDic); } - else if (FDFAnnotationFileAttachment.SUBTYPE.equals(fdfDic - .getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationFileAttachment.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationFileAttachment(fdfDic); } - else if (FDFAnnotationHighlight.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationHighlight.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationHighlight(fdfDic); } - else if (FDFAnnotationInk.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationInk.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationInk(fdfDic); } - else if (FDFAnnotationLine.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationLine.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationLine(fdfDic); } - else if (FDFAnnotationLink.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationLink.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationLink(fdfDic); } - else if (FDFAnnotationCircle.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if 
(FDFAnnotationCircle.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationCircle(fdfDic); } - else if (FDFAnnotationSquare.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationSquare.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationSquare(fdfDic); } - else if (FDFAnnotationPolygon.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationPolygon.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationPolygon(fdfDic); } - else if (FDFAnnotationPolyline.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationPolyline.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationPolyline(fdfDic); } - else if (FDFAnnotationSound.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationSound.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationSound(fdfDic); } - else if (FDFAnnotationSquiggly.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationSquiggly.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationSquiggly(fdfDic); } - else if (FDFAnnotationStamp.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationStamp.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationStamp(fdfDic); } - else if (FDFAnnotationStrikeOut.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationStrikeOut.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationStrikeOut(fdfDic); } - else if (FDFAnnotationUnderline.SUBTYPE.equals(fdfDic.getNameAsString(COSName.SUBTYPE))) + else if (FDFAnnotationUnderline.SUBTYPE.equals(fdfDicName)) { retval = new FDFAnnotationUnderline(fdfDic); } else { LOG.warn("Unknown or unsupported annotation type '" - + fdfDic.getNameAsString(COSName.SUBTYPE) + "'"); + + fdfDicName + "'"); } } return retval; @@ -679,6 +685,26 @@ public final void setToggleNoView(boolean toggleNoView) annot.setFlag(COSName.F, FLAG_TOGGLE_NO_VIEW, toggleNoView); } + /** + * Get the LockedContents flag. 
+ * + * @return The LockedContents flag. + */ + public boolean isLockedContents() + { + return annot.getFlag(COSName.F, FLAG_LOCKED_CONTENTS); + } + + /** + * Set the LockedContents flag. + * + * @param lockedContents The new LockedContents flag. + */ + public void setLockedContents(boolean lockedContents) + { + annot.setFlag(COSName.F, FLAG_LOCKED_CONTENTS, lockedContents); + } + /** * Set a unique name for an annotation. * @@ -959,43 +985,49 @@ else if (base instanceof COSStream) private String richContentsToString(Node node, boolean root) { - String retval = ""; - XPath xpath = XPathFactory.newInstance().newXPath(); - try + StringBuilder sb = new StringBuilder(); + + NodeList nodelist = node.getChildNodes(); + for (int i = 0; i < nodelist.getLength(); i++) { - NodeList nodelist = (NodeList) xpath.evaluate("*", node, XPathConstants.NODESET); - String subString = ""; - if (nodelist.getLength() == 0) - { - subString = node.getFirstChild().getNodeValue(); - } - for (int i = 0; i < nodelist.getLength(); i++) + Node child = nodelist.item(i); + if (child instanceof Element) { - Node child = nodelist.item(i); - if (child instanceof Element) - { - subString += richContentsToString(child, false); - } + sb.append(richContentsToString(child, false)); } - NamedNodeMap attributes = node.getAttributes(); - StringBuilder builder = new StringBuilder(); - for (int i = 0; i < attributes.getLength(); i++) + else if (child instanceof CDATASection) { - Node attribute = attributes.item(i); - builder.append(String.format(" %s=\"%s\"", attribute.getNodeName(), - attribute.getNodeValue())); + sb.append(""); } - if (root) + else if (child instanceof Text) { - return subString; + String cdata = ((Text) child).getData(); + if (cdata!=null) + { + cdata = cdata.replace("&", "&").replace("<", "<"); + } + sb.append(cdata); } - retval = String.format("<%s%s>%s", node.getNodeName(), builder.toString(), - subString, node.getNodeName()); } - catch (XPathExpressionException e) + if (root) { - 
LOG.debug("Error while evaluating XPath expression for richtext contents"); + return sb.toString(); } - return retval; + + NamedNodeMap attributes = node.getAttributes(); + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < attributes.getLength(); i++) + { + Node attribute = attributes.item(i); + String attributeNodeValue = attribute.getNodeValue(); + if (attributeNodeValue!=null) + { + attributeNodeValue = attributeNodeValue.replace("\"", """); + } + builder.append(String.format(" %s=\"%s\"", attribute.getNodeName(), + attributeNodeValue)); + } + return String.format("<%s%s>%s", node.getNodeName(), builder.toString(), + sb.toString(), node.getNodeName()); } -} +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationFreeText.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationFreeText.java index 4636e2cde9a..9384c3e7046 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationFreeText.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationFreeText.java @@ -48,7 +48,6 @@ public class FDFAnnotationFreeText extends FDFAnnotation */ public FDFAnnotationFreeText() { - super(); annot.setName(COSName.SUBTYPE, SUBTYPE); } @@ -119,7 +118,7 @@ private void initFringe(Element element) throws IOException } } - private void initCallout(Element element) throws IOException + private void initCallout(Element element) { String callout = element.getAttribute("callout"); if (callout != null && !callout.isEmpty()) @@ -150,7 +149,7 @@ public void setCallout(float[] callout) } /** - * This will get the coordinates of the the callout line. + * This will get the coordinates of the callout line. * * @return An array of four or six numbers specifying a callout line attached to the free text * annotation. 
Six numbers [ x1 y1 x2 y2 x3 y3 ] represent the starting, knee point, and ending diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationLink.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationLink.java index a6a1519ab94..41a45b0e766 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationLink.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationLink.java @@ -18,15 +18,29 @@ import java.io.IOException; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.interactive.action.PDActionURI; + import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; /** * This represents a Polygon FDF annotation. */ public class FDFAnnotationLink extends FDFAnnotation { + private static final Log LOG = LogFactory.getLog(FDFAnnotationLink.class); + /** * COS Model value for SubType entry. 
*/ @@ -37,7 +51,6 @@ public class FDFAnnotationLink extends FDFAnnotation */ public FDFAnnotationLink() { - super(); annot.setName(COSName.SUBTYPE, SUBTYPE); } @@ -62,5 +75,28 @@ public FDFAnnotationLink(Element element) throws IOException { super(element); annot.setName(COSName.SUBTYPE, SUBTYPE); + XPath xpath = XPathFactory.newInstance().newXPath(); + + try + { + NodeList uri = (NodeList) xpath.evaluate("OnActivation/Action/URI", element, + XPathConstants.NODESET); + if (uri.getLength() > 0) + { + Node namedItem = uri.item(0).getAttributes().getNamedItem("Name"); + if (namedItem != null && namedItem.getNodeValue() != null) + { + PDActionURI actionURI = new PDActionURI(); + actionURI.setURI(namedItem.getNodeValue()); + annot.setItem(COSName.A, actionURI); + } + } + // GoTo is more tricky, because because page destination needs page tree + // to convert number into PDPage object + } + catch (XPathExpressionException e) + { + LOG.debug("Error while evaluating XPath expression", e); + } } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationPolygon.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationPolygon.java index cfb4b5209ea..19fb6f6bdd3 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationPolygon.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationPolygon.java @@ -109,7 +109,7 @@ private void initVertices(Element element) throws IOException } /** - * This will set the coordinates of the the vertices. + * This will set the coordinates of the vertices. * * @param vertices array of floats [x1, y1, x2, y2, ...] vertex coordinates in default user space. */ @@ -121,7 +121,7 @@ public void setVertices(float[] vertices) } /** - * This will get the coordinates of the the vertices. + * This will get the coordinates of the vertices. * * @return array of floats [x1, y1, x2, y2, ...] vertex coordinates in default user space. 
*/ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationPolyline.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationPolyline.java index a1cea8f31ba..3fb0d88a667 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationPolyline.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationPolyline.java @@ -126,7 +126,7 @@ private void initStyles(Element element) } /** - * This will set the coordinates of the the vertices. + * This will set the coordinates of the vertices. * * @param vertices array of floats [x1, y1, x2, y2, ...] vertex coordinates in default user space. */ @@ -138,7 +138,7 @@ public void setVertices(float[] vertices) } /** - * This will get the coordinates of the the vertices. + * This will get the coordinates of the vertices. * * @return array of floats [x1, y1, x2, y2, ...] vertex coordinates in default user space. */ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationStamp.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationStamp.java index 853cd641619..6e58e6e0211 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationStamp.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationStamp.java @@ -16,19 +16,41 @@ */ package org.apache.pdfbox.pdmodel.fdf; +import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.OutputStream; + +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBoolean; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.util.Hex; + +import 
org.w3c.dom.Document; import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; /** * This represents a Stamp FDF annotation. * * @author Ben Litchfield + * @author Andrew Hung */ public class FDFAnnotationStamp extends FDFAnnotation { + private static final Log LOG = LogFactory.getLog(FDFAnnotationStamp.class); + /** * COS Model value for SubType entry. */ @@ -39,7 +61,6 @@ public class FDFAnnotationStamp extends FDFAnnotation */ public FDFAnnotationStamp() { - super(); annot.setName(COSName.SUBTYPE, SUBTYPE); } @@ -64,5 +85,339 @@ public FDFAnnotationStamp(Element element) throws IOException { super(element); annot.setName(COSName.SUBTYPE, SUBTYPE); + + // PDFBOX-4437: Initialize the Stamp appearance from the XFDF + // https://www.immagic.com/eLibrary/ARCHIVES/TECH/ADOBE/A070914X.pdf + // appearance is only defined for stamps + XPath xpath = XPathFactory.newInstance().newXPath(); + + // Set the Appearance to the annotation + LOG.debug("Get the DOM Document for the stamp appearance"); + String base64EncodedAppearance; + try + { + base64EncodedAppearance = xpath.evaluate("appearance", element); + } + catch (XPathExpressionException e) + { + // should not happen + LOG.error("Error while evaluating XPath expression for appearance: " + e); + return; + } + byte[] decodedAppearanceXML; + try + { + decodedAppearanceXML = Hex.decodeBase64(base64EncodedAppearance); + } + catch (IllegalArgumentException ex) + { + LOG.error("Bad base64 encoded appearance ignored", ex); + return; + } + if (base64EncodedAppearance != null && !base64EncodedAppearance.isEmpty()) + { + LOG.debug("Decoded XML: " + new String(decodedAppearanceXML)); + + Document stampAppearance = org.apache.pdfbox.util.XMLUtil + .parse(new ByteArrayInputStream(decodedAppearanceXML)); + + Element appearanceEl = stampAppearance.getDocumentElement(); + + // Is the root node have tag as DICT, error otherwise + if (!"dict".equalsIgnoreCase(appearanceEl.getNodeName())) + { + throw new 
IOException("Error while reading stamp document, " + + "root should be 'dict' and not '" + appearanceEl.getNodeName() + "'"); + } + LOG.debug("Generate and set the appearance dictionary to the stamp annotation"); + annot.setItem(COSName.AP, parseStampAnnotationAppearanceXML(appearanceEl)); + } + } + + /** + * This will create an Appearance dictionary from an appearance XML element. + * + * @param appearanceXML The XML element that contains the appearance data. + */ + private COSDictionary parseStampAnnotationAppearanceXML(Element appearanceXML) throws IOException + { + COSDictionary dictionary = new COSDictionary(); + // the N entry is required. + dictionary.setItem(COSName.N, new COSStream()); + LOG.debug("Build dictionary for Appearance based on the appearanceXML"); + + NodeList nodeList = appearanceXML.getChildNodes(); + String parentAttrKey = appearanceXML.getAttribute("KEY"); + LOG.debug("Appearance Root - tag: " + appearanceXML.getTagName() + ", name: " + + appearanceXML.getNodeName() + ", key: " + parentAttrKey + ", children: " + + nodeList.getLength()); + + // Currently only handles Appearance dictionary (AP key on the root) + if (!"AP".equals(appearanceXML.getAttribute("KEY"))) + { + LOG.warn(parentAttrKey + " => Not handling element: " + appearanceXML.getTagName() + + " with key: " + appearanceXML.getAttribute("KEY")); + return dictionary; + } + for (int i = 0; i < nodeList.getLength(); i++) + { + Node node = nodeList.item(i); + if (node instanceof Element) + { + Element child = (Element) node; + if ("STREAM".equalsIgnoreCase(child.getTagName())) + { + LOG.debug(parentAttrKey + + " => Process " + child.getAttribute("KEY") + + " item in the dictionary after processing the " + + child.getTagName()); + dictionary.setItem(child.getAttribute("KEY"), parseStreamElement(child)); + LOG.debug(parentAttrKey + " => Set " + child.getAttribute("KEY")); + } + else + { + LOG.warn(parentAttrKey + " => Not handling element: " + child.getTagName()); + } + } + } + return 
dictionary; + } + + private COSStream parseStreamElement(Element streamEl) throws IOException + { + LOG.debug("Parse " + streamEl.getAttribute("KEY") + " Stream"); + COSStream stream = new COSStream(); + + NodeList nodeList = streamEl.getChildNodes(); + String parentAttrKey = streamEl.getAttribute("KEY"); + + for (int i = 0; i < nodeList.getLength(); i++) + { + Node node = nodeList.item(i); + if (node instanceof Element) + { + Element child = (Element) node; + String childAttrKey = child.getAttribute("KEY"); + String childAttrVal = child.getAttribute("VAL"); + LOG.debug(parentAttrKey + " => reading child: " + child.getTagName() + + " with key: " + childAttrKey); + if ("INT".equalsIgnoreCase(child.getTagName())) + { + if (!"Length".equals(childAttrKey)) + { + stream.setInt(COSName.getPDFName(childAttrKey), Integer.parseInt(childAttrVal)); + LOG.debug(parentAttrKey + " => Set " + childAttrKey + ": " + childAttrVal); + } + } + else if ("FIXED".equalsIgnoreCase(child.getTagName())) + { + stream.setFloat(COSName.getPDFName(childAttrKey), Float.parseFloat(childAttrVal)); + LOG.debug(parentAttrKey + " => Set " + childAttrKey + ": " + childAttrVal); + } + else if ("NAME".equalsIgnoreCase(child.getTagName())) + { + stream.setName(COSName.getPDFName(childAttrKey), childAttrVal); + LOG.debug(parentAttrKey + " => Set " + childAttrKey + ": " + childAttrVal); + } + else if ("BOOL".equalsIgnoreCase(child.getTagName())) + { + stream.setBoolean(COSName.getPDFName(childAttrKey), Boolean.parseBoolean(childAttrVal)); + LOG.debug(parentAttrKey + " => Set " + childAttrVal); + } + else if ("ARRAY".equalsIgnoreCase(child.getTagName())) + { + stream.setItem(COSName.getPDFName(childAttrKey), parseArrayElement(child)); + LOG.debug(parentAttrKey + " => Set " + childAttrKey); + } + else if ("DICT".equalsIgnoreCase(child.getTagName())) + { + stream.setItem(COSName.getPDFName(childAttrKey), parseDictElement(child)); + LOG.debug(parentAttrKey + " => Set " + childAttrKey); + } + else if 
("STREAM".equalsIgnoreCase(child.getTagName())) + { + stream.setItem(COSName.getPDFName(childAttrKey), parseStreamElement(child)); + LOG.debug(parentAttrKey + " => Set " + childAttrKey); + } + else if ("DATA".equalsIgnoreCase(child.getTagName())) + { + LOG.debug(parentAttrKey + " => Handling DATA with encoding: " + + child.getAttribute("ENCODING")); + if ("HEX".equals(child.getAttribute("ENCODING"))) + { + OutputStream os = null; + try + { + os = stream.createRawOutputStream(); + os.write(Hex.decodeHex(child.getTextContent())); + LOG.debug(parentAttrKey + " => Data was streamed"); + } + finally + { + IOUtils.closeQuietly(os); + } + } + else if ("ASCII".equals(child.getAttribute("ENCODING"))) + { + OutputStream os = null; + try + { + os = stream.createOutputStream(); + // not sure about charset + os.write(child.getTextContent().getBytes()); + LOG.debug(parentAttrKey + " => Data was streamed"); + } + finally + { + IOUtils.closeQuietly(os); + } + } + else + { + LOG.warn(parentAttrKey + " => Not handling element DATA encoding: " + + child.getAttribute("ENCODING")); + } + } + else + { + LOG.warn(parentAttrKey + " => Not handling child element: " + child.getTagName()); + } + } + } + + return stream; + } + + private COSArray parseArrayElement(Element arrayEl) throws IOException + { + LOG.debug("Parse " + arrayEl.getAttribute("KEY") + " Array"); + COSArray array = new COSArray(); + + NodeList nodeList = arrayEl.getChildNodes(); + String parentAttrKey = arrayEl.getAttribute("KEY"); + + if ("BBox".equals(parentAttrKey) && nodeList.getLength() < 4) + { + throw new IOException("BBox does not have enough coordinates, only has: " + + nodeList.getLength()); + } + else if ("Matrix".equals(parentAttrKey) && nodeList.getLength() < 6) + { + throw new IOException("Matrix does not have enough coordinates, only has: " + + nodeList.getLength()); + } + + for (int i = 0; i < nodeList.getLength(); i++) + { + Node node = nodeList.item(i); + if (node instanceof Element) + { + Element child = 
(Element) node; + String childAttrKey = child.getAttribute("KEY"); + String childAttrVal = child.getAttribute("VAL"); + LOG.debug(parentAttrKey + " => reading child: " + child.getTagName() + + " with key: " + childAttrKey); + if ("INT".equalsIgnoreCase(child.getTagName()) || "FIXED".equalsIgnoreCase(child.getTagName())) + { + LOG.debug(parentAttrKey + " value(" + i + "): " + childAttrVal); + array.add(COSNumber.get(childAttrVal)); + } + else if ("NAME".equalsIgnoreCase(child.getTagName())) + { + LOG.debug(parentAttrKey + " value(" + i + "): " + childAttrVal); + array.add(COSName.getPDFName(childAttrVal)); + } + else if ("BOOL".equalsIgnoreCase(child.getTagName())) + { + LOG.debug(parentAttrKey + " value(" + i + "): " + childAttrVal); + array.add(COSBoolean.getBoolean(Boolean.parseBoolean(childAttrVal))); + } + else if ("DICT".equalsIgnoreCase(child.getTagName())) + { + LOG.debug(parentAttrKey + " value(" + i + "): " + childAttrVal); + array.add(parseDictElement(child)); + } + else if ("STREAM".equalsIgnoreCase(child.getTagName())) + { + LOG.debug(parentAttrKey + " value(" + i + "): " + childAttrVal); + array.add(parseStreamElement(child)); + } + else if ("ARRAY".equalsIgnoreCase(child.getTagName())) + { + LOG.debug(parentAttrKey + " value(" + i + "): " + childAttrVal); + array.add(parseArrayElement(child)); + } + else + { + LOG.warn(parentAttrKey + " => Not handling child element: " + child.getTagName()); + } + } + } + + return array; + } + + private COSDictionary parseDictElement(Element dictEl) throws IOException + { + LOG.debug("Parse " + dictEl.getAttribute("KEY") + " Dictionary"); + COSDictionary dict = new COSDictionary(); + + NodeList nodeList = dictEl.getChildNodes(); + String parentAttrKey = dictEl.getAttribute("KEY"); + + for (int i = 0; i < nodeList.getLength(); i++) + { + Node node = nodeList.item(i); + if (node instanceof Element) + { + Element child = (Element) node; + String childAttrKey = child.getAttribute("KEY"); + String childAttrVal = 
child.getAttribute("VAL"); + + if ("DICT".equals(child.getTagName())) + { + LOG.debug(parentAttrKey + " => Handling DICT element with key: " + childAttrKey); + dict.setItem(COSName.getPDFName(childAttrKey), parseDictElement(child)); + LOG.debug(parentAttrKey + " => Set " + childAttrKey); + } + else if ("STREAM".equals(child.getTagName())) + { + LOG.debug(parentAttrKey + " => Handling STREAM element with key: " + childAttrKey); + dict.setItem(COSName.getPDFName(childAttrKey), parseStreamElement(child)); + } + else if ("NAME".equals(child.getTagName())) + { + LOG.debug(parentAttrKey + " => Handling NAME element with key: " + childAttrKey); + dict.setName(COSName.getPDFName(childAttrKey), childAttrVal); + LOG.debug(parentAttrKey + " => Set " + childAttrKey + ": " + childAttrVal); + } + else if ("INT".equalsIgnoreCase(child.getTagName())) + { + dict.setInt(COSName.getPDFName(childAttrKey), Integer.parseInt(childAttrVal)); + LOG.debug(parentAttrKey + " => Set " + childAttrKey + ": " + childAttrVal); + } + else if ("FIXED".equalsIgnoreCase(child.getTagName())) + { + dict.setFloat(COSName.getPDFName(childAttrKey), Float.parseFloat(childAttrVal)); + LOG.debug(parentAttrKey + " => Set " + childAttrKey + ": " + childAttrVal); + } + else if ("BOOL".equalsIgnoreCase(child.getTagName())) + { + dict.setBoolean(COSName.getPDFName(childAttrKey), Boolean.parseBoolean(childAttrVal)); + LOG.debug(parentAttrKey + " => Set " + childAttrVal); + } + else if ("ARRAY".equalsIgnoreCase(child.getTagName())) + { + dict.setItem(COSName.getPDFName(childAttrKey), parseArrayElement(child)); + LOG.debug(parentAttrKey + " => Set " + childAttrKey); + } + else + { + LOG.warn(parentAttrKey + " => NOT handling child element: " + child.getTagName()); + } + } + } + + return dict; } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFCatalog.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFCatalog.java index 94d89d7dff8..cc606802c84 100644 --- 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFCatalog.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFCatalog.java @@ -56,7 +56,6 @@ public FDFCatalog(COSDictionary cat) * This will create an FDF catalog from an XFDF XML document. * * @param element The XML document that contains the XFDF data. - * @throws IOException If there is an error reading from the dom. */ public FDFCatalog(Element element) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDictionary.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDictionary.java index fa6c89aec27..ef5adb9f08b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDictionary.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDictionary.java @@ -70,7 +70,6 @@ public FDFDictionary(COSDictionary fdfDictionary) * This will create an FDF dictionary from an XFDF XML document. * * @param fdfXML The XML document that contains the XFDF data. - * @throws IOException If there is an error reading from the dom. */ public FDFDictionary(Element fdfXML) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDocument.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDocument.java index f85129563e9..e8d151c3bcf 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDocument.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDocument.java @@ -22,10 +22,10 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; -import java.io.FileWriter; import java.io.InputStream; import java.io.IOException; import java.io.OutputStream; +import java.io.OutputStreamWriter; import java.io.Writer; import org.apache.pdfbox.cos.COSDictionary; @@ -48,8 +48,6 @@ public class FDFDocument implements Closeable /** * Constructor, creates a new FDF document. - * - * @throws IOException If there is an error creating this document. 
*/ public FDFDocument() { @@ -129,7 +127,7 @@ public FDFCatalog getCatalog() { FDFCatalog retval = null; COSDictionary trailer = document.getTrailer(); - COSDictionary root = (COSDictionary) trailer.getDictionaryObject(COSName.ROOT); + COSDictionary root = trailer.getCOSDictionary(COSName.ROOT); if (root == null) { retval = new FDFCatalog(); @@ -240,8 +238,7 @@ public static FDFDocument loadXFDF(File file) throws IOException */ public static FDFDocument loadXFDF(InputStream input) throws IOException { - Document doc = XMLUtil.parse(input); - return new FDFDocument(doc); + return new FDFDocument(org.apache.pdfbox.util.XMLUtil.parse(input)); } /** @@ -302,7 +299,8 @@ public void save(OutputStream output) throws IOException */ public void saveXFDF(File fileName) throws IOException { - saveXFDF(new BufferedWriter(new FileWriter(fileName))); + saveXFDF(new BufferedWriter( + new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8"))); } /** @@ -314,7 +312,8 @@ public void saveXFDF(File fileName) throws IOException */ public void saveXFDF(String fileName) throws IOException { - saveXFDF(new BufferedWriter(new FileWriter(fileName))); + saveXFDF(new BufferedWriter( + new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8"))); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFField.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFField.java index 2dd1d163832..7fc0f869850 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFField.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFField.java @@ -85,11 +85,11 @@ public FDFField(Element fieldXML) throws IOException Element child = (Element) node; if (child.getTagName().equals("value")) { - setValue(XMLUtil.getNodeValue(child)); + setValue(org.apache.pdfbox.util.XMLUtil.getNodeValue(child)); } else if (child.getTagName().equals("value-richtext")) { - setRichText(new COSString(XMLUtil.getNodeValue(child))); + setRichText(new 
COSString(org.apache.pdfbox.util.XMLUtil.getNodeValue(child))); } else if (child.getTagName().equals("field")) { @@ -113,23 +113,35 @@ else if (child.getTagName().equals("field")) */ public void writeXML(Writer output) throws IOException { - output.write("\n"); + output.write("\n"); + Object value = getValue(); - if (value != null) + + if (value instanceof String) { - if (value instanceof COSString) - { - output.write("" + escapeXML(((COSString) value).getString()) + "\n"); - } - else if (value instanceof COSStream) + output.write(""); + output.write(escapeXML((String) value)); + output.write("\n"); + } + else if (value instanceof List) + { + List items = (List) value; + for (String item : items) { - output.write("" + escapeXML(((COSStream) value).toTextString()) + "\n"); + output.write(""); + output.write(escapeXML(item)); + output.write("\n"); } } + String rt = getRichText(); if (rt != null) { - output.write("" + escapeXML(rt) + "\n"); + output.write(""); + output.write(escapeXML(rt)); + output.write("\n"); } List kids = getKids(); if (kids != null) @@ -187,9 +199,9 @@ public void setKids(List kids) /** * This will get the "T" entry in the field dictionary. A partial field name. Where the fully qualified field name - * is a concatenation of the parent's fully qualified field name and "." as a separator. For example
- * Address.State
- * Address.City
+ * is a concatenation of the parent's fully qualified field name and "." as a separator. For example
+ * Address.State
+ * Address.City
* * @return The partial field name. */ @@ -209,9 +221,9 @@ public void setPartialFieldName(String partial) } /** - * This will get the value for the field. This will return type will either be
- * String : Checkboxes, Radio Button
- * java.util.List of strings: Choice Field PDTextStream: Textfields + * This will get the value for the field. This will return type will either be
+ * String : Checkboxes, Radio Button, Textfields
+ * java.util.List of strings: Choice Field * * @return The value of the field. * @throws IOException If there is an error getting the value. @@ -227,9 +239,13 @@ else if (value instanceof COSArray) { return COSArrayList.convertCOSStringCOSArrayToList((COSArray) value); } - else if (value instanceof COSString || value instanceof COSStream) + else if (value instanceof COSString) { - return value; + return ((COSString) value).getString(); + } + else if (value instanceof COSStream) + { + return ((COSStream) value).toTextString(); } else if (value != null) { @@ -250,6 +266,7 @@ else if (value != null) public COSBase getCOSValue() throws IOException { COSBase value = field.getDictionaryObject(COSName.V); + if (value instanceof COSName) { return value; @@ -284,11 +301,11 @@ public void setValue(Object value) throws IOException COSBase cos = null; if (value instanceof List) { - cos = COSArrayList.convertStringListToCOSStringCOSArray((List) value); + cos = COSArrayList.convertStringListToCOSStringCOSArray((List) value); } else if (value instanceof String) { - cos = COSName.getPDFName((String) value); + cos = new COSString((String) value); } else if (value instanceof COSObjectable) { @@ -579,7 +596,7 @@ public PDAppearanceDictionary getAppearanceDictionary() /** * This will set the appearance dictionary. * - * @param ap The apperance dictionary. + * @param ap The appearance dictionary. 
*/ public void setAppearanceDictionary(PDAppearanceDictionary ap) { @@ -650,7 +667,7 @@ public List getOptions() COSArray array = (COSArray) field.getDictionaryObject(COSName.OPT); if (array != null) { - List objects = new ArrayList(); + List objects = new ArrayList(array.size()); for (int i = 0; i < array.size(); i++) { COSBase next = array.getObject(i); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFIconFit.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFIconFit.java index 0b60b868c24..b7b5954ce20 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFIconFit.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFIconFit.java @@ -139,7 +139,7 @@ public void setScaleType(String scale) } /** - * This is guaranteed to never return null.
+ * This is guaranteed to never return null.
* * To quote the PDF Spec "An array of two numbers between 0.0 and 1.0 indicating the fraction of leftover space to * allocate at the left and bottom of the icon. A value of [0.0 0.0] positions the icon at the bottom-left corner of diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFPage.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFPage.java index f6f4c7f5a3b..c83408552f6 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFPage.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFPage.java @@ -74,7 +74,7 @@ public List getTemplates() COSArray array = (COSArray) page.getDictionaryObject(COSName.TEMPLATES); if (array != null) { - List objects = new ArrayList(); + List objects = new ArrayList(array.size()); for (int i = 0; i < array.size(); i++) { objects.add(new FDFTemplate((COSDictionary) array.getObject(i))); @@ -102,7 +102,7 @@ public void setTemplates(List templates) public FDFPageInfo getPageInfo() { FDFPageInfo retval = null; - COSDictionary dict = (COSDictionary) page.getDictionaryObject(COSName.INFO); + COSDictionary dict = page.getCOSDictionary(COSName.INFO); if (dict != null) { retval = new FDFPageInfo(dict); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/XMLUtil.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/XMLUtil.java index 5f114a1109f..4cdbe30f7c3 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/XMLUtil.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/XMLUtil.java @@ -19,23 +19,17 @@ import java.io.InputStream; import java.io.IOException; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.FactoryConfigurationError; -import javax.xml.parsers.ParserConfigurationException; - import org.w3c.dom.Document; import org.w3c.dom.Element; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; -import org.w3c.dom.Text; -import org.xml.sax.SAXException; /** * This class with handle 
some simple XML operations. * * @author Ben Litchfield + * + * @deprecated Use {@link org.apache.pdfbox.util.XMLUtil} instead */ +@Deprecated final class XMLUtil { /** @@ -55,24 +49,7 @@ private XMLUtil() */ public static Document parse(InputStream is) throws IOException { - try - { - DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); - DocumentBuilder builder = builderFactory.newDocumentBuilder(); - return builder.parse(is); - } - catch (FactoryConfigurationError e) - { - throw new IOException(e.getMessage(), e); - } - catch (ParserConfigurationException e) - { - throw new IOException(e.getMessage(), e); - } - catch (SAXException e) - { - throw new IOException(e.getMessage(), e); - } + return org.apache.pdfbox.util.XMLUtil.parse(is); } /** @@ -83,17 +60,6 @@ public static Document parse(InputStream is) throws IOException */ public static String getNodeValue(Element node) { - StringBuilder sb = new StringBuilder(); - NodeList children = node.getChildNodes(); - int numNodes = children.getLength(); - for (int i = 0; i < numNodes; i++) - { - Node next = children.item(i); - if (next instanceof Text) - { - sb.append(next.getNodeValue()); - } - } - return sb.toString(); + return org.apache.pdfbox.util.XMLUtil.getNodeValue(node); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/package.html index ac8a6ec7db1..eec75437619 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/AbstractFixup.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/AbstractFixup.java new file mode 100644 index 00000000000..8bb19da22aa --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/AbstractFixup.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.fixup; + +import org.apache.pdfbox.pdmodel.PDDocument; + +public abstract class AbstractFixup implements PDDocumentFixup +{ + protected PDDocument document; + + protected AbstractFixup(PDDocument document) + { + this.document = document; + } +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/AcroFormDefaultFixup.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/AcroFormDefaultFixup.java new file mode 100644 index 00000000000..ea9dbe00c66 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/AcroFormDefaultFixup.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.fixup; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.fixup.processor.AcroFormDefaultsProcessor; +import org.apache.pdfbox.pdmodel.fixup.processor.AcroFormGenerateAppearancesProcessor; +import org.apache.pdfbox.pdmodel.fixup.processor.AcroFormOrphanWidgetsProcessor; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; + +public class AcroFormDefaultFixup extends AbstractFixup +{ + public AcroFormDefaultFixup(PDDocument document) + { + super(document); + } + + @Override + public void apply() { + new AcroFormDefaultsProcessor(document).process(); + + /* + * Get the AcroForm in it's current state. + * + * Also note: getAcroForm() applies a default fixup which this processor + * is part of. 
So keep the null parameter otherwise this will end + * in an endless recursive call + */ + PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(null); + + // PDFBOX-4985 + // build the visual appearance as there is none for the widgets + if (acroForm != null && acroForm.getNeedAppearances()) + { + if (acroForm.getFields().isEmpty()) + { + new AcroFormOrphanWidgetsProcessor(document).process(); + } + + // PDFBOX-4985 + // build the visual appearance as there is none for the widgets + new AcroFormGenerateAppearancesProcessor(document).process(); + } + } +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/PDDocumentFixup.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/PDDocumentFixup.java new file mode 100644 index 00000000000..376c20ec048 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/PDDocumentFixup.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.fixup; + +public interface PDDocumentFixup +{ + void apply(); +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/AbstractProcessor.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/AbstractProcessor.java new file mode 100644 index 00000000000..b3d440edb5a --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/AbstractProcessor.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.fixup.processor; + +import org.apache.pdfbox.pdmodel.PDDocument; + +public abstract class AbstractProcessor implements PDDocumentProcessor +{ + protected PDDocument document; + + protected AbstractProcessor(PDDocument document) + { + this.document = document; + } +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/AcroFormDefaultsProcessor.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/AcroFormDefaultsProcessor.java new file mode 100644 index 00000000000..1b3a8c24d2d --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/AcroFormDefaultsProcessor.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.fixup.processor; + +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; + + +/** + * Verify and ensure default resources for /AcroForm. + * + *
    + *
  • a default appearance string is defined
  • + *
  • default resources are defined
  • + *
  • Helvetica as /Helv and Zapf Dingbats as ZaDb are included. + * ZaDb is required for most check boxes and radio buttons
  • + *
+ * + */ +public class AcroFormDefaultsProcessor extends AbstractProcessor +{ + public AcroFormDefaultsProcessor(PDDocument document) + { + super(document); + } + + @Override + public void process() { + /* + * Get the AcroForm in it's current state. + * + * Also note: getAcroForm() applies a default fixup which this processor + * is part of. So keep the null parameter otherwise this will end + * in an endless recursive call + */ + PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(null); + if (acroForm != null) + { + verifyOrCreateDefaults(acroForm); + } + } + + /* + * Verify that there are default entries for required + * properties. + * + * If these are missing create default entries similar to + * Adobe Reader / Adobe Acrobat + * + */ + private void verifyOrCreateDefaults(PDAcroForm acroForm) + { + final String adobeDefaultAppearanceString = "/Helv 0 Tf 0 g "; + + // DA entry is required + if (acroForm.getDefaultAppearance().length() == 0) + { + acroForm.setDefaultAppearance(adobeDefaultAppearanceString); + acroForm.getCOSObject().setNeedToBeUpdated(true); + } + + // DR entry is required + PDResources defaultResources = acroForm.getDefaultResources(); + if (defaultResources == null) + { + defaultResources = new PDResources(); + acroForm.setDefaultResources(defaultResources); + acroForm.getCOSObject().setNeedToBeUpdated(true); + } + + // PDFBOX-3732: Adobe Acrobat uses Helvetica as a default font and + // stores that under the name '/Helv' in the resources dictionary + // Zapf Dingbats is included per default for check boxes and + // radio buttons as /ZaDb. + // PDFBOX-4393: the two fonts are added by Adobe when signing + // and this breaks a previous signature. 
(Might be an Adobe bug) + COSDictionary fontDict = defaultResources.getCOSObject().getCOSDictionary(COSName.FONT); + if (fontDict == null) + { + fontDict = new COSDictionary(); + defaultResources.getCOSObject().setItem(COSName.FONT, fontDict); + } + if (!fontDict.containsKey(COSName.HELV)) + { + defaultResources.put(COSName.HELV, PDType1Font.HELVETICA); + defaultResources.getCOSObject().setNeedToBeUpdated(true); + fontDict.setNeedToBeUpdated(true); + } + if (!fontDict.containsKey(COSName.ZA_DB)) + { + defaultResources.put(COSName.ZA_DB, PDType1Font.ZAPF_DINGBATS); + defaultResources.getCOSObject().setNeedToBeUpdated(true); + fontDict.setNeedToBeUpdated(true); + } + } +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/AcroFormGenerateAppearancesProcessor.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/AcroFormGenerateAppearancesProcessor.java new file mode 100644 index 00000000000..e1ff2a045ad --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/AcroFormGenerateAppearancesProcessor.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.fixup.processor; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; + +public class AcroFormGenerateAppearancesProcessor extends AbstractProcessor +{ + + private static final Log LOG = LogFactory.getLog(AcroFormGenerateAppearancesProcessor.class); + + public AcroFormGenerateAppearancesProcessor(PDDocument document) + { + super(document); + } + + @Override + public void process() { + /* + * Get the AcroForm in it's current state. + * + * Also note: getAcroForm() applies a default fixup which this processor + * is part of. So keep the null parameter otherwise this will end + * in an endless recursive call + */ + PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(null); + + // PDFBOX-4985 + // build the visual appearance as there is none for the widgets + if (acroForm != null && acroForm.getNeedAppearances()) + { + try + { + LOG.debug("trying to generate appearance streams for fields as NeedAppearances is true()"); + acroForm.refreshAppearances(); + acroForm.setNeedAppearances(false); + } + catch (IOException ioe) + { + LOG.debug("couldn't generate appearance stream for some fields - check output"); + LOG.debug(ioe.getMessage()); + } + catch (IllegalArgumentException iae) + { + LOG.debug("couldn't generate appearance stream for some fields - check output"); + LOG.debug(iae.getMessage()); + } + } + } +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/AcroFormOrphanWidgetsProcessor.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/AcroFormOrphanWidgetsProcessor.java new file mode 100644 index 00000000000..fbe59a2f6ff --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/AcroFormOrphanWidgetsProcessor.java @@ -0,0 +1,238 @@ +/* + * Licensed 
to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.fixup.processor; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.font.FontMapper; +import org.apache.pdfbox.pdmodel.font.FontMappers; +import org.apache.pdfbox.pdmodel.font.FontMapping; +import org.apache.pdfbox.pdmodel.font.PDType0Font; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import org.apache.pdfbox.pdmodel.interactive.form.PDField; +import org.apache.pdfbox.pdmodel.interactive.form.PDFieldFactory; +import 
org.apache.pdfbox.pdmodel.interactive.form.PDVariableText; + +/** + * Generate field entries from page level widget annotations + * if there AcroForm /Fields entry is empty. + * + */ +public class AcroFormOrphanWidgetsProcessor extends AbstractProcessor +{ + + private static final Log LOG = LogFactory.getLog(AcroFormOrphanWidgetsProcessor.class); + + public AcroFormOrphanWidgetsProcessor(PDDocument document) + { + super(document); + } + + @Override + public void process() { + /* + * Get the AcroForm in it's current state. + * + * Also note: getAcroForm() applies a default fixup which this processor + * is part of. So keep the null parameter otherwise this will end + * in an endless recursive call + */ + PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(null); + + if (acroForm != null) + { + resolveFieldsFromWidgets(acroForm); + } + } + + private void resolveFieldsFromWidgets(PDAcroForm acroForm) + { + Map nonTerminalFieldsMap = new HashMap(); + + LOG.debug("rebuilding fields from widgets"); + List fields = new ArrayList(); + for (PDPage page : document.getPages()) + { + try + { + handleAnnotations(acroForm, fields, page.getAnnotations(), nonTerminalFieldsMap); + } + catch (IOException ioe) + { + LOG.debug("couldn't read annotations for page " + ioe.getMessage()); + } + } + + acroForm.setFields(fields); + + // ensure that PDVariableText fields have the neccesary resources + for (PDField field : acroForm.getFieldTree()) + { + if (field instanceof PDVariableText) + { + ensureFontResources(acroForm.getDefaultResources(), (PDVariableText) field); + } + } + } + + private void handleAnnotations(PDAcroForm acroForm, List fields, List annotations, Map nonTerminalFieldsMap) + { + PDResources acroFormResources = acroForm.getDefaultResources(); + + for (PDAnnotation annot : annotations) + { + if (annot instanceof PDAnnotationWidget) + { + addFontFromWidget(acroFormResources, annot); + + COSDictionary parent = annot.getCOSObject().getCOSDictionary(COSName.PARENT); 
+ if (parent != null) + { + PDField resolvedField = resolveNonRootField(acroForm, (PDAnnotationWidget) annot, nonTerminalFieldsMap); + if (resolvedField != null) + { + fields.add(resolvedField); + } + } + else + { + fields.add(PDFieldFactory.createField(acroForm, annot.getCOSObject(), null)); + } + } + } + } + + /* + * Add font resources from the widget to the AcroForm to make sure embedded fonts are being + * used and not added by ensureFontResources potentially using a fallback font + */ + private void addFontFromWidget(PDResources acroFormResources, PDAnnotation annotation) + { + PDAppearanceStream normalAppearanceStream = annotation.getNormalAppearanceStream(); + if (normalAppearanceStream != null && normalAppearanceStream.getResources() != null) + { + PDResources widgetResources = normalAppearanceStream.getResources(); + for (COSName fontName : widgetResources.getFontNames()) + { + if (!fontName.getName().startsWith("+")) + { + try + { + if (acroFormResources.getFont(fontName) == null) + { + acroFormResources.put(fontName, widgetResources.getFont(fontName)); + LOG.debug("qdded font resource to AcroForm from widget for font name " + fontName.getName()); + } + } + catch (IOException ioe) + { + LOG.debug("unable to add font to AcroForm for font name " + fontName.getName()); + } + } + else + { + LOG.debug("font resource for widget was a subsetted font - ignored: " + fontName.getName()); + } + } + } + } + + /* + * Widgets having a /Parent entry are non root fields. Go up until the root node is found + * and handle from there. 
+ */ + private PDField resolveNonRootField(PDAcroForm acroForm, PDAnnotationWidget widget, Map nonTerminalFieldsMap) + { + COSDictionary parent = widget.getCOSObject().getCOSDictionary(COSName.PARENT); + while (parent.containsKey(COSName.PARENT)) + { + parent = parent.getCOSDictionary(COSName.PARENT); + if (parent == null) + { + return null; + } + } + + if (nonTerminalFieldsMap.get(parent.getString(COSName.T)) == null) + { + PDField field = PDFieldFactory.createField(acroForm, parent, null); + if (field != null) + { + nonTerminalFieldsMap.put(field.getFullyQualifiedName(), field); + } + return field; + } + + // this should not happen + return null; + } + + /* + * Lookup the font used in the default appearance and if this is + * not available try to find a suitable font and use that. + * This may not be the original font but a similar font replacement + * + * TODO: implement a font lookup similar as discussed in PDFBOX-2661 so that already existing + * font resources might be accepatble. + * In such case this must be implemented in PDDefaultAppearanceString too! 
+ */ + private void ensureFontResources(PDResources defaultResources, PDVariableText field) + { + String daString = field.getDefaultAppearance(); + if (daString.startsWith("/") && daString.length() > 1) + { + COSName fontName = COSName.getPDFName(daString.substring(1, daString.indexOf(" "))); + try{ + if (defaultResources != null && defaultResources.getFont(fontName) == null) + { + LOG.debug("trying to add missing font resource for field " + field.getFullyQualifiedName()); + FontMapper mapper = FontMappers.instance(); + FontMapping fontMapping = mapper.getTrueTypeFont(fontName.getName() , null); + if (fontMapping != null) + { + PDType0Font pdFont = PDType0Font.load(document, fontMapping.getFont(), false); + LOG.debug("looked up font for " + fontName.getName() + " - found " + fontMapping.getFont().getName()); + defaultResources.put(fontName, pdFont); + } + else + { + LOG.debug("no suitable font found for field " + field.getFullyQualifiedName() + " for font name " + fontName.getName()); + } + } + } + catch (IOException ioe) + { + LOG.debug("Unable to handle font resources for field " + field.getFullyQualifiedName() + ": " + ioe.getMessage()); + } + } + } +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/PDDocumentProcessor.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/PDDocumentProcessor.java new file mode 100644 index 00000000000..b53461b31c0 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fixup/processor/PDDocumentProcessor.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.fixup.processor; + +public interface PDDocumentProcessor +{ + void process(); +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/CIDSystemInfo.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/CIDSystemInfo.java index 9fc901751ba..62df3b26597 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/CIDSystemInfo.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/CIDSystemInfo.java @@ -28,7 +28,7 @@ public final class CIDSystemInfo private final String ordering; private final int supplement; - CIDSystemInfo(String registry, String ordering, int supplement) + public CIDSystemInfo(String registry, String ordering, int supplement) { this.registry = registry; this.ordering = ordering; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/CMapManager.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/CMapManager.java index 46b4a19a2be..54be8769e9c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/CMapManager.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/CMapManager.java @@ -41,6 +41,8 @@ private CMapManager() * Fetches the predefined CMap from disk (or cache). * * @param cMapName CMap name + * @return The predefined CMap, never null. 
+ * @throws IOException */ public static CMap getPredefinedCMap(String cMapName) throws IOException { @@ -50,8 +52,7 @@ public static CMap getPredefinedCMap(String cMapName) throws IOException return cmap; } - CMapParser parser = new CMapParser(); - CMap targetCmap = parser.parsePredefined(cMapName); + CMap targetCmap = new CMapParser().parsePredefined(cMapName); // limit the cache to predefined CMaps cMapCache.put(targetCmap.getName(), targetCmap); @@ -69,8 +70,8 @@ public static CMap parseCMap(InputStream cMapStream) throws IOException CMap targetCmap = null; if (cMapStream != null) { - CMapParser parser = new CMapParser(); - targetCmap = parser.parse(cMapStream); + // parse CMap using strict mode + targetCmap = new CMapParser(true).parse(cMapStream); } return targetCmap; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java index a3e7f0d92d0..0f22ea269a3 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java @@ -36,6 +36,7 @@ import org.apache.fontbox.cff.CFFCIDFont; import org.apache.fontbox.cff.CFFFont; import org.apache.fontbox.ttf.NamingTable; +import org.apache.fontbox.ttf.OS2WindowsMetricsTable; import org.apache.fontbox.ttf.OTFParser; import org.apache.fontbox.ttf.OpenTypeFont; import org.apache.fontbox.ttf.TTFParser; @@ -71,7 +72,7 @@ private static class FSFontInfo extends FontInfo private final int macStyle; private final PDPanoseClassification panose; private final File file; - private transient FileSystemFontProvider parent; + private final FileSystemFontProvider parent; private FSFontInfo(File file, FontFormat format, String postScriptName, CIDSystemInfo cidSystemInfo, int usWeightClass, int sFamilyClass, @@ -109,9 +110,17 @@ public CIDSystemInfo getCIDSystemInfo() return cidSystemInfo; } + /** + * 
{@inheritDoc} + *

+ * The method returns null if there is there was an error opening the font. + * + */ @Override - public FontBoxFont getFont() + public synchronized FontBoxFont getFont() { + // synchronized to avoid race condition on cache access, + // which could result in an unreferenced but open font FontBoxFont cached = parent.cache.getFont(this); if (cached != null) { @@ -122,12 +131,15 @@ public FontBoxFont getFont() FontBoxFont font; switch (format) { - case PFB: font = parent.getType1Font(postScriptName, file); break; - case TTF: font = parent.getTrueTypeFont(postScriptName, file); break; - case OTF: font = parent.getOTFFont(postScriptName, file); break; + case PFB: font = getType1Font(postScriptName, file); break; + case TTF: font = getTrueTypeFont(postScriptName, file); break; + case OTF: font = getOTFFont(postScriptName, file); break; default: throw new RuntimeException("can't happen"); } - parent.cache.addFont(this, font); + if (font != null) + { + parent.cache.addFont(this, font); + } return font; } } @@ -173,6 +185,127 @@ public String toString() { return super.toString() + " " + file; } + + private TrueTypeFont getTrueTypeFont(String postScriptName, File file) + { + try + { + TrueTypeFont ttf = readTrueTypeFont(postScriptName, file); + + if (LOG.isDebugEnabled()) + { + LOG.debug("Loaded " + postScriptName + " from " + file); + } + return ttf; + } + catch (IOException e) + { + LOG.warn("Could not load font file: " + file, e); + } + return null; + } + + private TrueTypeFont readTrueTypeFont(String postScriptName, File file) throws IOException + { + if (file.getName().toLowerCase().endsWith(".ttc")) + { + @SuppressWarnings("squid:S2095") + // ttc not closed here because it is needed later when ttf is accessed, + // e.g. 
rendering PDF with non-embedded font which is in ttc file in our font directory + TrueTypeCollection ttc = new TrueTypeCollection(file); + TrueTypeFont ttf; + try + { + ttf = ttc.getFontByName(postScriptName); + } + catch (IOException ex) + { + ttc.close(); + throw ex; + } + if (ttf == null) + { + ttc.close(); + throw new IOException("Font " + postScriptName + " not found in " + file); + } + return ttf; + } + else + { + TTFParser ttfParser = new TTFParser(false, true); + return ttfParser.parse(file); + } + } + + private OpenTypeFont getOTFFont(String postScriptName, File file) + { + try + { + if (file.getName().toLowerCase().endsWith(".ttc")) + { + @SuppressWarnings("squid:S2095") + // ttc not closed here because it is needed later when ttf is accessed, + // e.g. rendering PDF with non-embedded font which is in ttc file in our font directory + TrueTypeCollection ttc = new TrueTypeCollection(file); + TrueTypeFont ttf; + try + { + ttf = ttc.getFontByName(postScriptName); + } + catch (IOException ex) + { + LOG.error(ex.getMessage(), ex); + ttc.close(); + return null; + } + if (ttf == null) + { + ttc.close(); + throw new IOException("Font " + postScriptName + " not found in " + file); + } + return (OpenTypeFont) ttf; + } + + OTFParser parser = new OTFParser(false, true); + OpenTypeFont otf = parser.parse(file); + + if (LOG.isDebugEnabled()) + { + LOG.debug("Loaded " + postScriptName + " from " + file); + } + return otf; + } + catch (IOException e) + { + LOG.warn("Could not load font file: " + file, e); + } + return null; + } + + private Type1Font getType1Font(String postScriptName, File file) + { + InputStream input = null; + try + { + input = new FileInputStream(file); + Type1Font type1 = Type1Font.createWithPFB(input); + + if (LOG.isDebugEnabled()) + { + LOG.debug("Loaded " + postScriptName + " from " + file); + } + return type1; + } + catch (IOException e) + { + LOG.warn("Could not load font file: " + file, e); + } + finally + { + IOUtils.closeQuietly(input); + } + 
return null; + } } /** @@ -200,9 +333,9 @@ private FSIgnored(File file, FontFormat format, String postScriptName) } // scan the local system for font files - List files = new ArrayList(); FontFileFinder fontFileFinder = new FontFileFinder(); List fonts = fontFileFinder.find(); + List files = new ArrayList(fonts.size()); for (URI font : fonts) { files.add(new File(font)); @@ -215,7 +348,7 @@ private FSIgnored(File file, FontFormat format, String postScriptName) // load cached FontInfo objects List cachedInfos = loadDiskCache(files); - if (cachedInfos != null && cachedInfos.size() > 0) + if (cachedInfos != null && !cachedInfos.isEmpty()) { fontInfoList.addAll(cachedInfos); } @@ -240,24 +373,23 @@ private void scanFonts(List files) { try { - if (file.getPath().toLowerCase().endsWith(".ttf") || - file.getPath().toLowerCase().endsWith(".otf")) + String filePath = file.getPath().toLowerCase(); + if (filePath.endsWith(".ttf") || filePath.endsWith(".otf")) { addTrueTypeFont(file); } - else if (file.getPath().toLowerCase().endsWith(".ttc") || - file.getPath().toLowerCase().endsWith(".otc")) + else if (filePath.endsWith(".ttc") || filePath.endsWith(".otc")) { addTrueTypeCollection(file); } - else if (file.getPath().toLowerCase().endsWith(".pfb")) + else if (filePath.endsWith(".pfb")) { addType1Font(file); } } catch (IOException e) { - LOG.error("Error parsing font " + file.getPath(), e); + LOG.warn("Error parsing font " + file.getPath(), e); } } } @@ -265,10 +397,10 @@ else if (file.getPath().toLowerCase().endsWith(".pfb")) private File getDiskCacheFile() { String path = System.getProperty("pdfbox.fontcache"); - if (path == null) + if (path == null || !new File(path).isDirectory() || !new File(path).canWrite()) { path = System.getProperty("user.home"); - if (path == null) + if (path == null || !new File(path).isDirectory() || !new File(path).canWrite()) { path = System.getProperty("java.io.tmpdir"); } @@ -284,8 +416,15 @@ private void saveDiskCache() BufferedWriter writer = 
null; try { - File file = getDiskCacheFile(); - writer = new BufferedWriter(new FileWriter(file)); + try + { + File file = getDiskCacheFile(); + writer = new BufferedWriter(new FileWriter(file)); + } + catch (SecurityException e) + { + return; + } for (FSFontInfo fontInfo : fontInfoList) { @@ -339,7 +478,9 @@ private void saveDiskCache() } catch (IOException e) { - LOG.error("Could not write to font cache", e); + LOG.warn("Could not write to font cache", e); + LOG.warn("Installed fonts information will have to be reloaded for each start"); + LOG.warn("You can assign a directory to the 'pdfbox.fontcache' property"); } finally { @@ -352,15 +493,27 @@ private void saveDiskCache() */ private List loadDiskCache(List files) { - Set pending = new HashSet(); + Set pending = new HashSet(files.size()); for (File file : files) { pending.add(file.getAbsolutePath()); } List results = new ArrayList(); - File file = getDiskCacheFile(); - if (file.exists()) + + // Get the disk cache + File file = null; + boolean fileExists = false; + try + { + file = getDiskCacheFile(); + fileExists = file.exists(); + } + catch (SecurityException e) + { + } + + if (fileExists) { BufferedReader reader = null; try @@ -372,7 +525,7 @@ private List loadDiskCache(List files) String[] parts = line.split("\\|", 10); if (parts.length < 10) { - LOG.error("Incorrect line '" + line + "' in font disk cache is skipped"); + LOG.warn("Incorrect line '" + line + "' in font disk cache is skipped"); continue; } @@ -419,17 +572,23 @@ private List loadDiskCache(List files) } } fontFile = new File(parts[9]); - - FSFontInfo info = new FSFontInfo(fontFile, format, postScriptName, - cidSystemInfo, usWeightClass, sFamilyClass, ulCodePageRange1, - ulCodePageRange2, macStyle, panose, this); - results.add(info); + if (fontFile.exists()) + { + FSFontInfo info = new FSFontInfo(fontFile, format, postScriptName, + cidSystemInfo, usWeightClass, sFamilyClass, ulCodePageRange1, + ulCodePageRange2, macStyle, panose, this); + 
results.add(info); + } + else + { + LOG.debug("Font file " + fontFile.getAbsolutePath() + " not found, skipped"); + } pending.remove(fontFile.getAbsolutePath()); } } catch (IOException e) { - LOG.error("Error loading font cache, will be re-built", e); + LOG.warn("Error loading font cache, will be re-built", e); return null; } finally @@ -438,7 +597,7 @@ private List loadDiskCache(List files) } } - if (pending.size() > 0) + if (!pending.isEmpty()) { // re-build the entire cache if we encounter un-cached fonts (could be optimised) LOG.warn("New fonts found, font cache will be re-built"); @@ -466,13 +625,9 @@ public void process(TrueTypeFont ttf) throws IOException } }); } - catch (NullPointerException e) // TTF parser is buggy - { - LOG.error("Could not load font file: " + ttcFile, e); - } catch (IOException e) { - LOG.error("Could not load font file: " + ttcFile, e); + LOG.warn("Could not load font file: " + ttcFile, e); } finally { @@ -490,7 +645,7 @@ private void addTrueTypeFont(File ttfFile) throws IOException { try { - if (ttfFile.getPath().endsWith(".otf")) + if (ttfFile.getPath().toLowerCase().endsWith(".otf")) { OTFParser parser = new OTFParser(false, true); OpenTypeFont otf = parser.parse(ttfFile); @@ -503,13 +658,9 @@ private void addTrueTypeFont(File ttfFile) throws IOException addTrueTypeFontImpl(ttf, ttfFile); } } - catch (NullPointerException e) // TTF parser is buggy - { - LOG.error("Could not load font file: " + ttfFile, e); - } catch (IOException e) { - LOG.error("Could not load font file: " + ttfFile, e); + LOG.warn("Could not load font file: " + ttfFile, e); } } @@ -521,7 +672,12 @@ private void addTrueTypeFontImpl(TrueTypeFont ttf, File file) throws IOException try { // read PostScript name, if any - if (ttf.getName() != null) + if (ttf.getName() != null && ttf.getName().contains("|")) + { + fontInfoList.add(new FSIgnored(file, FontFormat.TTF, "*skippipeinname*")); + LOG.warn("Skipping font with '|' in name " + ttf.getName() + " in file " + file); 
+ } + else if (ttf.getName() != null) { // ignore bitmap fonts if (ttf.getHeader() == null) @@ -536,14 +692,15 @@ private void addTrueTypeFontImpl(TrueTypeFont ttf, File file) throws IOException int ulCodePageRange1 = 0; int ulCodePageRange2 = 0; byte[] panose = null; + OS2WindowsMetricsTable os2WindowsMetricsTable = ttf.getOS2Windows(); // Apple's AAT fonts don't have an OS/2 table - if (ttf.getOS2Windows() != null) + if (os2WindowsMetricsTable != null) { - sFamilyClass = ttf.getOS2Windows().getFamilyClass(); - usWeightClass = ttf.getOS2Windows().getWeightClass(); - ulCodePageRange1 = (int)ttf.getOS2Windows().getCodePageRange1(); - ulCodePageRange2 = (int)ttf.getOS2Windows().getCodePageRange2(); - panose = ttf.getOS2Windows().getPanose(); + sFamilyClass = os2WindowsMetricsTable.getFamilyClass(); + usWeightClass = os2WindowsMetricsTable.getWeightClass(); + ulCodePageRange1 = (int) os2WindowsMetricsTable.getCodePageRange1(); + ulCodePageRange2 = (int) os2WindowsMetricsTable.getCodePageRange2(); + panose = os2WindowsMetricsTable.getPanose(); } String format; @@ -575,7 +732,7 @@ private void addTrueTypeFontImpl(TrueTypeFont ttf, File file) throws IOException String registryName = reg.substring(0, reg.indexOf('\0')); String ord = new String(bytes, 76, 64, Charsets.US_ASCII); String orderName = ord.substring(0, ord.indexOf('\0')); - int supplementVersion = bytes[140] << 8 & bytes[141]; + int supplementVersion = bytes[140] << 8 & (bytes[141] & 0xFF); ros = new CIDSystemInfo(registryName, orderName, supplementVersion); } @@ -605,14 +762,11 @@ private void addTrueTypeFontImpl(TrueTypeFont ttf, File file) throws IOException catch (IOException e) { fontInfoList.add(new FSIgnored(file, FontFormat.TTF, "*skipexception*")); - LOG.error("Could not load font file: " + file, e); + LOG.warn("Could not load font file: " + file, e); } finally { - if (ttf != null) - { - ttf.close(); - } + ttf.close(); } } @@ -625,6 +779,18 @@ private void addType1Font(File pfbFile) throws IOException 
try { Type1Font type1 = Type1Font.createWithPFB(input); + if (type1.getName() == null) + { + fontInfoList.add(new FSIgnored(pfbFile, FontFormat.PFB, "*skipnoname*")); + LOG.warn("Missing 'name' entry for PostScript name in font " + pfbFile); + return; + } + if (type1.getName().contains("|")) + { + fontInfoList.add(new FSIgnored(pfbFile, FontFormat.PFB, "*skippipeinname*")); + LOG.warn("Skipping font with '|' in name " + type1.getName() + " in file " + pfbFile); + return; + } fontInfoList.add(new FSFontInfo(pfbFile, FontFormat.PFB, type1.getName(), null, -1, -1, 0, 0, -1, null, this)); @@ -636,7 +802,7 @@ private void addType1Font(File pfbFile) throws IOException } catch (IOException e) { - LOG.error("Could not load font file: " + pfbFile, e); + LOG.warn("Could not load font file: " + pfbFile, e); } finally { @@ -644,95 +810,6 @@ private void addType1Font(File pfbFile) throws IOException } } - private TrueTypeFont getTrueTypeFont(String postScriptName, File file) - { - try - { - TrueTypeFont ttf = readTrueTypeFont(postScriptName, file); - - if (LOG.isDebugEnabled()) - { - LOG.debug("Loaded " + postScriptName + " from " + file); - } - return ttf; - } - catch (NullPointerException e) // TTF parser is buggy - { - LOG.error("Could not load font file: " + file, e); - } - catch (IOException e) - { - LOG.error("Could not load font file: " + file, e); - } - return null; - } - - private TrueTypeFont readTrueTypeFont(String postScriptName, File file) throws IOException - { - if (file.getName().toLowerCase().endsWith(".ttc")) - { - TrueTypeCollection ttc = new TrueTypeCollection(file); - TrueTypeFont ttf = ttc.getFontByName(postScriptName); - if (ttf == null) - { - ttc.close(); - throw new IOException("Font " + postScriptName + " not found in " + file); - } - return ttf; - } - else - { - TTFParser ttfParser = new TTFParser(false, true); - return ttfParser.parse(file); - } - } - - private OpenTypeFont getOTFFont(String postScriptName, File file) - { - try - { - // todo JH: we 
don't yet support loading CFF fonts from OTC collections
 - OTFParser parser = new OTFParser(false, true); - OpenTypeFont otf = parser.parse(file); - - if (LOG.isDebugEnabled()) - { - LOG.debug("Loaded " + postScriptName + " from " + file); - } - return otf; - } - catch (IOException e) - { - LOG.error("Could not load font file: " + file, e); - } - return null; - } - - private Type1Font getType1Font(String postScriptName, File file) - { - InputStream input = null; - try - { - input = new FileInputStream(file); - Type1Font type1 = Type1Font.createWithPFB(input); - - if (LOG.isDebugEnabled()) - { - LOG.debug("Loaded " + postScriptName + " from " + file); - } - return type1; - } - catch (IOException e) - { - LOG.error("Could not load font file: " + file, e); - } - finally - { - IOUtils.closeQuietly(input); - } - return null; - } - @Override public String toDebugString() { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FontMapper.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FontMapper.java index 54df09b1c5e..8cf5139115a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FontMapper.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FontMapper.java @@ -21,7 +21,7 @@ /** * Font mapper, locates non-embedded fonts. If you implement this then you're responsible for - * caching the fonts. SoftReference is recommended. + * caching the fonts. SoftReference<FontBoxFont> is recommended. 
* * @author John Hewson */ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FontMapperImpl.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FontMapperImpl.java index d19cdbdb60a..ffd2e9b5aaf 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FontMapperImpl.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FontMapperImpl.java @@ -16,9 +16,9 @@ */ package org.apache.pdfbox.pdmodel.font; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; -import java.net.URL; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -29,9 +29,9 @@ import java.util.Map; import java.util.PriorityQueue; import java.util.Set; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.fontbox.FontBoxFont; -import org.apache.fontbox.cff.CFFFont; -import org.apache.fontbox.cff.CFFType1Font; import org.apache.fontbox.ttf.OpenTypeFont; import org.apache.fontbox.ttf.TTFParser; import org.apache.fontbox.ttf.TrueTypeFont; @@ -44,6 +44,8 @@ */ final class FontMapperImpl implements FontMapper { + private static final Log LOG = LogFactory.getLog(FontMapperImpl.class); + private static final FontCache fontCache = new FontCache(); // todo: static cache isn't ideal private FontProvider fontProvider; private Map fontInfoByName; @@ -56,44 +58,50 @@ final class FontMapperImpl implements FontMapper { // substitutes for standard 14 fonts substitutes.put("Courier", - Arrays.asList("CourierNew", "CourierNewPSMT", "LiberationMono", "NimbusMonL-Regu")); + new ArrayList(Arrays.asList("CourierNew", "CourierNewPSMT", "LiberationMono", + "NimbusMonL-Regu"))); substitutes.put("Courier-Bold", - Arrays.asList("CourierNewPS-BoldMT", "CourierNew-Bold", "LiberationMono-Bold", - "NimbusMonL-Bold")); + new ArrayList(Arrays.asList("CourierNewPS-BoldMT", "CourierNew-Bold", + "LiberationMono-Bold", "NimbusMonL-Bold"))); substitutes.put("Courier-Oblique", - 
Arrays.asList("CourierNewPS-ItalicMT","CourierNew-Italic", - "LiberationMono-Italic", "NimbusMonL-ReguObli")); + new ArrayList(Arrays.asList("CourierNewPS-ItalicMT","CourierNew-Italic", + "LiberationMono-Italic", "NimbusMonL-ReguObli"))); substitutes.put("Courier-BoldOblique", - Arrays.asList("CourierNewPS-BoldItalicMT","CourierNew-BoldItalic", - "LiberationMono-BoldItalic", "NimbusMonL-BoldObli")); + new ArrayList(Arrays.asList("CourierNewPS-BoldItalicMT", + "CourierNew-BoldItalic", "LiberationMono-BoldItalic", + "NimbusMonL-BoldObli"))); substitutes.put("Helvetica", - Arrays.asList("ArialMT", "Arial", "LiberationSans", "NimbusSanL-Regu")); + new ArrayList(Arrays.asList("ArialMT", "Arial", "LiberationSans", + "NimbusSanL-Regu"))); substitutes.put("Helvetica-Bold", - Arrays.asList("Arial-BoldMT", "Arial-Bold", "LiberationSans-Bold", - "NimbusSanL-Bold")); + new ArrayList(Arrays.asList("Arial-BoldMT", "Arial-Bold", + "LiberationSans-Bold", "NimbusSanL-Bold"))); substitutes.put("Helvetica-Oblique", - Arrays.asList("Arial-ItalicMT", "Arial-Italic", "Helvetica-Italic", - "LiberationSans-Italic", "NimbusSanL-ReguItal")); + new ArrayList(Arrays.asList("Arial-ItalicMT", "Arial-Italic", + "Helvetica-Italic", "LiberationSans-Italic", "NimbusSanL-ReguItal"))); substitutes.put("Helvetica-BoldOblique", - Arrays.asList("Arial-BoldItalicMT", "Helvetica-BoldItalic", - "LiberationSans-BoldItalic", "NimbusSanL-BoldItal")); + new ArrayList(Arrays.asList("Arial-BoldItalicMT", "Helvetica-BoldItalic", + "LiberationSans-BoldItalic", "NimbusSanL-BoldItal"))); substitutes.put("Times-Roman", - Arrays.asList("TimesNewRomanPSMT", "TimesNewRoman", "TimesNewRomanPS", - "LiberationSerif", "NimbusRomNo9L-Regu")); + new ArrayList(Arrays.asList("TimesNewRomanPSMT", "TimesNewRoman", + "TimesNewRomanPS", "LiberationSerif", "NimbusRomNo9L-Regu"))); substitutes.put("Times-Bold", - Arrays.asList("TimesNewRomanPS-BoldMT", "TimesNewRomanPS-Bold", + new ArrayList(Arrays.asList("TimesNewRomanPS-BoldMT", 
"TimesNewRomanPS-Bold", "TimesNewRoman-Bold", "LiberationSerif-Bold", - "NimbusRomNo9L-Medi")); + "NimbusRomNo9L-Medi"))); substitutes.put("Times-Italic", - Arrays.asList("TimesNewRomanPS-ItalicMT", "TimesNewRomanPS-Italic", - "TimesNewRoman-Italic", "LiberationSerif-Italic", - "NimbusRomNo9L-ReguItal")); + new ArrayList(Arrays.asList("TimesNewRomanPS-ItalicMT", + "TimesNewRomanPS-Italic", "TimesNewRoman-Italic", "LiberationSerif-Italic", + "NimbusRomNo9L-ReguItal"))); substitutes.put("Times-BoldItalic", - Arrays.asList("TimesNewRomanPS-BoldItalicMT", "TimesNewRomanPS-BoldItalic", - "TimesNewRoman-BoldItalic", "LiberationSerif-BoldItalic", - "NimbusRomNo9L-MediItal")); - substitutes.put("Symbol", Arrays.asList("Symbol", "SymbolMT", "StandardSymL")); - substitutes.put("ZapfDingbats", Arrays.asList("ZapfDingbatsITC", "Dingbats", "MS-Gothic")); + new ArrayList(Arrays.asList("TimesNewRomanPS-BoldItalicMT", + "TimesNewRomanPS-BoldItalic", "TimesNewRoman-BoldItalic", + "LiberationSerif-BoldItalic", "NimbusRomNo9L-MediItal"))); + substitutes.put("Symbol", + new ArrayList(Arrays.asList("Symbol", "SymbolMT", "StandardSymL"))); + substitutes.put("ZapfDingbats", new ArrayList( + Arrays.asList("ZapfDingbatsITCbyBT-Regular", "ZapfDingbatsITC", "Dingbats", + "MS-Gothic"))); // Acrobat also uses alternative names for Standard 14 fonts, which we map to those above // these include names such as "Arial" and "TimesNewRoman" @@ -110,13 +118,13 @@ final class FontMapperImpl implements FontMapper try { - String ttfName = "org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"; - URL url = FontMapper.class.getClassLoader().getResource(ttfName); - if (url == null) + String ttfName = "/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"; + InputStream resourceAsStream = FontMapper.class.getResourceAsStream(ttfName); + if (resourceAsStream == null) { - throw new IOException("Error loading resource: " + ttfName); + throw new IOException("resource '" + ttfName + "' not found"); 
} - InputStream ttfStream = url.openStream(); + InputStream ttfStream = new BufferedInputStream(resourceAsStream); TTFParser ttfParser = new TTFParser(); lastResortFont = ttfParser.parse(ttfStream); } @@ -137,8 +145,8 @@ private static class DefaultFontProvider */ public synchronized void setProvider(FontProvider fontProvider) { - this.fontProvider = fontProvider; fontInfoByName = createFontInfoByName(fontProvider.getFontInfo()); + this.fontProvider = fontProvider; } /** @@ -186,7 +194,7 @@ private Set getPostScriptNames(String postScriptName) names.add(postScriptName); // remove hyphens (e.g. Arial-Black -> ArialBlack) - names.add(postScriptName.replaceAll("-", "")); + names.add(postScriptName.replace("-", "")); return names; } @@ -219,7 +227,7 @@ public void addSubstitute(String match, String replace) */ private List getSubstitutes(String postScriptName) { - List subs = substitutes.get(postScriptName.replaceAll(" ", "")); + List subs = substitutes.get(postScriptName.replace(" ", "")); if (subs != null) { return subs; @@ -381,18 +389,18 @@ private FontBoxFont findFontBoxFont(String postScriptName) return t1; } - CFFFont cff = (CFFFont)findFont(FontFormat.OTF, postScriptName); - if (cff instanceof CFFType1Font) - { - return cff; - } - TrueTypeFont ttf = (TrueTypeFont)findFont(FontFormat.TTF, postScriptName); if (ttf != null) { return ttf; } + OpenTypeFont otf = (OpenTypeFont) findFont(FontFormat.OTF, postScriptName); + if (otf != null) + { + return otf; + } + return null; } @@ -423,7 +431,7 @@ private FontBoxFont findFont(FontFormat format, String postScriptName) } // remove hyphens (e.g. Arial-Black -> ArialBlack) - info = getFont(format, postScriptName.replaceAll("-", "")); + info = getFont(format, postScriptName.replace("-", "")); if (info != null) { return info.getFont(); @@ -440,12 +448,18 @@ private FontBoxFont findFont(FontFormat format, String postScriptName) } // then try converting Windows names e.g. 
(ArialNarrow,Bold) -> (ArialNarrow-Bold) - info = getFont(format, postScriptName.replaceAll(",", "-")); + info = getFont(format, postScriptName.replace(",", "-")); if (info != null) { return info.getFont(); } + // try appending "-Regular", works for Wingdings on windows + info = getFont(format, postScriptName + "-Regular"); + if (info != null) + { + return info.getFont(); + } // no matches return null; } @@ -465,6 +479,10 @@ private FontInfo getFont(FontFormat format, String postScriptName) FontInfo info = fontInfoByName.get(postScriptName); if (info != null && info.getFormat() == format) { + if (LOG.isDebugEnabled()) + { + LOG.debug(String.format("getFont('%s','%s') returns %s", format, postScriptName, info)); + } return info; } return null; @@ -511,12 +529,16 @@ public CIDFontMapping getCIDFont(String baseFont, PDFontDescriptor fontDescripto FontMatch bestMatch = queue.poll(); if (bestMatch != null) { + if (LOG.isDebugEnabled()) + { + LOG.debug("Best match for '" + baseFont + "': " + bestMatch.info); + } FontBoxFont font = bestMatch.info.getFont(); if (font instanceof OpenTypeFont) { return new CIDFontMapping((OpenTypeFont)font, null, true); } - else + else if (font != null) { return new CIDFontMapping(null, font, true); } @@ -556,6 +578,14 @@ private PriorityQueue getFontMatches(PDFontDescriptor fontDescriptor, PDPanoseClassification panose = fontDescriptor.getPanose().getPanose(); if (panose.getFamilyKind() == info.getPanose().getFamilyKind()) { + if (panose.getFamilyKind() == 0 && + (info.getPostScriptName().toLowerCase().contains("barcode") || + info.getPostScriptName().startsWith("Code")) && + !probablyBarcodeFont(fontDescriptor)) + { + // PDFBOX-4268: ignore barcode font if we aren't searching for one. 
+ continue; + } // serifs if (panose.getSerifStyle() == info.getPanose().getSerifStyle()) { @@ -620,6 +650,22 @@ else if (fontDescriptor.getFontWeight() > 0 && info.getWeightClass() > 0) return queue; } + private boolean probablyBarcodeFont(PDFontDescriptor fontDescriptor) + { + String ff = fontDescriptor.getFontFamily(); + if (ff == null) + { + ff = ""; + } + String fn = fontDescriptor.getFontName(); + if (fn == null) + { + fn = ""; + } + return ff.startsWith("Code") || ff.toLowerCase().contains("barcode") || + fn.startsWith("Code") || fn.toLowerCase().contains("barcode"); + } + /** * Returns true if the character set described by CIDSystemInfo is present in the given font. * Only applies to Adobe-GB1, Adobe-CNS1, Adobe-Japan1, Adobe-Korea1, as per the PDF spec. @@ -641,6 +687,11 @@ private boolean isCharSetMatch(PDCIDSystemInfo cidSystemInfo, FontInfo info) long CHINESE_TRADITIONAL = 1 << 20; long KOREAN_JOHAB = 1 << 21; + if ("MalgunGothic-Semilight".equals(info.getPostScriptName())) + { + // PDFBOX-4793 and PDF.js 10699: This font has only Korean, but has bits 17-21 set. + codePageRange &= ~(JIS_JAPAN | CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL); + } if (cidSystemInfo.getOrdering().equals("GB1") && (codePageRange & CHINESE_SIMPLIFIED) == CHINESE_SIMPLIFIED) { @@ -659,8 +710,8 @@ else if (cidSystemInfo.getOrdering().equals("Japan1") && else { return cidSystemInfo.getOrdering().equals("Korea1") && - (codePageRange & KOREAN_WANSUNG) == KOREAN_WANSUNG || - (codePageRange & KOREAN_JOHAB) == KOREAN_JOHAB; + ((codePageRange & KOREAN_WANSUNG) == KOREAN_WANSUNG || + (codePageRange & KOREAN_JOHAB) == KOREAN_JOHAB); } } } @@ -668,7 +719,7 @@ else if (cidSystemInfo.getOrdering().equals("Japan1") && /** * A potential match for a font substitution. 
*/ - private class FontMatch implements Comparable + private static class FontMatch implements Comparable { double score; final FontInfo info; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java index ea2ec096cc1..e17e52d9435 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java @@ -17,18 +17,20 @@ package org.apache.pdfbox.pdmodel.font; import java.io.IOException; -import java.util.Collection; +import java.io.InputStream; import java.util.HashMap; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; -import org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.common.COSObjectable; -import org.apache.pdfbox.util.Matrix; import org.apache.pdfbox.util.Vector; /** @@ -41,6 +43,8 @@ */ public abstract class PDCIDFont implements COSObjectable, PDFontLike, PDVectorFont { + private static final Log LOG = LogFactory.getLog(PDCIDFont.class); + protected final PDType0Font parent; private Map widths; @@ -49,7 +53,7 @@ public abstract class PDCIDFont implements COSObjectable, PDFontLike, PDVectorFo private final Map verticalDisplacementY = new HashMap(); // w1y private final Map positionVectors = new HashMap(); // v - private float[] dw2; + private float[] dw2 = new float[] { 880, -1000 }; protected final COSDictionary dict; private PDFontDescriptor fontDescriptor; @@ -59,7 +63,7 @@ public abstract class PDCIDFont implements COSObjectable, PDFontLike, PDVectorFo * * @param fontDictionary The font dictionary according to the PDF 
specification. */ - PDCIDFont(COSDictionary fontDictionary, PDType0Font parent) throws IOException + PDCIDFont(COSDictionary fontDictionary, PDType0Font parent) { this.dict = fontDictionary; this.parent = parent; @@ -70,15 +74,22 @@ public abstract class PDCIDFont implements COSObjectable, PDFontLike, PDVectorFo private void readWidths() { widths = new HashMap(); - COSArray widths = (COSArray) dict.getDictionaryObject(COSName.W); - if (widths != null) + COSBase wBase = dict.getDictionaryObject(COSName.W); + if (wBase instanceof COSArray) { - int size = widths.size(); + COSArray wArray = (COSArray) wBase; + int size = wArray.size(); int counter = 0; while (counter < size) { - COSNumber firstCode = (COSNumber) widths.getObject(counter++); - COSBase next = widths.getObject(counter++); + COSBase firstCodeBase = wArray.getObject(counter++); + if (!(firstCodeBase instanceof COSNumber)) + { + LOG.warn("Expected a number array member, got " + firstCodeBase); + continue; + } + COSNumber firstCode = (COSNumber) firstCodeBase; + COSBase next = wArray.getObject(counter++); if (next instanceof COSArray) { COSArray array = (COSArray) next; @@ -86,59 +97,75 @@ private void readWidths() int arraySize = array.size(); for (int i = 0; i < arraySize; i++) { - COSNumber width = (COSNumber) array.get(i); - this.widths.put(startRange + i, width.floatValue()); + COSBase widthBase = array.getObject(i); + if (widthBase instanceof COSNumber) + { + COSNumber width = (COSNumber) widthBase; + widths.put(startRange + i, width.floatValue()); + } + else + { + LOG.warn("Expected a number array member, got " + widthBase); + } } } else { - COSNumber secondCode = (COSNumber) next; - COSNumber rangeWidth = (COSNumber) widths.getObject(counter++); + COSBase secondCodeBase = next; + COSBase rangeWidthBase = wArray.getObject(counter++); + if (!(secondCodeBase instanceof COSNumber) || !(rangeWidthBase instanceof COSNumber)) + { + LOG.warn("Expected two numbers, got " + secondCodeBase + " and " + 
rangeWidthBase); + continue; + } + COSNumber secondCode = (COSNumber) secondCodeBase; + COSNumber rangeWidth = (COSNumber) rangeWidthBase; int startRange = firstCode.intValue(); int endRange = secondCode.intValue(); float width = rangeWidth.floatValue(); for (int i = startRange; i <= endRange; i++) { - this.widths.put(i, width); + widths.put(i, width); } } } } - } private void readVerticalDisplacements() { // default position vector and vertical displacement vector - COSArray cosDW2 = (COSArray) dict.getDictionaryObject(COSName.DW2); - if (cosDW2 != null) - { - dw2 = new float[2]; - dw2[0] = ((COSNumber)cosDW2.get(0)).floatValue(); - dw2[1] = ((COSNumber)cosDW2.get(1)).floatValue(); - } - else + COSBase dw2Base = dict.getDictionaryObject(COSName.DW2); + if (dw2Base instanceof COSArray) { - dw2 = new float[] { 880, -1000 }; + COSArray dw2Array = (COSArray) dw2Base; + COSBase base0 = dw2Array.getObject(0); + COSBase base1 = dw2Array.getObject(1); + if (base0 instanceof COSNumber && base1 instanceof COSNumber) + { + dw2[0] = ((COSNumber) base0).floatValue(); + dw2[1] = ((COSNumber) base1).floatValue(); + } } // vertical metrics for individual CIDs. 
- COSArray w2 = (COSArray) dict.getDictionaryObject(COSName.W2); - if (w2 != null) + COSBase w2Base = dict.getDictionaryObject(COSName.W2); + if (w2Base instanceof COSArray) { - for (int i = 0; i < w2.size(); i++) + COSArray w2Array = (COSArray) w2Base; + for (int i = 0; i < w2Array.size(); i++) { - COSNumber c = (COSNumber)w2.get(i); - COSBase next = w2.get(++i); + COSNumber c = (COSNumber) w2Array.getObject(i); + COSBase next = w2Array.getObject(++i); if (next instanceof COSArray) { COSArray array = (COSArray)next; for (int j = 0; j < array.size(); j++) { - int cid = c.intValue() + j; - COSNumber w1y = (COSNumber) array.get(j); - COSNumber v1x = (COSNumber) array.get(++j); - COSNumber v1y = (COSNumber) array.get(++j); + int cid = c.intValue() + j / 3; + COSNumber w1y = (COSNumber) array.getObject(j); + COSNumber v1x = (COSNumber) array.getObject(++j); + COSNumber v1y = (COSNumber) array.getObject(++j); verticalDisplacementY.put(cid, w1y.floatValue()); positionVectors.put(cid, new Vector(v1x.floatValue(), v1y.floatValue())); } @@ -147,9 +174,9 @@ private void readVerticalDisplacements() { int first = c.intValue(); int last = ((COSNumber) next).intValue(); - COSNumber w1y = (COSNumber) w2.get(++i); - COSNumber v1x = (COSNumber) w2.get(++i); - COSNumber v1y = (COSNumber) w2.get(++i); + COSNumber w1y = (COSNumber) w2Array.getObject(++i); + COSNumber v1x = (COSNumber) w2Array.getObject(++i); + COSNumber v1y = (COSNumber) w2Array.getObject(++i); for (int cid = first; cid <= last; cid++) { verticalDisplacementY.put(cid, w1y.floatValue()); @@ -196,9 +223,6 @@ public PDFontDescriptor getFontDescriptor() return fontDescriptor; } - @Override - public abstract Matrix getFontMatrix(); - /** * Returns the Type 0 font which is the parent of this font. * @@ -209,9 +233,6 @@ public final PDType0Font getParent() return parent; } - @Override - public abstract BoundingBox getBoundingBox() throws IOException; - /** * This will get the default width. 
The default value for the default width is 1000. * @@ -221,10 +242,10 @@ private float getDefaultWidth() { if (defaultWidth == 0) { - COSNumber number = (COSNumber) dict.getDictionaryObject(COSName.DW); - if (number != null) + COSBase base = dict.getDictionaryObject(COSName.DW); + if (base instanceof COSNumber) { - defaultWidth = number.floatValue(); + defaultWidth = ((COSNumber) base).floatValue(); } else { @@ -254,6 +275,12 @@ private float getWidthForCID(int cid) return width; } + @Override + public boolean hasExplicitWidth(int code) throws IOException + { + return widths.get(codeToCID(code)) != null; + } + @Override public Vector getPositionVector(int code) { @@ -283,9 +310,6 @@ public float getVerticalDisplacementVectorY(int code) return w1y; } - @Override - public abstract float getHeight(int code) throws IOException; - @Override public float getWidth(int code) throws IOException { @@ -295,12 +319,6 @@ public float getWidth(int code) throws IOException return getWidthForCID(codeToCID(code)); } - @Override - public abstract float getWidthFromFont(int code) throws IOException; - - @Override - public abstract boolean isEmbedded(); - @Override // todo: this method is highly suspicious, the average glyph width is not usually a good metric public float getAverageFontWidth() @@ -308,17 +326,22 @@ public float getAverageFontWidth() if (averageWidth == 0) { float totalWidths = 0.0f; - float characterCount = 0.0f; + int characterCount = 0; if (widths != null) { - characterCount = widths.size(); - Collection widthsValues = widths.values(); - for (Float width : widthsValues) + for (Float width : widths.values()) { - totalWidths += width; + if (width > 0) + { + totalWidths += width; + ++characterCount; + } } } - float averageWidth = totalWidths / characterCount; + if (characterCount != 0) + { + averageWidth = totalWidths / characterCount; + } if (averageWidth <= 0 || Float.isNaN(averageWidth)) { averageWidth = getDefaultWidth(); @@ -332,15 +355,12 @@ public float 
getAverageFontWidth() */ public PDCIDSystemInfo getCIDSystemInfo() { - COSDictionary cidSystemInfoDict = (COSDictionary) - dict.getDictionaryObject(COSName.CIDSYSTEMINFO); - - PDCIDSystemInfo cidSystemInfo = null; - if (cidSystemInfoDict != null) + COSBase base = dict.getDictionaryObject(COSName.CIDSYSTEMINFO); + if (base instanceof COSDictionary) { - cidSystemInfo = new PDCIDSystemInfo(cidSystemInfoDict); + return new PDCIDSystemInfo((COSDictionary) base); } - return cidSystemInfo; + return null; } /** @@ -356,6 +376,7 @@ public PDCIDSystemInfo getCIDSystemInfo() * * @param code character code * @return GID + * @throws java.io.IOException */ public abstract int codeToGID(int code) throws IOException; @@ -370,4 +391,28 @@ public PDCIDSystemInfo getCIDSystemInfo() * @throws IOException If the text could not be encoded. */ protected abstract byte[] encode(int unicode) throws IOException; + + final int[] readCIDToGIDMap() throws IOException + { + int[] cid2gid = null; + COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP); + if (map instanceof COSStream) + { + COSStream stream = (COSStream) map; + + InputStream is = stream.createInputStream(); + byte[] mapAsBytes = IOUtils.toByteArray(is); + IOUtils.closeQuietly(is); + int numberOfInts = mapAsBytes.length / 2; + cid2gid = new int[numberOfInts]; + int offset = 0; + for (int index = 0; index < numberOfInts; index++) + { + int gid = (mapAsBytes[offset] & 0xff) << 8 | mapAsBytes[offset + 1] & 0xff; + cid2gid[index] = gid; + offset += 2; + } + } + return cid2gid; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType0.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType0.java index 1f87c63a4ca..e1b90bea818 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType0.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType0.java @@ -33,7 +33,6 @@ import org.apache.fontbox.cff.Type2CharString; import 
org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.util.Matrix; @@ -62,6 +61,7 @@ public class PDCIDFontType0 extends PDCIDFont private Matrix fontMatrix; private final AffineTransform fontMatrixTransform; private BoundingBox fontBBox; + private int[] cid2gid = null; /** * Constructor. @@ -80,7 +80,7 @@ public PDCIDFontType0(COSDictionary fontDictionary, PDType0Font parent) throws I PDStream ff3Stream = fd.getFontFile3(); if (ff3Stream != null) { - bytes = IOUtils.toByteArray(ff3Stream.createInputStream()); + bytes = ff3Stream.toByteArray(); } } @@ -97,7 +97,7 @@ else if (bytes != null) CFFParser cffParser = new CFFParser(); try { - cffFont = cffParser.parse(bytes, new ByteSource()).get(0); + cffFont = cffParser.parse(bytes, new FF3ByteSource()).get(0); } catch (IOException e) { @@ -119,6 +119,7 @@ else if (bytes != null) cidFont = null; t1Font = cffFont; } + cid2gid = readCIDToGIDMap(); isEmbedded = true; isDamaged = false; } @@ -131,9 +132,21 @@ else if (bytes != null) FontBoxFont font; if (mapping.isCIDFont()) { - cidFont = (CFFCIDFont)mapping.getFont().getCFF().getFont(); - t1Font = null; - font = cidFont; + cffFont = mapping.getFont().getCFF().getFont(); + if (cffFont instanceof CFFCIDFont) + { + cidFont = (CFFCIDFont) cffFont; + t1Font = null; + font = cidFont; + } + else + { + // PDFBOX-3515: OpenType fonts are loaded as CFFType1Font + CFFType1Font f = (CFFType1Font) cffFont; + cidFont = null; + t1Font = f; + font = f; + } } else { @@ -190,13 +203,12 @@ public final Matrix getFontMatrix() return fontMatrix; } - private class ByteSource implements CFFParser.ByteSource + private class FF3ByteSource implements CFFParser.ByteSource { @Override public byte[] getBytes() throws IOException { - PDStream ff3Stream = getFontDescriptor().getFontFile3(); - return 
IOUtils.toByteArray(ff3Stream.createInputStream()); + return getFontDescriptor().getFontFile3().toByteArray(); } } @@ -312,6 +324,11 @@ private String getGlyphName(int code) throws IOException public GeneralPath getPath(int code) throws IOException { int cid = codeToCID(code); + if (cid2gid != null && isEmbedded) + { + // PDFBOX-4093: despite being a type 0 font, there is a CIDToGIDMap + cid = cid2gid[cid]; + } Type2CharString charstring = getType2CharString(cid); if (charstring != null) { @@ -423,12 +440,16 @@ public float getHeight(int code) throws IOException { int cid = codeToCID(code); - float height = 0; + float height; if (!glyphHeights.containsKey(cid)) { - height = (float) getType2CharString(cid).getBounds().getHeight(); + height = (float) getType2CharString(cid).getBounds().getHeight(); glyphHeights.put(cid, height); } + else + { + height = glyphHeights.get(cid); + } return height; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java index aec43290c85..5ca5ba98468 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java @@ -18,25 +18,19 @@ import java.awt.geom.GeneralPath; import java.io.IOException; -import java.io.InputStream; -import java.util.HashMap; -import java.util.Map; +import java.util.HashSet; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.fontbox.cff.Type2CharString; import org.apache.fontbox.cmap.CMap; -import org.apache.fontbox.ttf.CmapSubtable; +import org.apache.fontbox.ttf.CmapLookup; import org.apache.fontbox.ttf.GlyphData; import org.apache.fontbox.ttf.OTFParser; import org.apache.fontbox.ttf.OpenTypeFont; -import org.apache.fontbox.ttf.TTFParser; import org.apache.fontbox.ttf.TrueTypeFont; import org.apache.fontbox.util.BoundingBox; -import 
org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSStream; -import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.util.Matrix; @@ -54,9 +48,10 @@ public class PDCIDFontType2 extends PDCIDFont private final int[] cid2gid; private final boolean isEmbedded; private final boolean isDamaged; - private final CmapSubtable cmap; // may be null + private final CmapLookup cmap; // may be null private Matrix fontMatrix; private BoundingBox fontBBox; + private final Set noMapping = new HashSet(); /** * Constructor. @@ -93,61 +88,37 @@ public PDCIDFontType2(COSDictionary fontDictionary, PDType0Font parent, TrueType { boolean fontIsDamaged = false; TrueTypeFont ttfFont = null; - PDStream ff2Stream = fd.getFontFile2(); - PDStream ff3Stream = fd.getFontFile3(); - - // Acrobat looks in FontFile too, even though it is not in the spec, see PDFBOX-2599 - if (ff2Stream == null && ff3Stream == null) - { - ff2Stream = fd.getFontFile(); - } - - if (ff2Stream != null) + + PDStream stream = null; + if (fd != null) { - try + stream = fd.getFontFile2(); + if (stream == null) { - // embedded - TTFParser ttfParser = new TTFParser(true); - ttfFont = ttfParser.parse(ff2Stream.createInputStream()); + stream = fd.getFontFile3(); } - catch (NullPointerException e) // TTF parser is buggy - { - LOG.warn("Could not read embedded TTF for font " + getBaseFont(), e); - fontIsDamaged = true; - } - catch (IOException e) + if (stream == null) { - LOG.warn("Could not read embedded TTF for font " + getBaseFont(), e); - fontIsDamaged = true; + // Acrobat looks in FontFile too, even though it is not in the spec, see PDFBOX-2599 + stream = fd.getFontFile(); } } - else if (ff3Stream != null) + if (stream != null) { try { - // embedded + // embedded OTF or TTF OTFParser otfParser = new OTFParser(true); - 
OpenTypeFont otf = otfParser.parse(ff3Stream.createInputStream()); + OpenTypeFont otf = otfParser.parse(stream.createInputStream()); ttfFont = otf; if (otf.isPostScript()) { - // todo: we need more abstraction to support CFF fonts here - throw new IOException("Not implemented: OpenType font with CFF table " + - getBaseFont()); - } - - if (otf.hasLayoutTables()) - { - LOG.error("OpenType Layout tables used in font " + getBaseFont() + - " are not implemented in PDFBox and will be ignored"); + // PDFBOX-3344 contains PostScript outlines instead of TrueType + fontIsDamaged = true; + LOG.warn("Found CFF/OTF but expected embedded TTF font " + fd.getFontName()); } } - catch (NullPointerException e) // TTF parser is buggy - { - fontIsDamaged = true; - LOG.warn("Could not read embedded OTF for font " + getBaseFont(), e); - } catch (IOException e) { fontIsDamaged = true; @@ -159,31 +130,36 @@ else if (ff3Stream != null) if (ttfFont == null) { - // find font or substitute - CIDFontMapping mapping = FontMappers.instance() - .getCIDFont(getBaseFont(), getFontDescriptor(), - getCIDSystemInfo()); - - if (mapping.isCIDFont()) - { - ttfFont = mapping.getFont(); - } - else - { - ttfFont = (TrueTypeFont)mapping.getTrueTypeFont(); - } - - if (mapping.isFallback()) - { - LOG.warn("Using fallback font " + ttfFont.getName() + " for CID-keyed TrueType font " + getBaseFont()); - } + ttfFont = findFontOrSubstitute(); } ttf = ttfFont; } - cmap = ttf.getUnicodeCmap(false); + cmap = ttf.getUnicodeCmapLookup(false); cid2gid = readCIDToGIDMap(); } + private TrueTypeFont findFontOrSubstitute() throws IOException + { + TrueTypeFont ttfFont; + + CIDFontMapping mapping = FontMappers.instance() + .getCIDFont(getBaseFont(), getFontDescriptor(), + getCIDSystemInfo()); + if (mapping.isCIDFont()) + { + ttfFont = mapping.getFont(); + } + else + { + ttfFont = (TrueTypeFont)mapping.getTrueTypeFont(); + } + if (mapping.isFallback()) + { + LOG.warn("Using fallback font " + ttfFont.getName() + " for CID-keyed 
TrueType font " + getBaseFont()); + } + return ttfFont; + } + @Override public Matrix getFontMatrix() { @@ -207,10 +183,15 @@ public BoundingBox getBoundingBox() throws IOException private BoundingBox generateBoundingBox() throws IOException { - if (getFontDescriptor() != null) { + if (getFontDescriptor() != null) + { PDRectangle bbox = getFontDescriptor().getFontBoundingBox(); - if (bbox.getLowerLeftX() != 0 || bbox.getLowerLeftY() != 0 || - bbox.getUpperRightX() != 0 || bbox.getUpperRightY() != 0) { + if (bbox != null && + (Float.compare(bbox.getLowerLeftX(), 0) != 0 || + Float.compare(bbox.getLowerLeftY(), 0) != 0 || + Float.compare(bbox.getUpperRightX(), 0) != 0 || + Float.compare(bbox.getUpperRightY(), 0) != 0)) + { return new BoundingBox(bbox.getLowerLeftX(), bbox.getLowerLeftY(), bbox.getUpperRightX(), bbox.getUpperRightY()); } @@ -218,44 +199,6 @@ private BoundingBox generateBoundingBox() throws IOException return ttf.getFontBBox(); } - private int[] readCIDToGIDMap() throws IOException - { - int[] cid2gid = null; - COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP); - if (map instanceof COSStream) - { - COSStream stream = (COSStream) map; - - InputStream is = stream.createInputStream(); - byte[] mapAsBytes = IOUtils.toByteArray(is); - IOUtils.closeQuietly(is); - int numberOfInts = mapAsBytes.length / 2; - cid2gid = new int[numberOfInts]; - int offset = 0; - for (int index = 0; index < numberOfInts; index++) - { - int gid = (mapAsBytes[offset] & 0xff) << 8 | mapAsBytes[offset + 1] & 0xff; - cid2gid[index] = gid; - offset += 2; - } - } - return cid2gid; - } - - private Map invert(int[] cid2gid) - { - if (cid2gid == null) - { - return null; - } - Map inverse = new HashMap(cid2gid.length); - for (int i = 0; i < cid2gid.length; i++) - { - inverse.put(cid2gid[i], i); - } - return inverse; - } - @Override public int codeToCID(int code) { @@ -264,7 +207,11 @@ public int codeToCID(int code) // Acrobat allows bad PDFs to use Unicode CMaps here instead of 
CID CMaps, see PDFBOX-1283 if (!cMap.hasCIDMappings() && cMap.hasUnicodeMappings()) { - return cMap.toUnicode(code).codePointAt(0); // actually: code -> CID + String unicode = cMap.toUnicode(code); + if (unicode != null) + { + return unicode.codePointAt(0); // actually: code -> CID + } } return cMap.toCID(code); @@ -292,7 +239,14 @@ public int codeToGID(int code) throws IOException // Acrobat allows non-embedded GIDs - todo: can we find a test PDF for this? LOG.warn("Using non-embedded GIDs in font " + getName()); int cid = codeToCID(code); - return cid2gid[cid]; + if (cid < cid2gid.length) + { + return cid2gid[cid]; + } + else + { + return 0; + } } else { @@ -300,7 +254,12 @@ public int codeToGID(int code) throws IOException String unicode = parent.toUnicode(code); if (unicode == null) { - LOG.warn("Failed to find a character mapping for " + code + " in " + getName()); + if (!noMapping.contains(code)) + { + // we keep track of which warnings have been issued, so we don't log multiple times + noMapping.add(code); + LOG.warn("Failed to find a character mapping for " + code + " in " + getName()); + } // Acrobat is willing to use the CID as a GID, even when the font isn't embedded // see PDFBOX-2599 return codeToCID(code); @@ -396,7 +355,12 @@ public byte[] encode(int unicode) // otherwise we require an explicit ToUnicode CMap if (cid == -1) { - // todo: invert the ToUnicode CMap? 
+ CMap toUnicodeCMap = parent.getToUnicodeCMap(); + byte[] codes = toUnicodeCMap.getCodesFromUnicode(Character.toString((char) unicode)); + if (codes != null) + { + return codes; + } cid = 0; } } @@ -409,7 +373,7 @@ public byte[] encode(int unicode) if (cid == 0) { throw new IllegalArgumentException( - String.format("No glyph for U+%04X in font %s", unicode, getName())); + String.format("No glyph for U+%04X (%c) in font %s", unicode, (char) unicode, getName())); } // CID is always 2-bytes (16-bit) for TrueType @@ -442,7 +406,9 @@ public GeneralPath getPath(int code) throws IOException { if (ttf instanceof OpenTypeFont && ((OpenTypeFont)ttf).isPostScript()) { - int cid = codeToCID(code); + // we're not supposed to have CFF fonts inside PDCIDFontType2, but if we do, + // then we treat their CIDs as GIDs, see PDFBOX-3344 + int cid = codeToGID(code); Type2CharString charstring = ((OpenTypeFont)ttf).getCFF().getFont().getType2CharString(cid); return charstring.getPath(); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java index 672edfbf82b..00c4b6ffaaa 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java @@ -23,11 +23,19 @@ import java.io.InputStream; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.fontbox.ttf.GlyphData; +import org.apache.fontbox.ttf.GlyphTable; +import org.apache.fontbox.ttf.HorizontalMetricsTable; import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.fontbox.ttf.VerticalHeaderTable; +import org.apache.fontbox.ttf.VerticalMetricsTable; import org.apache.pdfbox.cos.COSArray; import 
org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSInteger; @@ -44,11 +52,14 @@ */ final class PDCIDFontType2Embedder extends TrueTypeEmbedder { + + private static final Log LOG = LogFactory.getLog(PDCIDFontType2Embedder.class); + private final PDDocument document; private final PDType0Font parent; private final COSDictionary dict; private final COSDictionary cidFont; - private final Map gidToUni; + private final boolean vertical; /** * Creates a new TrueType font embedder for the given TTF as a PDCIDFontType2. @@ -60,17 +71,18 @@ final class PDCIDFontType2Embedder extends TrueTypeEmbedder * @throws IOException if the TTF could not be read */ PDCIDFontType2Embedder(PDDocument document, COSDictionary dict, TrueTypeFont ttf, - boolean embedSubset, PDType0Font parent) throws IOException + boolean embedSubset, PDType0Font parent, boolean vertical) throws IOException { super(document, dict, ttf, embedSubset); this.document = document; this.dict = dict; this.parent = parent; + this.vertical = vertical; // parent Type 0 font dict.setItem(COSName.SUBTYPE, COSName.TYPE0); dict.setName(COSName.BASE_FONT, fontDescriptor.getFontName()); - dict.setItem(COSName.ENCODING, COSName.IDENTITY_H); // CID = GID + dict.setItem(COSName.ENCODING, vertical ? 
COSName.IDENTITY_V : COSName.IDENTITY_H); // CID = GID // descendant CIDFont cidFont = createCIDFont(); @@ -78,19 +90,11 @@ final class PDCIDFontType2Embedder extends TrueTypeEmbedder descendantFonts.add(cidFont); dict.setItem(COSName.DESCENDANT_FONTS, descendantFonts); - // build GID -> Unicode map - gidToUni = new HashMap(ttf.getMaximumProfile().getNumGlyphs()); - for (int gid = 1, max = ttf.getMaximumProfile().getNumGlyphs(); gid <= max; gid++) + if (!embedSubset) { - // skip composite glyph components that have no code point - Integer codePoint = cmap.getCharacterCode(gid); - if (codePoint != null) - { - gidToUni.put(gid, codePoint); // CID = GID - } + // build GID -> Unicode map + buildToUnicodeCMap(null); } - // ToUnicode CMap - buildToUnicodeCMap(null); } /** @@ -108,14 +112,19 @@ protected void buildSubset(InputStream ttfSubset, String tag, Map newGIDToOldCID) throws IOException @@ -143,9 +152,11 @@ private void buildToUnicodeCMap(Map newGIDToOldCID) throws IOE } // skip composite glyph components that have no code point - Integer codePoint = gidToUni.get(cid); // old GID -> Unicode - if (codePoint != null) + List codes = cmapLookup.getCharCodes(cid); // old GID -> Unicode + if (codes != null) { + // use the first entry even for ambiguous mappings + int codePoint = codes.get(0); if (codePoint > 0xFFFF) { hasSurrogates = true; @@ -203,13 +214,19 @@ private COSDictionary createCIDFont() throws IOException // W - widths buildWidths(cidFont); + // Vertical metrics + if (vertical) + { + buildVerticalMetrics(cidFont); + } + // CIDToGIDMap cidFont.setItem(COSName.CID_TO_GID_MAP, COSName.IDENTITY); return cidFont; } - private void addNameTag(String tag) throws IOException + private void addNameTag(String tag) { String name = fontDescriptor.getFontName(); String newName = tag + name; @@ -239,18 +256,19 @@ private void buildCIDToGIDMap(Map cidToGid) throws IOException InputStream input = new ByteArrayInputStream(out.toByteArray()); PDStream stream = new 
PDStream(document, input, COSName.FLATE_DECODE); - stream.getCOSObject().setInt(COSName.LENGTH1, stream.toByteArray().length); cidFont.setItem(COSName.CID_TO_GID_MAP, stream); } /** - * Builds the CIDSet entry, required by PDF/A. This lists all CIDs in the font. + * Builds the CIDSet entry, required by PDF/A. This lists all CIDs in the font, including those + * that don't have a GID. */ private void buildCIDSet(Map cidToGid) throws IOException { - byte[] bytes = new byte[Collections.max(cidToGid.keySet()) / 8 + 1]; - for (int cid : cidToGid.keySet()) + int cidMax = Collections.max(cidToGid.keySet()); + byte[] bytes = new byte[cidMax / 8 + 1]; + for (int cid = 0; cid <= cidMax; cid++) { int mask = 1 << 7 - cid % 8; bytes[cid / 8] |= mask; @@ -277,7 +295,12 @@ private void buildWidths(Map cidToGid) throws IOException for (int cid : keys) { int gid = cidToGid.get(cid); - float width = ttf.getHorizontalMetrics().getAdvanceWidth(gid) * scaling; + long width = Math.round(ttf.getHorizontalMetrics().getAdvanceWidth(gid) * scaling); + if (width == 1000) + { + // skip default width + continue; + } // c [w1 w2 ... 
wn] if (prev != cid - 1) { @@ -285,12 +308,96 @@ private void buildWidths(Map cidToGid) throws IOException widths.add(COSInteger.get(cid)); // c widths.add(ws); } - ws.add(COSInteger.get(Math.round(width))); // wi + ws.add(COSInteger.get(width)); // wi prev = cid; } cidFont.setItem(COSName.W, widths); } + private boolean buildVerticalHeader(COSDictionary cidFont) throws IOException + { + VerticalHeaderTable vhea = ttf.getVerticalHeader(); + if (vhea == null) + { + LOG.warn("Font to be subset is set to vertical, but has no 'vhea' table"); + return false; + } + + float scaling = 1000f / ttf.getHeader().getUnitsPerEm(); + + long v = Math.round(vhea.getAscender() * scaling); + long w1 = Math.round(-vhea.getAdvanceHeightMax() * scaling); + if (v != 880 || w1 != -1000) + { + COSArray cosDw2 = new COSArray(); + cosDw2.add(COSInteger.get(v)); + cosDw2.add(COSInteger.get(w1)); + cidFont.setItem(COSName.DW2, cosDw2); + } + return true; + } + + /** + * Builds vertical metrics with a custom CIDToGIDMap (for embedding font subset). + */ + private void buildVerticalMetrics(Map cidToGid) throws IOException + { + // The "vhea" and "vmtx" tables that specify vertical metrics shall never be used by a conforming + // reader. The only way to specify vertical metrics in PDF shall be by means of the DW2 and W2 + // entries in a CIDFont dictionary. 
+ + if (!buildVerticalHeader(cidFont)) + { + return; + } + + float scaling = 1000f / ttf.getHeader().getUnitsPerEm(); + + VerticalHeaderTable vhea = ttf.getVerticalHeader(); + VerticalMetricsTable vmtx = ttf.getVerticalMetrics(); + GlyphTable glyf = ttf.getGlyph(); + HorizontalMetricsTable hmtx = ttf.getHorizontalMetrics(); + + long v_y = Math.round(vhea.getAscender() * scaling); + long w1 = Math.round(-vhea.getAdvanceHeightMax() * scaling); + + COSArray heights = new COSArray(); + COSArray w2 = new COSArray(); + int prev = Integer.MIN_VALUE; + // Use a sorted list to get an optimal width array + Set keys = new TreeSet(cidToGid.keySet()); + for (int cid : keys) + { + // Unlike buildWidths, we look up with cid (not gid) here because this is + // the original TTF, not the rebuilt one. + GlyphData glyph = glyf.getGlyph(cid); + if (glyph == null) + { + continue; + } + long height = Math.round((glyph.getYMaximum() + vmtx.getTopSideBearing(cid)) * scaling); + long advance = Math.round(-vmtx.getAdvanceHeight(cid) * scaling); + if (height == v_y && advance == w1) + { + // skip default metrics + continue; + } + // c [w1_1y v_1x v_1y w1_2y v_2x v_2y ... w1_ny v_nx v_ny] + if (prev != cid - 1) + { + w2 = new COSArray(); + heights.add(COSInteger.get(cid)); // c + heights.add(w2); + } + w2.add(COSInteger.get(advance)); // w1_iy + long width = Math.round(hmtx.getAdvanceWidth(cid) * scaling); + w2.add(COSInteger.get(width / 2)); // v_ix + w2.add(COSInteger.get(height)); // v_iy + prev = cid; + } + cidFont.setItem(COSName.W2, heights); + } + /** * Build widths with Identity CIDToGIDMap (for embedding full font). 
*/ @@ -324,7 +431,7 @@ private COSArray getWidths(int[] widths) throws IOException long lastCid = widths[0]; long lastValue = Math.round(widths[1] * scaling); - COSArray inner = null; + COSArray inner = new COSArray(); COSArray outer = new COSArray(); outer.add(COSInteger.get(lastCid)); @@ -408,6 +515,160 @@ else if (cid == lastCid + 1) return outer; } + /** + * Build vertical metrics with Identity CIDToGIDMap (for embedding full font). + */ + private void buildVerticalMetrics(COSDictionary cidFont) throws IOException + { + if (!buildVerticalHeader(cidFont)) + { + return; + } + + int cidMax = ttf.getNumberOfGlyphs(); + int[] gidMetrics = new int[cidMax * 4]; + for (int cid = 0; cid < cidMax; cid++) + { + GlyphData glyph = ttf.getGlyph().getGlyph(cid); + if (glyph == null) + { + gidMetrics[cid * 4] = Integer.MIN_VALUE; + } + else + { + gidMetrics[cid * 4] = cid; + gidMetrics[cid * 4 + 1] = ttf.getVerticalMetrics().getAdvanceHeight(cid); + gidMetrics[cid * 4 + 2] = ttf.getHorizontalMetrics().getAdvanceWidth(cid); + gidMetrics[cid * 4 + 3] = glyph.getYMaximum() + ttf.getVerticalMetrics().getTopSideBearing(cid); + } + } + + cidFont.setItem(COSName.W2, getVerticalMetrics(gidMetrics)); + } + + private COSArray getVerticalMetrics(int[] values) throws IOException + { + if (values.length == 0) + { + throw new IllegalArgumentException("length of values must be > 0"); + } + + float scaling = 1000f / ttf.getHeader().getUnitsPerEm(); + + long lastCid = values[0]; + long lastW1Value = Math.round(-values[1] * scaling); + long lastVxValue = Math.round(values[2] * scaling / 2f); + long lastVyValue = Math.round(values[3] * scaling); + + COSArray inner = new COSArray(); + COSArray outer = new COSArray(); + outer.add(COSInteger.get(lastCid)); + + State state = State.FIRST; + + for (int i = 4; i < values.length; i += 4) + { + long cid = values[i]; + if (cid == Integer.MIN_VALUE) + { + // no glyph for this cid + continue; + } + long w1Value = Math.round(-values[i + 1] * scaling); + long 
vxValue = Math.round(values[i + 2] * scaling / 2); + long vyValue = Math.round(values[i + 3] * scaling); + + switch (state) + { + case FIRST: + if (cid == lastCid + 1 && w1Value == lastW1Value && vxValue == lastVxValue && vyValue == lastVyValue) + { + state = State.SERIAL; + } + else if (cid == lastCid + 1) + { + state = State.BRACKET; + inner = new COSArray(); + inner.add(COSInteger.get(lastW1Value)); + inner.add(COSInteger.get(lastVxValue)); + inner.add(COSInteger.get(lastVyValue)); + } + else + { + inner = new COSArray(); + inner.add(COSInteger.get(lastW1Value)); + inner.add(COSInteger.get(lastVxValue)); + inner.add(COSInteger.get(lastVyValue)); + outer.add(inner); + outer.add(COSInteger.get(cid)); + } + break; + case BRACKET: + if (cid == lastCid + 1 && w1Value == lastW1Value && vxValue == lastVxValue && vyValue == lastVyValue) + { + state = State.SERIAL; + outer.add(inner); + outer.add(COSInteger.get(lastCid)); + } + else if (cid == lastCid + 1) + { + inner.add(COSInteger.get(lastW1Value)); + inner.add(COSInteger.get(lastVxValue)); + inner.add(COSInteger.get(lastVyValue)); + } + else + { + state = State.FIRST; + inner.add(COSInteger.get(lastW1Value)); + inner.add(COSInteger.get(lastVxValue)); + inner.add(COSInteger.get(lastVyValue)); + outer.add(inner); + outer.add(COSInteger.get(cid)); + } + break; + case SERIAL: + if (cid != lastCid + 1 || w1Value != lastW1Value || vxValue != lastVxValue || vyValue != lastVyValue) + { + outer.add(COSInteger.get(lastCid)); + outer.add(COSInteger.get(lastW1Value)); + outer.add(COSInteger.get(lastVxValue)); + outer.add(COSInteger.get(lastVyValue)); + outer.add(COSInteger.get(cid)); + state = State.FIRST; + } + break; + } + lastW1Value = w1Value; + lastVxValue = vxValue; + lastVyValue = vyValue; + lastCid = cid; + } + + switch (state) + { + case FIRST: + inner = new COSArray(); + inner.add(COSInteger.get(lastW1Value)); + inner.add(COSInteger.get(lastVxValue)); + inner.add(COSInteger.get(lastVyValue)); + outer.add(inner); + 
break; + case BRACKET: + inner.add(COSInteger.get(lastW1Value)); + inner.add(COSInteger.get(lastVxValue)); + inner.add(COSInteger.get(lastVyValue)); + outer.add(inner); + break; + case SERIAL: + outer.add(COSInteger.get(lastCid)); + outer.add(COSInteger.get(lastW1Value)); + outer.add(COSInteger.get(lastVxValue)); + outer.add(COSInteger.get(lastVyValue)); + break; + } + return outer; + } + /** * Returns the descendant CIDFont. */ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java index d2d17ccc0d1..72e885d2e5c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java @@ -21,12 +21,14 @@ import java.io.IOException; import java.io.InputStream; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.fontbox.afm.FontMetrics; import org.apache.fontbox.cmap.CMap; -import org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; @@ -52,12 +54,17 @@ public abstract class PDFont implements COSObjectable, PDFontLike protected final COSDictionary dict; private final CMap toUnicodeCMap; - private final FontMetrics afmStandard14; // AFM for standard 14 fonts + + /** + * AFM for standard 14 fonts + */ + private final FontMetrics afmStandard14; private PDFontDescriptor fontDescriptor; private List widths; private float avgFontWidth; private float fontWidthOfSpace = -1f; + private final Map codeToWidthMap; /** * Constructor for embedding. 
@@ -69,6 +76,7 @@ public abstract class PDFont implements COSObjectable, PDFontLike toUnicodeCMap = null; fontDescriptor = null; afmStandard14 = null; + codeToWidthMap = new HashMap(); } /** @@ -85,50 +93,80 @@ public abstract class PDFont implements COSObjectable, PDFontLike throw new IllegalArgumentException("No AFM for font " + baseFont); } fontDescriptor = PDType1FontEmbedder.buildFontDescriptor(afmStandard14); + // standard 14 fonts may be accessed concurrently, as they are singletons + codeToWidthMap = new ConcurrentHashMap(); } /** * Constructor. * * @param fontDictionary Font dictionary. + * + * @throws java.io.IOException */ protected PDFont(COSDictionary fontDictionary) throws IOException { dict = fontDictionary; + codeToWidthMap = new HashMap(); // standard 14 fonts use an AFM afmStandard14 = Standard14Fonts.getAFM(getName()); // may be null (it usually is) + fontDescriptor = loadFontDescriptor(); + toUnicodeCMap = loadUnicodeCmap(); + } - // font descriptor - COSDictionary fd = (COSDictionary) dict.getDictionaryObject(COSName.FONT_DESC); + private PDFontDescriptor loadFontDescriptor() + { + COSDictionary fd = dict.getCOSDictionary(COSName.FONT_DESC); if (fd != null) { - fontDescriptor = new PDFontDescriptor(fd); + return new PDFontDescriptor(fd); } else if (afmStandard14 != null) { // build font descriptor from the AFM - fontDescriptor = PDType1FontEmbedder.buildFontDescriptor(afmStandard14); + return PDType1FontEmbedder.buildFontDescriptor(afmStandard14); } else { - fontDescriptor = null; + return null; } + } - // ToUnicode CMap + private CMap loadUnicodeCmap() + { COSBase toUnicode = dict.getDictionaryObject(COSName.TO_UNICODE); - if (toUnicode != null) + if (toUnicode == null) + { + return null; + } + + CMap cmap = null; + try { - toUnicodeCMap = readCMap(toUnicode); - if (toUnicodeCMap != null && !toUnicodeCMap.hasUnicodeMappings()) + cmap = readCMap(toUnicode); + if (cmap != null && !cmap.hasUnicodeMappings()) { LOG.warn("Invalid ToUnicode CMap in 
font " + getName()); + String cmapName = cmap.getName() != null ? cmap.getName() : ""; + String ordering = cmap.getOrdering() != null ? cmap.getOrdering() : ""; + COSBase encoding = dict.getDictionaryObject(COSName.ENCODING); + if (cmapName.contains("Identity") // + || ordering.contains("Identity") // + || COSName.IDENTITY_H.equals(encoding) // + || COSName.IDENTITY_V.equals(encoding)) + { + // assume that if encoding is identity, then the reverse is also true + cmap = CMapManager.getPredefinedCMap(COSName.IDENTITY_H.getName()); + LOG.warn("Using predefined identity CMap instead"); + } } } - else + catch (IOException ex) { - toUnicodeCMap = null; + LOG.error("Could not read ToUnicode CMap in font " + getName(), ex); } + return cmap; } /** @@ -153,8 +191,6 @@ protected final void setFontDescriptor(PDFontDescriptor fontDescriptor) this.fontDescriptor = fontDescriptor; } - /** - /** * Reads a CMap given a COS Stream or Name. May return null if a predefined CMap does not exist. * @@ -215,6 +251,12 @@ public Vector getDisplacement(int code) throws IOException @Override public float getWidth(int code) throws IOException { + Float width = codeToWidthMap.get(code); + if (width != null) + { + return width; + } + // Acrobat overrides the widths in the font program on the conforming reader's system with // the widths specified in the font dictionary." (Adobe Supplement to the ISO 32000) // @@ -222,7 +264,7 @@ public float getWidth(int code) throws IOException // embedded", however PDFBOX-427 shows that it also applies to embedded fonts. 
// Type1, Type1C, Type3 - if (dict.containsKey(COSName.WIDTHS) || dict.containsKey(COSName.MISSING_WIDTH)) + if (dict.getDictionaryObject(COSName.WIDTHS) != null || dict.containsKey(COSName.MISSING_WIDTH)) { int firstChar = dict.getInt(COSName.FIRST_CHAR, -1); int lastChar = dict.getInt(COSName.LAST_CHAR, -1); @@ -230,25 +272,37 @@ public float getWidth(int code) throws IOException int idx = code - firstChar; if (siz > 0 && code >= firstChar && code <= lastChar && idx < siz) { - return getWidths().get(idx); + width = getWidths().get(idx); + if (width == null) + { + width = 0f; + } + codeToWidthMap.put(code, width); + return width; } PDFontDescriptor fd = getFontDescriptor(); - if (fd != null && fd.hasMissingWidth()) + if (fd != null) { // get entry from /MissingWidth entry - return fd.getMissingWidth(); + width = fd.getMissingWidth(); + codeToWidthMap.put(code, width); + return width; } } // standard 14 font widths are specified by an AFM if (isStandard14()) { - return getStandard14Width(code); + width = getStandard14Width(code); + codeToWidthMap.put(code, width); + return width; } // if there's nothing to override with, then obviously we fall back to the font - return getWidthFromFont(code); + width = getWidthFromFont(code); + codeToWidthMap.put(code, width); + return width; } /** @@ -258,15 +312,6 @@ public float getWidth(int code) throws IOException * @return width in 1/1000 text space */ protected abstract float getStandard14Width(int code); - - @Override - public abstract float getWidthFromFont(int code) throws IOException; - - @Override - public abstract boolean isEmbedded(); - - @Override - public abstract float getHeight(int code) throws IOException; /** * Encodes the given string for use in a PDF content stream. @@ -274,11 +319,13 @@ public float getWidth(int code) throws IOException * @param text Any Unicode text. * @return Array of PDF content stream bytes. * @throws IOException If the text could not be encoded. 
+ * @throws IllegalArgumentException if a character isn't supported by the font. */ public final byte[] encode(String text) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); - for (int offset = 0; offset < text.length(); ) + int offset = 0; + while (offset < text.length()) { int codePoint = text.codePointAt(offset); @@ -300,6 +347,7 @@ public final byte[] encode(String text) throws IOException * @param unicode Unicode code point. * @return Array of 1 to 4 PDF content stream bytes. * @throws IOException If the text could not be encoded. + * @throws IllegalArgumentException if a character isn't supported by the font. */ protected abstract byte[] encode(int unicode) throws IOException; @@ -309,6 +357,7 @@ public final byte[] encode(String text) throws IOException * @param text The text to get the width of. * @return The width of the string in 1/1000 units of text space. * @throws IOException If there is an error getting the width information. + * @throws IllegalArgumentException if a character isn't supported by the font. 
*/ public float getStringWidth(String text) throws IOException { @@ -343,7 +392,7 @@ public float getAverageFontWidth() { float totalWidth = 0.0f; float characterCount = 0.0f; - COSArray widths = (COSArray) dict.getDictionaryObject(COSName.WIDTHS); + COSArray widths = dict.getCOSArray(COSName.WIDTHS); if (widths != null) { for (int i = 0; i < widths.size(); i++) @@ -402,13 +451,16 @@ public String toUnicode(int code) throws IOException // if the font dictionary containsName a ToUnicode CMap, use that CMap if (toUnicodeCMap != null) { - if (toUnicodeCMap.getName() != null && toUnicodeCMap.getName().startsWith("Identity-") && - dict.getDictionaryObject(COSName.TO_UNICODE) instanceof COSName) + if (toUnicodeCMap.getName() != null && + toUnicodeCMap.getName().startsWith("Identity-") && + (dict.getDictionaryObject(COSName.TO_UNICODE) instanceof COSName || + !toUnicodeCMap.hasUnicodeMappings())) { // handle the undocumented case of using Identity-H/V as a ToUnicode CMap, this // isn't actually valid as the Identity-x CMaps are code->CID maps, not // code->Unicode maps. See sample_fonts_solidconvertor.pdf for an example. // PDFBOX-3123: do this only if the /ToUnicode entry is a name + // PDFBOX-4322: identity streams are OK too return new String(new char[] { (char) code }); } else @@ -441,12 +493,6 @@ public String getSubType() return dict.getNameAsString(COSName.SUBTYPE); } - @Override - public abstract String getName(); - - @Override - public abstract BoundingBox getBoundingBox() throws IOException; - /** * The widths of the characters. This will be null for the standard 14 fonts. 
* @@ -456,7 +502,7 @@ protected final List getWidths() { if (widths == null) { - COSArray array = (COSArray) dict.getDictionaryObject(COSName.WIDTHS); + COSArray array = dict.getCOSArray(COSName.WIDTHS); if (array != null) { widths = COSArrayList.convertFloatCOSArrayToList(array); @@ -487,7 +533,7 @@ public float getSpaceWidth() COSBase toUnicode = dict.getDictionaryObject(COSName.TO_UNICODE); try { - if (toUnicode != null) + if (toUnicode != null && toUnicodeCMap != null) { int spaceMapping = toUnicodeCMap.getSpaceMapping(); if (spaceMapping > -1) @@ -499,6 +545,12 @@ public float getSpaceWidth() { fontWidthOfSpace = getWidth(32); } + + // try to get it from the font itself + if (fontWidthOfSpace <= 0) + { + fontWidthOfSpace = getWidthFromFont(32); + } // use the average font width as fall back if (fontWidthOfSpace <= 0) { @@ -524,7 +576,7 @@ public float getSpaceWidth() */ public boolean isStandard14() { - // this logic is based on Acrobat's behaviour, see see PDFBOX-2372 + // this logic is based on Acrobat's behaviour, see PDFBOX-2372 // embedded fonts never get special treatment if (isEmbedded()) @@ -554,9 +606,6 @@ public boolean isStandard14() * Returns true if this font will be subset when embedded. */ public abstract boolean willBeSubset(); - - @Override - public abstract boolean isDamaged(); @Override public boolean equals(Object other) @@ -575,4 +624,14 @@ public String toString() { return getClass().getSimpleName() + " " + getName(); } + + /** + * Get the /ToUnicode CMap. + * + * @return The /ToUnicode CMap or null if there is none. 
+ */ + protected CMap getToUnicodeCMap() + { + return toUnicodeCMap; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontDescriptor.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontDescriptor.java index 407009ffb53..b8d832f28c5 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontDescriptor.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontDescriptor.java @@ -262,7 +262,7 @@ private void setFlagBit( int bit, boolean value ) } else { - flags = flags & (0xFFFFFFFF ^ bit); + flags = flags & (~bit); } setFlags( flags ); } @@ -429,7 +429,7 @@ public void setFlags( int flags ) */ public PDRectangle getFontBoundingBox() { - COSArray rect = (COSArray)dic.getDictionaryObject( COSName.FONT_BBOX ); + COSArray rect = dic.getCOSArray(COSName.FONT_BBOX); PDRectangle retval = null; if( rect != null ) { @@ -441,7 +441,7 @@ public PDRectangle getFontBoundingBox() /** * Set the fonts bounding box. * - * @param rect The new bouding box. + * @param rect The new bounding box. */ public void setFontBoundingBox( PDRectangle rect ) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java index a02b4af39d6..0e8ecac081f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontFactory.java @@ -23,6 +23,7 @@ import org.apache.pdfbox.cos.COSName; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.pdmodel.ResourceCache; /** * Creates the appropriate font subtype based on information in the dictionary. 
@@ -44,6 +45,19 @@ private PDFontFactory() * @throws IOException if something goes wrong */ public static PDFont createFont(COSDictionary dictionary) throws IOException + { + return createFont(dictionary, null); + } + + /** + * Creates a new PDFont instance with the appropriate subclass. + * + * @param dictionary a font dictionary + * @param resourceCache resource cache, only useful for type 3 fonts, can be null + * @return a PDFont instance, based on the SubType entry of the dictionary + * @throws IOException if something goes wrong + */ + public static PDFont createFont(COSDictionary dictionary, ResourceCache resourceCache) throws IOException { COSName type = dictionary.getCOSName(COSName.TYPE, COSName.FONT); if (!COSName.FONT.equals(type)) @@ -76,7 +90,7 @@ else if (COSName.TRUE_TYPE.equals(subType)) } else if (COSName.TYPE3.equals(subType)) { - return new PDType3Font(dictionary); + return new PDType3Font(dictionary, resourceCache); } else if (COSName.TYPE0.equals(subType)) { @@ -135,13 +149,11 @@ else if (COSName.CID_FONT_TYPE2.equals(subType)) * * @return a default font * @throws IOException if something goes wrong + * @deprecated use {@link PDType1Font#HELVETICA} */ + @Deprecated public static PDFont createDefaultFont() throws IOException { - COSDictionary dict = new COSDictionary(); - dict.setItem(COSName.TYPE, COSName.FONT); - dict.setItem(COSName.SUBTYPE, COSName.TRUE_TYPE); - dict.setString(COSName.BASE_FONT, "Arial"); - return createFont(dict); + return PDType1Font.HELVETICA; } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontLike.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontLike.java index ad92a668b43..99a57e6879b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontLike.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFontLike.java @@ -61,31 +61,44 @@ public interface PDFontLike /** * Returns the height of the given character, in glyph space. 
This can be expensive to - * calculate. Results are only approximate.

+ * calculate. Results are only approximate.

* * Warning: This method is deprecated in PDFBox 2.0 because there is no meaningful value - * which it can return. The {@link #getWidth} method returns the advance width of a glyph, + * which it can return. The {@link #getWidth(int)} method returns the advance width of a glyph, * but there is no corresponding advance height. The logical height of a character is the same * for every character in a font, so if you want that, retrieve the font bbox's height. * Otherwise if you want the visual bounds of the glyph then call getPath(..) on the appropriate - * PDFont subclass to retrieve the glyph outline as a GeneralPath. + * PDFont subclass to retrieve the glyph outline as a GeneralPath. See the cyan rectangles in + * the DrawPrintTextLocations.java example to see this in action. * * @param code character code - * @deprecated Use {@link #getBoundingBox().#getHeight(int)} instead. + * @deprecated Use + * {@link #getBoundingBox() getBoundingBox()}.{@link BoundingBox#getHeight() getHeight()} + * instead. */ @Deprecated float getHeight(int code) throws IOException; /** - * Returns the advance width of the given character, in glyph space.

+ * Returns the advance width of the given character, in glyph space.

* * If you want the visual bounds of the glyph then call getPath(..) on the appropriate - * PDFont subclass to retrieve the glyph outline as a GeneralPath instead. + * PDFont subclass to retrieve the glyph outline as a GeneralPath instead. See the cyan + * rectangles in the DrawPrintTextLocations.java example to see this in action. * * @param code character code */ float getWidth(int code) throws IOException; + /** + * Returns true if the Font dictionary specifies an explicit width for the given glyph. + * This includes Width, W but not default widths entries. + * + * @param code character code + * @throws IOException if the font could not be read + */ + boolean hasExplicitWidth(int code) throws IOException; + /** * Returns the width of a glyph in the embedded font file. * diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDPanoseClassification.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDPanoseClassification.java index bfd3823c828..58c3cf9f701 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDPanoseClassification.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDPanoseClassification.java @@ -18,16 +18,15 @@ package org.apache.pdfbox.pdmodel.font; /** - * Represents a 10-byte PANOSE classification. + * Represents a 10-byte PANOSE classification. 
* - * @link http://www.monotype.com/services/pan2 * @author John Hewson */ public class PDPanoseClassification { private final byte[] bytes; - PDPanoseClassification(byte[] bytes) + public PDPanoseClassification(byte[] bytes) { this.bytes = bytes; } @@ -90,7 +89,7 @@ public byte[] getBytes() @Override public String toString() { - return "{ FamilyType = " + getFamilyKind() + ", " + + return "{ FamilyKind = " + getFamilyKind() + ", " + "SerifStyle = " + getSerifStyle() + ", " + "Weight = " + getWeight() + ", " + "Proportion = " + getProportion() + ", " + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java index d2a1368ba21..2cc56c4826f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java @@ -47,14 +47,12 @@ public abstract class PDSimpleFont extends PDFont protected GlyphList glyphList; private Boolean isSymbolic; private final Set noUnicode = new HashSet(); // for logging - private Map invertedEncoding; // for writing - + /** * Constructor for embedding. 
*/ PDSimpleFont() { - super(); } /** @@ -63,18 +61,7 @@ public abstract class PDSimpleFont extends PDFont PDSimpleFont(String baseFont) { super(baseFont); - - this.encoding = WinAnsiEncoding.INSTANCE; - - // assign the glyph list based on the font - if ("ZapfDingbats".equals(baseFont)) - { - glyphList = GlyphList.getZapfDingbats(); - } - else - { - glyphList = GlyphList.getAdobeGlyphList(); - } + assignGlyphList(baseFont); } /** @@ -95,57 +82,49 @@ public abstract class PDSimpleFont extends PDFont */ protected void readEncoding() throws IOException { - COSBase encoding = dict.getDictionaryObject(COSName.ENCODING); - if (encoding != null) + COSBase encodingBase = dict.getDictionaryObject(COSName.ENCODING); + if (encodingBase instanceof COSName) { - if (encoding instanceof COSName) + COSName encodingName = (COSName) encodingBase; + this.encoding = Encoding.getInstance(encodingName); + if (this.encoding == null) { - COSName encodingName = (COSName)encoding; - this.encoding = Encoding.getInstance(encodingName); - if (this.encoding == null) - { - LOG.warn("Unknown encoding: " + encodingName.getName()); - this.encoding = readEncodingFromFont(); // fallback - } + LOG.warn("Unknown encoding: " + encodingName.getName()); + this.encoding = readEncodingFromFont(); // fallback } - else if (encoding instanceof COSDictionary) + } + else if (encodingBase instanceof COSDictionary) + { + COSDictionary encodingDict = (COSDictionary) encodingBase; + Encoding builtIn = null; + Boolean symbolic = getSymbolicFlag(); + + COSName baseEncoding = encodingDict.getCOSName(COSName.BASE_ENCODING); + + boolean hasValidBaseEncoding = baseEncoding != null && + Encoding.getInstance(baseEncoding) != null; + + if (!hasValidBaseEncoding && Boolean.TRUE.equals(symbolic)) { - COSDictionary encodingDict = (COSDictionary)encoding; - Encoding builtIn = null; - Boolean symbolic = getSymbolicFlag(); - boolean isFlaggedAsSymbolic = symbolic != null && symbolic; - if 
(!encodingDict.containsKey(COSName.BASE_ENCODING) && isFlaggedAsSymbolic) - { - builtIn = readEncodingFromFont(); - } + builtIn = readEncodingFromFont(); + } - if (symbolic == null) - { - symbolic = false; - } - this.encoding = new DictionaryEncoding(encodingDict, !symbolic, builtIn); + if (symbolic == null) + { + symbolic = false; } + this.encoding = new DictionaryEncoding(encodingDict, !symbolic, builtIn); } - else + else if (encodingBase == null) { this.encoding = readEncodingFromFont(); } // normalise the standard 14 name, e.g "Symbol,Italic" -> "Symbol" String standard14Name = Standard14Fonts.getMappedFontName(getName()); - - // assign the glyph list based on the font - if ("ZapfDingbats".equals(standard14Name)) - { - glyphList = GlyphList.getZapfDingbats(); - } - else - { - // StandardEncoding and Symbol are in the AGL - glyphList = GlyphList.getAdobeGlyphList(); - } + assignGlyphList(standard14Name); } - + /** * Called by readEncoding() if the encoding needs to be extracted from the font file. 
* @@ -231,7 +210,7 @@ else if (encoding instanceof DictionaryEncoding) // each name in Differences array must also be in the latin character set for (String name : ((DictionaryEncoding)encoding).getDifferences().values()) { - if (name.equals(".notdef")) + if (".notdef".equals(name)) { // skip } @@ -346,11 +325,24 @@ protected final float getStandard14Width(int code) String nameInAFM = getEncoding().getName(code); // the Adobe AFMs don't include .notdef, but Acrobat uses 250, test with PDFBOX-2334 - if (nameInAFM.equals(".notdef")) + if (".notdef".equals(nameInAFM)) { return 250f; } + if ("nbspace".equals(nameInAFM)) + { + // PDFBOX-4944: nbspace is missing in AFM files, + // but PDF specification tells "it shall be typographically the same as SPACE" + nameInAFM = "space"; + } + else if ("sfthyphen".equals(nameInAFM)) + { + // PDFBOX-5115: sfthyphen is missing in AFM files, + // but PDF specification tells "it shall be typographically the same as hyphen" + nameInAFM = "hyphen"; + } + return getStandard14AFM().getCharacterWidth(nameInAFM); } throw new IllegalStateException("No AFM"); @@ -359,7 +351,7 @@ protected final float getStandard14Width(int code) @Override public boolean isStandard14() { - // this logic is based on Acrobat's behaviour, see see PDFBOX-2372 + // this logic is based on Acrobat's behaviour, see PDFBOX-2372 // the Encoding entry cannot have Differences if we want "standard 14" font handling if (getEncoding() instanceof DictionaryEncoding) { @@ -420,4 +412,31 @@ public boolean willBeSubset() { return false; } + + @Override + public boolean hasExplicitWidth(int code) throws IOException + { + if (dict.containsKey(COSName.WIDTHS)) + { + int firstChar = dict.getInt(COSName.FIRST_CHAR, -1); + if (code >= firstChar && code - firstChar < getWidths().size()) + { + return true; + } + } + return false; + } + + private void assignGlyphList(String baseFont) + { + // assign the glyph list based on the font + if ("ZapfDingbats".equals(baseFont)) + { + glyphList = 
GlyphList.getZapfDingbats(); + } + else + { + glyphList = GlyphList.getAdobeGlyphList(); + } + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java index 2f450cc06b2..5a2d933d561 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java @@ -18,7 +18,6 @@ import java.awt.geom.GeneralPath; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; @@ -42,6 +41,7 @@ import org.apache.pdfbox.pdmodel.font.encoding.Encoding; import org.apache.pdfbox.pdmodel.font.encoding.GlyphList; import org.apache.pdfbox.pdmodel.font.encoding.MacOSRomanEncoding; +import org.apache.pdfbox.pdmodel.font.encoding.MacRomanEncoding; import org.apache.pdfbox.pdmodel.font.encoding.StandardEncoding; import org.apache.pdfbox.pdmodel.font.encoding.Type1Encoding; import org.apache.pdfbox.pdmodel.font.encoding.WinAnsiEncoding; @@ -90,7 +90,7 @@ public class PDTrueTypeFont extends PDSimpleFont implements PDVectorFont public static PDTrueTypeFont load(PDDocument doc, File file, Encoding encoding) throws IOException { - return new PDTrueTypeFont(doc, new FileInputStream(file), encoding); + return new PDTrueTypeFont(doc, new TTFParser().parse(file), encoding, true); } /** @@ -108,9 +108,29 @@ public static PDTrueTypeFont load(PDDocument doc, File file, Encoding encoding) public static PDTrueTypeFont load(PDDocument doc, InputStream input, Encoding encoding) throws IOException { - return new PDTrueTypeFont(doc, input, encoding); + return new PDTrueTypeFont(doc, new TTFParser().parse(input), encoding, true); } - + + /** + * Loads a TTF to be embedded into a document as a simple font. + * + *

+ * Note: Simple fonts only support 256 characters. For Unicode support, use + * {@link PDType0Font#load(PDDocument, InputStream)} instead. + *

+ * + * @param doc The PDF document that will hold the embedded font. + * @param ttf A true type font + * @param encoding The PostScript encoding vector to be used for embedding. + * @return a PDTrueTypeFont instance. + * @throws IOException If there is an error loading the data. + */ + public static PDTrueTypeFont load(PDDocument doc, TrueTypeFont ttf, Encoding encoding) + throws IOException + { + return new PDTrueTypeFont(doc, ttf, encoding, false); + } + /** * Loads a TTF to be embedded into a document as a simple font. Only supports WinAnsiEncoding. * @@ -124,7 +144,7 @@ public static PDTrueTypeFont load(PDDocument doc, InputStream input, Encoding en @Deprecated public static PDTrueTypeFont loadTTF(PDDocument doc, File file) throws IOException { - return new PDTrueTypeFont(doc, new FileInputStream(file), WinAnsiEncoding.INSTANCE); + return new PDTrueTypeFont(doc, new TTFParser().parse(file), WinAnsiEncoding.INSTANCE, true); } /** @@ -140,7 +160,8 @@ public static PDTrueTypeFont loadTTF(PDDocument doc, File file) throws IOExcepti @Deprecated public static PDTrueTypeFont loadTTF(PDDocument doc, InputStream input) throws IOException { - return new PDTrueTypeFont(doc, input, WinAnsiEncoding.INSTANCE); + return new PDTrueTypeFont(doc, new TTFParser().parse(input), WinAnsiEncoding.INSTANCE, + true); } private CmapSubtable cmapWinUnicode = null; @@ -177,11 +198,6 @@ public PDTrueTypeFont(COSDictionary fontDictionary) throws IOException TTFParser ttfParser = new TTFParser(true); ttfFont = ttfParser.parse(ff2Stream.createInputStream()); } - catch (NullPointerException e) // TTF parser is buggy - { - LOG.warn("Could not read embedded TTF for font " + getBaseFont(), e); - fontIsDamaged = true; - } catch (IOException e) { LOG.warn("Could not read embedded TTF for font " + getBaseFont(), e); @@ -220,7 +236,7 @@ public final String getBaseFont() @Override protected Encoding readEncodingFromFont() throws IOException { - if (getStandard14AFM() != null) + if (!isEmbedded() && 
getStandard14AFM() != null) { // read from AFM return new Type1Encoding(getStandard14AFM()); @@ -274,17 +290,23 @@ protected Encoding readEncodingFromFont() throws IOException /** * Creates a new TrueType font for embedding. */ - private PDTrueTypeFont(PDDocument document, InputStream ttfStream, Encoding encoding) + private PDTrueTypeFont(PDDocument document, TrueTypeFont ttf, Encoding encoding, + boolean closeTTF) throws IOException { - PDTrueTypeFontEmbedder embedder = new PDTrueTypeFontEmbedder(document, dict, ttfStream, + PDTrueTypeFontEmbedder embedder = new PDTrueTypeFontEmbedder(document, dict, ttf, encoding); this.encoding = encoding; - ttf = embedder.getTrueTypeFont(); + this.ttf = ttf; setFontDescriptor(embedder.getFontDescriptor()); isEmbedded = true; isDamaged = false; glyphList = GlyphList.getAdobeGlyphList(); + if (closeTTF) + { + // the TTF is fully loaded and it is safe to close the underlying data source + ttf.close(); + } } @Override @@ -313,8 +335,11 @@ private BoundingBox generateBoundingBox() throws IOException { if (getFontDescriptor() != null) { PDRectangle bbox = getFontDescriptor().getFontBoundingBox(); - return new BoundingBox(bbox.getLowerLeftX(), bbox.getLowerLeftY(), - bbox.getUpperRightX(), bbox.getUpperRightY()); + if (bbox != null) + { + return new BoundingBox(bbox.getLowerLeftX(), bbox.getLowerLeftY(), + bbox.getUpperRightX(), bbox.getUpperRightY()); + } } return ttf.getFontBBox(); } @@ -411,7 +436,7 @@ protected byte[] encode(int unicode) throws IOException } /** - * Inverts the font's code -> GID mapping. Any duplicate (GID -> code) mappings will be lost. + * Inverts the font's code -> GID mapping. Any duplicate (GID -> code) mappings will be lost. 
*/ protected Map getGIDToCode() throws IOException { @@ -517,6 +542,7 @@ public boolean hasGlyph(int code) throws IOException * * @param code character code * @return GID (glyph index) + * @throws java.io.IOException */ public int codeToGID(int code) throws IOException { @@ -526,7 +552,7 @@ public int codeToGID(int code) throws IOException if (!isSymbolic()) // non-symbolic { String name = encoding.getName(code); - if (name.equals(".notdef")) + if (".notdef".equals(name)) { return 0; } @@ -562,6 +588,30 @@ public int codeToGID(int code) throws IOException } else // symbolic { + // PDFBOX-4755 / PDF.js #5501 + // PDFBOX-3965: fallback for font has that the symbol flag but isn't + if (gid == 0 && cmapWinUnicode != null) + { + if (encoding instanceof WinAnsiEncoding || encoding instanceof MacRomanEncoding) + { + String name = encoding.getName(code); + if (".notdef".equals(name)) + { + return 0; + } + String unicode = GlyphList.getAdobeGlyphList().toUnicode(name); + if (unicode != null) + { + int uni = unicode.codePointAt(0); + gid = cmapWinUnicode.getGlyphId(uni); + } + } + else + { + gid = cmapWinUnicode.getGlyphId(code); + } + } + // (3, 0) - (Windows, Symbol) if (cmapWinSymbol != null) { @@ -595,7 +645,6 @@ public int codeToGID(int code) throws IOException gid = cmapMacRoman.getGlyphId(code); } } - return gid; } @@ -632,6 +681,12 @@ else if (CmapTable.PLATFORM_MACINTOSH == cmap.getPlatformId() { cmapMacRoman = cmap; } + else if (CmapTable.PLATFORM_UNICODE == cmap.getPlatformId() + && CmapTable.ENCODING_UNICODE_1_0 == cmap.getPlatformEncodingId()) + { + // PDFBOX-4755 / PDF.js #5501 + cmapWinUnicode = cmap; + } } } cmapInitialized = true; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java index cdfd9506b55..8fe979f19d1 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java +++ 
b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java @@ -1,128 +1,129 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.pdfbox.pdmodel.font; - -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import org.apache.fontbox.ttf.HorizontalMetricsTable; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.common.COSArrayList; -import org.apache.pdfbox.pdmodel.font.encoding.Encoding; -import org.apache.pdfbox.pdmodel.font.encoding.GlyphList; - -/** - * Embedded PDTrueTypeFont builder. Helper class to populate a PDTrueTypeFont from a TTF. - * - * @author John Hewson - * @author Ben Litchfield - */ -final class PDTrueTypeFontEmbedder extends TrueTypeEmbedder -{ - private final Encoding fontEncoding; - - /** - * Creates a new TrueType font embedder for the given TTF as a PDTrueTypeFont. 
- * - * @param document The parent document - * @param dict Font dictionary - * @param ttfStream TTF stream - * @param encoding The PostScript encoding vector to be used for embedding. - * @throws IOException if the TTF could not be read - */ - PDTrueTypeFontEmbedder(PDDocument document, COSDictionary dict, InputStream ttfStream, - Encoding encoding) throws IOException - { - super(document, dict, ttfStream, false); - dict.setItem(COSName.SUBTYPE, COSName.TRUE_TYPE); - - GlyphList glyphList = GlyphList.getAdobeGlyphList(); - this.fontEncoding = encoding; - dict.setItem(COSName.ENCODING, encoding.getCOSObject()); - fontDescriptor.setSymbolic(false); - fontDescriptor.setNonSymbolic(true); - - // add the font descriptor - dict.setItem(COSName.FONT_DESC, fontDescriptor); - - // set the glyph widths - setWidths(dict, glyphList); - } - - /** - * Sets the glyph widths in the font dictionary. - */ - private void setWidths(COSDictionary font, GlyphList glyphList) throws IOException - { - float scaling = 1000f / ttf.getHeader().getUnitsPerEm(); - HorizontalMetricsTable hmtx = ttf.getHorizontalMetrics(); - - Map codeToName = getFontEncoding().getCodeToNameMap(); - - int firstChar = Collections.min(codeToName.keySet()); - int lastChar = Collections.max(codeToName.keySet()); - - List widths = new ArrayList(lastChar - firstChar + 1); - for (int i = 0; i < lastChar - firstChar + 1; i++) - { - widths.add(0); - } - - // a character code is mapped to a glyph name via the provided font encoding - // afterwards, the glyph name is translated to a glyph ID. 
- for (Map.Entry entry : codeToName.entrySet()) - { - int code = entry.getKey(); - String name = entry.getValue(); - - if (code >= firstChar && code <= lastChar) - { - String uni = glyphList.toUnicode(name); - int charCode = uni.codePointAt(0); - int gid = cmap.getGlyphId(charCode); - widths.set(entry.getKey() - firstChar, - Math.round(hmtx.getAdvanceWidth(gid) * scaling)); - } - } - - font.setInt(COSName.FIRST_CHAR, firstChar); - font.setInt(COSName.LAST_CHAR, lastChar); - font.setItem(COSName.WIDTHS, COSArrayList.converterToCOSArray(widths)); - } - - /** - * Returns the font's encoding. - */ - public Encoding getFontEncoding() - { - return fontEncoding; - } - - @Override - protected void buildSubset(InputStream ttfSubset, String tag, - Map gidToCid) throws IOException - { - // use PDType0Font instead - throw new UnsupportedOperationException(); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdmodel.font; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import org.apache.fontbox.ttf.HorizontalMetricsTable; +import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.common.COSArrayList; +import org.apache.pdfbox.pdmodel.font.encoding.Encoding; +import org.apache.pdfbox.pdmodel.font.encoding.GlyphList; + +/** + * Embedded PDTrueTypeFont builder. Helper class to populate a PDTrueTypeFont from a TTF. + * + * @author John Hewson + * @author Ben Litchfield + */ +final class PDTrueTypeFontEmbedder extends TrueTypeEmbedder +{ + private final Encoding fontEncoding; + + /** + * Creates a new TrueType font embedder for the given TTF as a PDTrueTypeFont. + * + * @param document The parent document + * @param dict Font dictionary + * @param ttf TrueType font + * @param encoding The PostScript encoding vector to be used for embedding. + * @throws IOException if the TTF could not be read + */ + PDTrueTypeFontEmbedder(PDDocument document, COSDictionary dict, TrueTypeFont ttf, + Encoding encoding) throws IOException + { + super(document, dict, ttf, false); + dict.setItem(COSName.SUBTYPE, COSName.TRUE_TYPE); + + GlyphList glyphList = GlyphList.getAdobeGlyphList(); + this.fontEncoding = encoding; + dict.setItem(COSName.ENCODING, encoding.getCOSObject()); + fontDescriptor.setSymbolic(false); + fontDescriptor.setNonSymbolic(true); + + // add the font descriptor + dict.setItem(COSName.FONT_DESC, fontDescriptor); + + // set the glyph widths + setWidths(dict, glyphList); + } + + /** + * Sets the glyph widths in the font dictionary. 
+ */ + private void setWidths(COSDictionary font, GlyphList glyphList) throws IOException + { + float scaling = 1000f / ttf.getHeader().getUnitsPerEm(); + HorizontalMetricsTable hmtx = ttf.getHorizontalMetrics(); + + Map codeToName = getFontEncoding().getCodeToNameMap(); + + int firstChar = Collections.min(codeToName.keySet()); + int lastChar = Collections.max(codeToName.keySet()); + + List widths = new ArrayList(lastChar - firstChar + 1); + for (int i = 0; i < lastChar - firstChar + 1; i++) + { + widths.add(0); + } + + // a character code is mapped to a glyph name via the provided font encoding + // afterwards, the glyph name is translated to a glyph ID. + for (Map.Entry entry : codeToName.entrySet()) + { + int code = entry.getKey(); + String name = entry.getValue(); + + if (code >= firstChar && code <= lastChar) + { + String uni = glyphList.toUnicode(name); + int charCode = uni.codePointAt(0); + int gid = cmapLookup.getGlyphId(charCode); + widths.set(entry.getKey() - firstChar, + Math.round(hmtx.getAdvanceWidth(gid) * scaling)); + } + } + + font.setInt(COSName.FIRST_CHAR, firstChar); + font.setInt(COSName.LAST_CHAR, lastChar); + font.setItem(COSName.WIDTHS, COSArrayList.converterToCOSArray(widths)); + } + + /** + * Returns the font's encoding. 
+ */ + public Encoding getFontEncoding() + { + return fontEncoding; + } + + @Override + protected void buildSubset(InputStream ttfSubset, String tag, + Map gidToCid) throws IOException + { + // use PDType0Font instead + throw new UnsupportedOperationException(); + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java index 1544af189bf..d3249d88d2f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java @@ -51,46 +51,50 @@ public class PDType0Font extends PDFont implements PDVectorFont private boolean isDescendantCJK; private PDCIDFontType2Embedder embedder; private final Set noUnicode = new HashSet(); + private TrueTypeFont ttf; /** - * Loads a TTF to be embedded into a document as a Type 0 font. - * - * @param doc The PDF document that will hold the embedded font. - * @param file A TrueType font. - * @return A Type0 font with a CIDFontType2 descendant. - * @throws IOException If there is an error reading the font file. - */ + * Loads a TTF to be embedded and subset into a document as a Type 0 font. If you are loading a + * font for AcroForm, then use the 3-parameter constructor instead. + * + * @param doc The PDF document that will hold the embedded font. + * @param file A TrueType font. + * @return A Type0 font with a CIDFontType2 descendant. + * @throws IOException If there is an error reading the font file. + */ public static PDType0Font load(PDDocument doc, File file) throws IOException { - return new PDType0Font(doc, new TTFParser().parse(file), true); + return new PDType0Font(doc, new TTFParser().parse(file), true, true, false); } /** - * Loads a TTF to be embedded into a document as a Type 0 font. - * - * @param doc The PDF document that will hold the embedded font. - * @param input A TrueType font. - * @return A Type0 font with a CIDFontType2 descendant. 
- * @throws IOException If there is an error reading the font stream. - */ + * Loads a TTF to be embedded and subset into a document as a Type 0 font. If you are loading a + * font for AcroForm, then use the 3-parameter constructor instead. + * + * @param doc The PDF document that will hold the embedded font. + * @param input An input stream of a TrueType font. It will be closed before returning. + * @return A Type0 font with a CIDFontType2 descendant. + * @throws IOException If there is an error reading the font stream. + */ public static PDType0Font load(PDDocument doc, InputStream input) throws IOException { - return new PDType0Font(doc, new TTFParser().parse(input), true); + return load(doc, input, true); } /** * Loads a TTF to be embedded into a document as a Type 0 font. * * @param doc The PDF document that will hold the embedded font. - * @param input A TrueType font. - * @param embedSubset True if the font will be subset before embedding + * @param input An input stream of a TrueType font. It will be closed before returning. + * @param embedSubset True if the font will be subset before embedding. Set this to false when + * creating a font for AcroForm. * @return A Type0 font with a CIDFontType2 descendant. * @throws IOException If there is an error reading the font stream. */ public static PDType0Font load(PDDocument doc, InputStream input, boolean embedSubset) throws IOException { - return new PDType0Font(doc, new TTFParser().parse(input), embedSubset); + return new PDType0Font(doc, new TTFParser().parse(input), embedSubset, true, false); } /** @@ -98,16 +102,73 @@ public static PDType0Font load(PDDocument doc, InputStream input, boolean embedS * * @param doc The PDF document that will hold the embedded font. * @param ttf A TrueType font. - * @param embedSubset True if the font will be subset before embedding + * @param embedSubset True if the font will be subset before embedding. Set this to false when + * creating a font for AcroForm. 
* @return A Type0 font with a CIDFontType2 descendant. * @throws IOException If there is an error reading the font stream. */ public static PDType0Font load(PDDocument doc, TrueTypeFont ttf, boolean embedSubset) throws IOException { - return new PDType0Font(doc, ttf, embedSubset); + return new PDType0Font(doc, ttf, embedSubset, false, false); + } + + /** + * Loads a TTF to be embedded into a document as a vertical Type 0 font. + * + * @param doc The PDF document that will hold the embedded font. + * @param file A TrueType font. + * @return A Type0 font with a CIDFontType2 descendant. + * @throws IOException If there is an error reading the font file. + */ + public static PDType0Font loadVertical(PDDocument doc, File file) throws IOException + { + return new PDType0Font(doc, new TTFParser().parse(file), true, true, true); + } + + /** + * Loads a TTF to be embedded into a document as a vertical Type 0 font. + * + * @param doc The PDF document that will hold the embedded font. + * @param input A TrueType font. + * @return A Type0 font with a CIDFontType2 descendant. + * @throws IOException If there is an error reading the font stream. + */ + public static PDType0Font loadVertical(PDDocument doc, InputStream input) throws IOException + { + return new PDType0Font(doc, new TTFParser().parse(input), true, true, true); + } + + /** + * Loads a TTF to be embedded into a document as a vertical Type 0 font. + * + * @param doc The PDF document that will hold the embedded font. + * @param input A TrueType font. + * @param embedSubset True if the font will be subset before embedding + * @return A Type0 font with a CIDFontType2 descendant. + * @throws IOException If there is an error reading the font stream. 
+ */ + public static PDType0Font loadVertical(PDDocument doc, InputStream input, boolean embedSubset) + throws IOException + { + return new PDType0Font(doc, new TTFParser().parse(input), embedSubset, true, true); } + /** + * Loads a TTF to be embedded into a document as a vertical Type 0 font. + * + * @param doc The PDF document that will hold the embedded font. + * @param ttf A TrueType font. + * @param embedSubset True if the font will be subset before embedding + * @return A Type0 font with a CIDFontType2 descendant. + * @throws IOException If there is an error reading the font stream. + */ + public static PDType0Font loadVertical(PDDocument doc, TrueTypeFont ttf, boolean embedSubset) + throws IOException + { + return new PDType0Font(doc, ttf, embedSubset, false, true); + } + /** * Constructor for reading a Type0 font from a PDF file. * @@ -117,29 +178,62 @@ public static PDType0Font load(PDDocument doc, TrueTypeFont ttf, boolean embedSu public PDType0Font(COSDictionary fontDictionary) throws IOException { super(fontDictionary); - COSArray descendantFonts = (COSArray)dict.getDictionaryObject(COSName.DESCENDANT_FONTS); - COSDictionary descendantFontDictionary = (COSDictionary) descendantFonts.getObject(0); - - if (descendantFontDictionary == null) + COSBase base = dict.getDictionaryObject(COSName.DESCENDANT_FONTS); + if (!(base instanceof COSArray)) + { + throw new IOException("Missing descendant font array"); + } + COSArray descendantFonts = (COSArray) base; + if (descendantFonts.size() == 0) + { + throw new IOException("Descendant font array is empty"); + } + COSBase descendantFontDictBase = descendantFonts.getObject(0); + if (!(descendantFontDictBase instanceof COSDictionary)) { throw new IOException("Missing descendant font dictionary"); } - - descendantFont = PDFontFactory.createDescendantFont(descendantFontDictionary, this); + descendantFont = PDFontFactory.createDescendantFont((COSDictionary) descendantFontDictBase, this); readEncoding(); fetchCMapUCS2(); 
} /** - * Private. Creates a new TrueType font for embedding. - */ - private PDType0Font(PDDocument document, TrueTypeFont ttf, boolean embedSubset) - throws IOException + * Private. Creates a new PDType0Font font for embedding. + * + * @param document + * @param ttf + * @param embedSubset + * @param closeTTF whether to close the ttf parameter after embedding. Must be true when the ttf + * parameter was created in the load() method, false when the ttf parameter was passed to the + * load() method. + * @param vertical + * @throws IOException + */ + private PDType0Font(PDDocument document, TrueTypeFont ttf, boolean embedSubset, + boolean closeTTF, boolean vertical) throws IOException { - embedder = new PDCIDFontType2Embedder(document, dict, ttf, embedSubset, this); + if (vertical) + { + ttf.enableVerticalSubstitutions(); + } + embedder = new PDCIDFontType2Embedder(document, dict, ttf, embedSubset, this, vertical); descendantFont = embedder.getCIDFont(); readEncoding(); fetchCMapUCS2(); + if (closeTTF) + { + if (embedSubset) + { + this.ttf = ttf; + document.registerTrueTypeFontForClosing(ttf); + } + else + { + // the TTF is fully loaded and it is safe to close the underlying data source + ttf.close(); + } + } } @Override @@ -160,6 +254,11 @@ public void subset() throws IOException throw new IllegalStateException("This font was created with subsetting disabled"); } embedder.subset(); + if (ttf != null) + { + ttf.close(); + ttf = null; + } } @Override @@ -205,11 +304,11 @@ else if (!cMap.hasCIDMappings()) PDCIDSystemInfo ros = descendantFont.getCIDSystemInfo(); if (ros != null) { - isDescendantCJK = ros.getRegistry().equals("Adobe") && - (ros.getOrdering().equals("GB1") || - ros.getOrdering().equals("CNS1") || - ros.getOrdering().equals("Japan1") || - ros.getOrdering().equals("Korea1")); + isDescendantCJK = "Adobe".equals(ros.getRegistry()) && + ("GB1".equals(ros.getOrdering()) || + "CNS1".equals(ros.getOrdering()) || + "Japan1".equals(ros.getOrdering()) || + 
"Korea1".equals(ros.getOrdering())); } } @@ -247,48 +346,20 @@ else if (name != null) // try to find the corresponding Unicode (UC2) CMap if (strName != null) { - CMap cMap = CMapManager.getPredefinedCMap(strName); - if (cMap != null) + try + { + CMap prdCMap = CMapManager.getPredefinedCMap(strName); + String ucs2Name = prdCMap.getRegistry() + "-" + prdCMap.getOrdering() + "-UCS2"; + cMapUCS2 = CMapManager.getPredefinedCMap(ucs2Name); + } + catch (IOException ex) { - String ucs2Name = cMap.getRegistry() + "-" + cMap.getOrdering() + "-UCS2"; - CMap ucs2CMap = CMapManager.getPredefinedCMap(ucs2Name); - if (ucs2CMap != null) - { - cMapUCS2 = ucs2CMap; - } + LOG.warn("Could not get " + strName + " UC2 map for font " + getName(), ex); } } } } - /** - * Returns the name of CJK CMap represented by the given CIDSystemInfo, if any. - */ - private String getCJKCMap(PDCIDSystemInfo ros) - { - // CJK can fallback to using CIDSystemInfo - if (ros.getOrdering().equals("GB1")) - { - return "Adobe-GB1-0"; - } - else if (ros.getOrdering().equals("CNS1")) - { - return "Adobe-CNS1-0"; - } - else if (ros.getOrdering().equals("Japan1")) - { - return "Adobe-Japan1-1"; - } - else if (ros.getOrdering().equals("Korea1")) - { - return "Adobe-Korea1-0"; - } - else - { - throw new IllegalStateException(); - } - } - /** * Returns the PostScript name of the font. 
*/ @@ -351,6 +422,12 @@ protected byte[] encode(int unicode) throws IOException return descendantFont.encode(unicode); } + @Override + public boolean hasExplicitWidth(int code) throws IOException + { + return descendantFont.hasExplicitWidth(code); + } + @Override public float getAverageFontWidth() { @@ -386,7 +463,7 @@ public float getWidth(int code) throws IOException @Override protected float getStandard14Width(int code) { - throw new UnsupportedOperationException("not suppported"); + throw new UnsupportedOperationException("not supported"); } @Override @@ -414,7 +491,7 @@ public String toUnicode(int code) throws IOException if ((isCMapPredefined || isDescendantCJK) && cMapUCS2 != null) { // if the font is composite and uses a predefined cmap (excluding Identity-H/V) then - // or if its decendant font uses Adobe-GB1/CNS1/Japan1/Korea1 + // or if its descendant font uses Adobe-GB1/CNS1/Japan1/Korea1 // a) Map the character code to a character identifier (CID) according to the font?s CMap int cid = codeToCID(code); @@ -497,7 +574,7 @@ public String toString() { descendant = getDescendantFont().getClass().getSimpleName(); } - return getClass().getSimpleName() + "/" + descendant + " " + getBaseFont(); + return getClass().getSimpleName() + "/" + descendant + ", PostScript name: " + getBaseFont(); } @Override diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java index 45f18e342e2..69b97da78ea 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java @@ -34,7 +34,6 @@ import org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDStream; import 
org.apache.pdfbox.pdmodel.font.encoding.Encoding; @@ -83,7 +82,7 @@ public PDType1CFont(COSDictionary fontDictionary) throws IOException PDStream ff3Stream = fd.getFontFile3(); if (ff3Stream != null) { - bytes = IOUtils.toByteArray(ff3Stream.createInputStream()); + bytes = ff3Stream.toByteArray(); if (bytes.length == 0) { LOG.error("Invalid data for embedded Type1C font " + getName()); @@ -100,7 +99,7 @@ public PDType1CFont(COSDictionary fontDictionary) throws IOException { // note: this could be an OpenType file, fortunately CFFParser can handle that CFFParser cffParser = new CFFParser(); - cffEmbedded = (CFFType1Font)cffParser.parse(bytes, new ByteSource()).get(0); + cffEmbedded = (CFFType1Font)cffParser.parse(bytes, new FF3ByteSource()).get(0); } } catch (IOException e) @@ -133,13 +132,12 @@ public PDType1CFont(COSDictionary fontDictionary) throws IOException fontMatrixTransform.scale(1000, 1000); } - private class ByteSource implements CFFParser.ByteSource + private class FF3ByteSource implements CFFParser.ByteSource { @Override public byte[] getBytes() throws IOException { - PDStream ff3Stream = getFontDescriptor().getFontFile3(); - return IOUtils.toByteArray(ff3Stream.createInputStream()); + return getFontDescriptor().getFontFile3().toByteArray(); } } @@ -165,10 +163,15 @@ public GeneralPath getPath(String name) throws IOException { return new GeneralPath(); } - else + if ("sfthyphen".equals(name)) + { + return genericFont.getPath("hyphen"); + } + if ("nbspace".equals(name)) { - return genericFont.getPath(name); + return genericFont.getPath("space"); } + return genericFont.getPath(name); } @Override @@ -197,8 +200,10 @@ private BoundingBox generateBoundingBox() throws IOException { if (getFontDescriptor() != null) { PDRectangle bbox = getFontDescriptor().getFontBoundingBox(); - if (bbox.getLowerLeftX() != 0 || bbox.getLowerLeftY() != 0 || - bbox.getUpperRightX() != 0 || bbox.getUpperRightY() != 0) { + if (bbox != null + && (bbox.getLowerLeftX() != 0 || 
bbox.getLowerLeftY() != 0 + || bbox.getUpperRightX() != 0 || bbox.getUpperRightY() != 0)) + { return new BoundingBox(bbox.getLowerLeftX(), bbox.getLowerLeftY(), bbox.getUpperRightX(), bbox.getUpperRightY()); } @@ -215,7 +220,7 @@ public String codeToName(int code) @Override protected Encoding readEncodingFromFont() throws IOException { - if (getStandard14AFM() != null) + if (!isEmbedded() && getStandard14AFM() != null) { // read from AFM return new Type1Encoding(getStandard14AFM()); @@ -281,6 +286,7 @@ public boolean isDamaged() public float getWidthFromFont(int code) throws IOException { String name = codeToName(code); + name = getNameInFont(name); float width = genericFont.getWidth(name); Point2D p = new Point2D.Float(width, 0); @@ -298,12 +304,21 @@ public boolean isEmbedded() public float getHeight(int code) throws IOException { String name = codeToName(code); - float height = 0; + float height; if (!glyphHeights.containsKey(name)) { - height = (float)cffFont.getType1CharString(name).getBounds().getHeight(); // todo: cffFont could be null + if (cffFont == null) + { + LOG.warn("No embedded CFF font, returning 0"); + return 0; + } + height = (float) cffFont.getType1CharString(name).getBounds().getHeight(); glyphHeights.put(name, height); } + else + { + height = glyphHeights.get(name); + } return height; } @@ -335,6 +350,11 @@ protected byte[] encode(int unicode) throws IOException @Override public float getStringWidth(String string) throws IOException { + if (cffFont == null) + { + LOG.warn("No embedded CFF font, returning 0"); + return 0; + } float width = 0; for (int i = 0; i < string.length(); i++) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java index feb3ca390a7..b2ec59748b4 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java @@ -25,6 +25,7 @@ import 
java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.fontbox.EncodedFont; @@ -41,11 +42,13 @@ import org.apache.pdfbox.pdmodel.font.encoding.Encoding; import org.apache.pdfbox.pdmodel.font.encoding.StandardEncoding; import org.apache.pdfbox.pdmodel.font.encoding.Type1Encoding; -import org.apache.pdfbox.pdmodel.font.encoding.WinAnsiEncoding; import org.apache.pdfbox.util.Matrix; import static org.apache.pdfbox.pdmodel.font.UniUtil.getUniNameOfCodePoint; +import org.apache.pdfbox.pdmodel.font.encoding.SymbolEncoding; +import org.apache.pdfbox.pdmodel.font.encoding.WinAnsiEncoding; +import org.apache.pdfbox.pdmodel.font.encoding.ZapfDingbatsEncoding; /** * A PostScript Type 1 Font. @@ -88,14 +91,27 @@ public class PDType1Font extends PDSimpleFont public static final PDType1Font SYMBOL = new PDType1Font("Symbol"); public static final PDType1Font ZAPF_DINGBATS = new PDType1Font("ZapfDingbats"); - private final Type1Font type1font; // embedded font - private final FontBoxFont genericFont; // embedded or system font for rendering + /** + * embedded font. + */ + private final Type1Font type1font; + + /** + * embedded or system font for rendering. + */ + private final FontBoxFont genericFont; + private final boolean isEmbedded; private final boolean isDamaged; private Matrix fontMatrix; private final AffineTransform fontMatrixTransform; private BoundingBox fontBBox; + /** + * to improve encoding speed. + */ + private final Map codeToBytesMap; + /** * Creates a Type 1 standard 14 font for embedding. 
* @@ -107,8 +123,22 @@ private PDType1Font(String baseFont) dict.setItem(COSName.SUBTYPE, COSName.TYPE1); dict.setName(COSName.BASE_FONT, baseFont); - encoding = new WinAnsiEncoding(); - dict.setItem(COSName.ENCODING, COSName.WIN_ANSI_ENCODING); + if ("ZapfDingbats".equals(baseFont)) + { + encoding = ZapfDingbatsEncoding.INSTANCE; + } + else if ("Symbol".equals(baseFont)) + { + encoding = SymbolEncoding.INSTANCE; + } + else + { + encoding = WinAnsiEncoding.INSTANCE; + dict.setItem(COSName.ENCODING, COSName.WIN_ANSI_ENCODING); + } + + // standard 14 fonts may be accessed concurrently, as they are singletons + codeToBytesMap = new ConcurrentHashMap(); // todo: could load the PFB font here if we wanted to support Standard 14 embedding type1font = null; @@ -144,14 +174,7 @@ private PDType1Font(String baseFont) */ public PDType1Font(PDDocument doc, InputStream pfbIn) throws IOException { - PDType1FontEmbedder embedder = new PDType1FontEmbedder(doc, dict, pfbIn, null); - encoding = embedder.getFontEncoding(); - glyphList = embedder.getGlyphList(); - type1font = embedder.getType1Font(); - genericFont = embedder.getType1Font(); - isEmbedded = true; - isDamaged = false; - fontMatrixTransform = new AffineTransform(); + this(doc, pfbIn, null); } /** @@ -165,13 +188,14 @@ public PDType1Font(PDDocument doc, InputStream pfbIn) throws IOException public PDType1Font(PDDocument doc, InputStream pfbIn, Encoding encoding) throws IOException { PDType1FontEmbedder embedder = new PDType1FontEmbedder(doc, dict, pfbIn, encoding); - this.encoding = encoding; + this.encoding = encoding == null ? 
embedder.getFontEncoding() : encoding; glyphList = embedder.getGlyphList(); type1font = embedder.getType1Font(); genericFont = embedder.getType1Font(); isEmbedded = true; isDamaged = false; fontMatrixTransform = new AffineTransform(); + codeToBytesMap = new HashMap(); } /** @@ -184,6 +208,7 @@ public PDType1Font(PDDocument doc, InputStream pfbIn, Encoding encoding) throws public PDType1Font(COSDictionary fontDictionary) throws IOException { super(fontDictionary); + codeToBytesMap = new HashMap(); PDFontDescriptor fd = getFontDescriptor(); Type1Font t1 = null; @@ -207,10 +232,15 @@ public PDType1Font(COSDictionary fontDictionary) throws IOException int length1 = stream.getInt(COSName.LENGTH1); int length2 = stream.getInt(COSName.LENGTH2); - // repair Length1 if necessary + // repair Length1 and Length2 if necessary byte[] bytes = fontFile.toByteArray(); + if (bytes.length == 0) + { + throw new IOException("Font data unavailable"); + } length1 = repairLength1(bytes, length1); - + length2 = repairLength2(bytes, length1, length2); + if (bytes.length > 0 && (bytes[0] & 0xff) == PFB_START_MARKER) { // some bad files embed the entire PFB, see PDFBOX-2607 @@ -219,6 +249,11 @@ public PDType1Font(COSDictionary fontDictionary) throws IOException else { // the PFB embedded as two segments back-to-back + if (length1 < 0 || length1 > length1 + length2) + { + throw new IOException("Invalid length data, actual length: " + + bytes.length + ", /Length1: " + length1 + ", /Length2: " + length2); + } byte[] segment1 = Arrays.copyOfRange(bytes, 0, length1); byte[] segment2 = Arrays.copyOfRange(bytes, length1, length1 + length2); @@ -268,7 +303,7 @@ public PDType1Font(COSDictionary fontDictionary) throws IOException /** * Some Type 1 fonts have an invalid Length1, which causes the binary segment of the font - * to be truncated, see PDFBOX-2350. + * to be truncated, see PDFBOX-2350, PDFBOX-3677. 
* * @param bytes Type 1 stream bytes * @param length1 Length1 from the Type 1 stream @@ -282,16 +317,41 @@ private int repairLength1(byte[] bytes, int length1) { offset = bytes.length - 4; } + + offset = findBinaryOffsetAfterExec(bytes, offset); + if (offset == 0 && length1 > 0) + { + // 2nd try with brute force + offset = findBinaryOffsetAfterExec(bytes, bytes.length - 4); + } + + if (length1 - offset != 0 && offset > 0) + { + if (LOG.isWarnEnabled()) + { + LOG.warn("Ignored invalid Length1 " + length1 + " for Type 1 font " + getName()); + } + return offset; + } + + return length1; + } + + private static int findBinaryOffsetAfterExec(byte[] bytes, int startOffset) + { + int offset = startOffset; while (offset > 0) { - if (bytes[offset + 0] == 'e' && - bytes[offset + 1] == 'x' && - bytes[offset + 2] == 'e' && - bytes[offset + 3] == 'c') + if (bytes[offset + 0] == 'e' + && bytes[offset + 1] == 'x' + && bytes[offset + 2] == 'e' + && bytes[offset + 3] == 'c') { offset += 4; // skip additional CR LF space characters - while (offset < length1 && (bytes[offset] == '\r' || bytes[offset] == '\n' || bytes[offset] == ' ')) + while (offset < bytes.length && + (bytes[offset] == '\r' || bytes[offset] == '\n' || + bytes[offset] == ' ' || bytes[offset] == '\t')) { offset++; } @@ -299,17 +359,28 @@ private int repairLength1(byte[] bytes, int length1) } offset--; } + return offset; + } - if (length1 - offset != 0 && offset > 0) + /** + * Some Type 1 fonts have an invalid Length2, see PDFBOX-3475. A negative /Length2 brings an + * IllegalArgumentException in Arrays.copyOfRange(), a huge value eats up memory because of + * padding. 
+ * + * @param bytes Type 1 stream bytes + * @param length1 Length1 from the Type 1 stream + * @param length2 Length2 from the Type 1 stream + * @return repaired Length2 value + */ + private int repairLength2(byte[] bytes, int length1, int length2) + { + // repair Length2 if necessary + if (length2 < 0 || length2 > bytes.length - length1) { - if (LOG.isWarnEnabled()) - { - LOG.warn("Ignored invalid Length1 " + length1 + " for Type 1 font " + getName()); - } - return offset; + LOG.warn("Ignored invalid Length2 " + length2 + " for Type 1 font " + getName()); + return bytes.length - length1; } - - return length1; + return length2; } /** @@ -339,25 +410,52 @@ public float getHeight(int code) throws IOException @Override protected byte[] encode(int unicode) throws IOException { - String name = getGlyphList().codePointToName(unicode); - if (!encoding.contains(name)) + byte[] bytes = codeToBytesMap.get(unicode); + if (bytes != null) { - throw new IllegalArgumentException( - String.format("U+%04X ('%s') is not available in this font's encoding: %s", - unicode, name, encoding.getEncodingName())); + return bytes; } - - String nameInFont = getNameInFont(name); - Map inverted = encoding.getNameToCodeMap(); - if (nameInFont.equals(".notdef") || !genericFont.hasGlyph(nameInFont)) + String name = getGlyphList().codePointToName(unicode); + if (isStandard14()) { - throw new IllegalArgumentException( - String.format("No glyph for U+%04X in font %s", unicode, getName())); + // genericFont not needed, thus simplified code + // this is important on systems with no installed fonts + if (!encoding.contains(name)) + { + throw new IllegalArgumentException( + String.format("U+%04X ('%s') is not available in this font %s encoding: %s", + unicode, name, getName(), encoding.getEncodingName())); + } + if (".notdef".equals(name)) + { + throw new IllegalArgumentException( + String.format("No glyph for U+%04X in font %s", unicode, getName())); + } + } + else + { + if (!encoding.contains(name)) + { 
+ throw new IllegalArgumentException( + String.format("U+%04X ('%s') is not available in this font %s (generic: %s) encoding: %s", + unicode, name, getName(), genericFont.getName(), encoding.getEncodingName())); + } + + String nameInFont = getNameInFont(name); + + if (nameInFont.equals(".notdef") || !genericFont.hasGlyph(nameInFont)) + { + throw new IllegalArgumentException( + String.format("No glyph for U+%04X in font %s (generic: %s)", unicode, getName(), genericFont.getName())); + } } + Map inverted = encoding.getNameToCodeMap(); int code = inverted.get(name); - return new byte[] { (byte)code }; + bytes = new byte[] { (byte)code }; + codeToBytesMap.put(unicode, bytes); + return bytes; } @Override @@ -366,7 +464,7 @@ public float getWidthFromFont(int code) throws IOException String name = codeToName(code); // width of .notdef is ignored for substitutes, see PDFBOX-1900 - if (!isEmbedded && name.equals(".notdef")) + if (!isEmbedded && ".notdef".equals(name)) { return 250; } @@ -405,7 +503,7 @@ public int readCode(InputStream in) throws IOException @Override protected Encoding readEncodingFromFont() throws IOException { - if (getStandard14AFM() != null) + if (!isEmbedded() && getStandard14AFM() != null) { // read from AFM return new Type1Encoding(getStandard14AFM()); @@ -459,8 +557,10 @@ private BoundingBox generateBoundingBox() throws IOException { if (getFontDescriptor() != null) { PDRectangle bbox = getFontDescriptor().getFontBoundingBox(); - if (bbox.getLowerLeftX() != 0 || bbox.getLowerLeftY() != 0 || - bbox.getUpperRightX() != 0 || bbox.getUpperRightY() != 0) { + if (bbox != null && + (bbox.getLowerLeftX() != 0 || bbox.getLowerLeftY() != 0 || + bbox.getUpperRightX() != 0 || bbox.getUpperRightY() != 0)) + { return new BoundingBox(bbox.getLowerLeftX(), bbox.getLowerLeftY(), bbox.getUpperRightX(), bbox.getUpperRightY()); } @@ -485,21 +585,34 @@ private String getNameInFont(String name) throws IOException { return name; } - else + + // try alternative name + 
String altName = ALT_NAMES.get(name); + if (altName != null && !name.equals(".notdef") && genericFont.hasGlyph(altName)) { - // try alternative name - String altName = ALT_NAMES.get(name); - if (altName != null && !name.equals(".notdef") && genericFont.hasGlyph(altName)) + return altName; + } + + // try unicode name + String unicodes = getGlyphList().toUnicode(name); + if (unicodes != null && unicodes.length() == 1) + { + String uniName = getUniNameOfCodePoint(unicodes.codePointAt(0)); + if (genericFont.hasGlyph(uniName)) { - return altName; + return uniName; } - else + // PDFBOX-4017: no postscript table on Windows 10, and the low uni00NN + // names are not found in Symbol font. What works is using the PDF code plus 0xF000 + // while disregarding encoding from the PDF (because of file from PDFBOX-1606, + // makes sense because this segment is about finding the name in a standard font) + //TODO bring up better solution than this + if ("SymbolMT".equals(genericFont.getName())) { - // try unicode name - String unicodes = getGlyphList().toUnicode(name); - if (unicodes != null && unicodes.length() == 1) + Integer code = SymbolEncoding.INSTANCE.getNameToCodeMap().get(name); + if (code != null) { - String uniName = getUniNameOfCodePoint(unicodes.codePointAt(0)); + uniName = getUniNameOfCodePoint(code + 0xF000); if (genericFont.hasGlyph(uniName)) { return uniName; @@ -507,6 +620,7 @@ private String getNameInFont(String name) throws IOException } } } + return ".notdef"; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1FontEmbedder.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1FontEmbedder.java index f4e1a44479c..f53270ad422 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1FontEmbedder.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1FontEmbedder.java @@ -98,6 +98,7 @@ class PDType1FontEmbedder dict.setInt(COSName.FIRST_CHAR, 0); dict.setInt(COSName.LAST_CHAR, 255); 
dict.setItem(COSName.WIDTHS, COSArrayList.converterToCOSArray(widths)); + dict.setItem(COSName.ENCODING, encoding); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3CharProc.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3CharProc.java index b71e5a4a804..b35b1da9c59 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3CharProc.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3CharProc.java @@ -1,195 +1,180 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.pdfbox.pdmodel.font; - -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; -import org.apache.pdfbox.contentstream.PDContentStream; -import org.apache.pdfbox.contentstream.operator.Operator; -import org.apache.pdfbox.cos.COSBase; -import org.apache.pdfbox.cos.COSNumber; -import org.apache.pdfbox.cos.COSObject; -import org.apache.pdfbox.cos.COSStream; -import org.apache.pdfbox.pdfparser.PDFStreamParser; -import org.apache.pdfbox.pdmodel.PDResources; -import org.apache.pdfbox.pdmodel.common.COSObjectable; -import org.apache.pdfbox.pdmodel.common.PDRectangle; -import org.apache.pdfbox.pdmodel.common.PDStream; -import org.apache.pdfbox.util.Matrix; - -/** - * A Type 3 character procedure. This is a standalone PDF content stream. - * - * @author John Hewson - */ -public final class PDType3CharProc implements COSObjectable, PDContentStream -{ - private final PDType3Font font; - private final COSStream charStream; - - public PDType3CharProc(PDType3Font font, COSStream charStream) - { - this.font = font; - this.charStream = charStream; - } - - @Override - public COSStream getCOSObject() - { - return charStream; - } - - public PDType3Font getFont() - { - return font; - } - - public PDStream getContentStream() - { - return new PDStream(charStream); - } - - @Override - public InputStream getContents() throws IOException - { - return charStream.createInputStream(); - } - - @Override - public PDResources getResources() - { - return font.getResources(); - } - - @Override - public PDRectangle getBBox() - { - return font.getFontBBox(); - } - - /** - * Calculate the bounding box of this glyph. This will work only if the first operator in the - * stream is d1. - * - * @return the bounding box of this glyph, or null if the first operator is not d1. - * @throws IOException If an io error occurs while parsing the stream. 
- */ - public PDRectangle getGlyphBBox() throws IOException - { - List arguments = new ArrayList(); - PDFStreamParser parser = new PDFStreamParser(this); - Object token = parser.parseNextToken(); - while (token != null) - { - if (token instanceof COSObject) - { - arguments.add(((COSObject) token).getObject()); - } - else if (token instanceof Operator) - { - if (((Operator) token).getName().equals("d1") && arguments.size() == 6) - { - for (int i = 0; i < 6; ++i) - { - if (!(arguments.get(i) instanceof COSNumber)) - { - return null; - } - } - return new PDRectangle( - ((COSNumber) arguments.get(2)).floatValue(), - ((COSNumber) arguments.get(3)).floatValue(), - ((COSNumber) arguments.get(4)).floatValue() - ((COSNumber) arguments.get(2)).floatValue(), - ((COSNumber) arguments.get(5)).floatValue() - ((COSNumber) arguments.get(3)).floatValue()); - } - else - { - return null; - } - } - else - { - arguments.add((COSBase) token); - } - token = parser.parseNextToken(); - } - return null; - } - - @Override - public Matrix getMatrix() - { - return font.getFontMatrix(); - } - - /** - * todo. 
- * - * @return - * @throws IOException - */ - public float getWidth() throws IOException - { - List arguments = new ArrayList(); - PDFStreamParser parser = new PDFStreamParser(this); - Object token = parser.parseNextToken(); - while (token != null) - { - if (token instanceof COSObject) - { - arguments.add(((COSObject) token).getObject()); - } - else if (token instanceof Operator) - { - return parseWidth((Operator) token, arguments); - } - else - { - arguments.add((COSBase) token); - } - token = parser.parseNextToken(); - } - throw new IOException("Unexpected end of stream"); - } - - private float parseWidth(Operator operator, List arguments) throws IOException - { - if (operator.getName().equals("d0") || operator.getName().equals("d1")) - { - Object obj = arguments.get(0); - if (obj instanceof Number) - { - return ((Number) obj).floatValue(); - } - else if (obj instanceof COSNumber) - { - return ((COSNumber) obj).floatValue(); - } - else - { - throw new IOException("Unexpected argument type: " + obj.getClass().getName()); - } - } - else - { - throw new IOException("First operator must be d0 or d1"); - } - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdmodel.font; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import org.apache.pdfbox.contentstream.PDContentStream; +import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.pdfparser.PDFStreamParser; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.common.PDStream; +import org.apache.pdfbox.util.Matrix; + +/** + * A Type 3 character procedure. This is a standalone PDF content stream. + * + * @author John Hewson + */ +public final class PDType3CharProc implements COSObjectable, PDContentStream +{ + private final PDType3Font font; + private final COSStream charStream; + + public PDType3CharProc(PDType3Font font, COSStream charStream) + { + this.font = font; + this.charStream = charStream; + } + + @Override + public COSStream getCOSObject() + { + return charStream; + } + + public PDType3Font getFont() + { + return font; + } + + public PDStream getContentStream() + { + return new PDStream(charStream); + } + + @Override + public InputStream getContents() throws IOException + { + return charStream.createInputStream(); + } + + @Override + public PDResources getResources() + { + return font.getResources(); + } + + @Override + public PDRectangle getBBox() + { + return font.getFontBBox(); + } + + /** + * Calculate the bounding box of this glyph. This will work only if the first operator in the + * stream is d1. + * + * @return the bounding box of this glyph, or null if the first operator is not d1. + * @throws IOException If an io error occurs while parsing the stream. 
+ */ + public PDRectangle getGlyphBBox() throws IOException + { + List arguments = new ArrayList(); + PDFStreamParser parser = new PDFStreamParser(this); + Object token = parser.parseNextToken(); + while (token != null) + { + if (token instanceof Operator) + { + if (((Operator) token).getName().equals("d1") && arguments.size() == 6) + { + for (int i = 0; i < 6; ++i) + { + if (!(arguments.get(i) instanceof COSNumber)) + { + return null; + } + } + return new PDRectangle( + ((COSNumber) arguments.get(2)).floatValue(), + ((COSNumber) arguments.get(3)).floatValue(), + ((COSNumber) arguments.get(4)).floatValue() - ((COSNumber) arguments.get(2)).floatValue(), + ((COSNumber) arguments.get(5)).floatValue() - ((COSNumber) arguments.get(3)).floatValue()); + } + else + { + return null; + } + } + else + { + arguments.add((COSBase) token); + } + token = parser.parseNextToken(); + } + return null; + } + + @Override + public Matrix getMatrix() + { + return font.getFontMatrix(); + } + + /** + * Get the width from a type3 charproc stream. + * + * @return the glyph width. + * @throws IOException if the stream could not be read, or did not have d0 or d1 as first + * operator, or if their first argument was not a number. 
+ */ + public float getWidth() throws IOException + { + List arguments = new ArrayList(); + PDFStreamParser parser = new PDFStreamParser(this); + Object token = parser.parseNextToken(); + while (token != null) + { + if (token instanceof Operator) + { + return parseWidth((Operator) token, arguments); + } + else + { + arguments.add((COSBase) token); + } + token = parser.parseNextToken(); + } + throw new IOException("Unexpected end of stream"); + } + + private float parseWidth(Operator operator, List arguments) throws IOException + { + if (operator.getName().equals("d0") || operator.getName().equals("d1")) + { + COSBase obj = arguments.get(0); + if (obj instanceof COSNumber) + { + return ((COSNumber) obj).floatValue(); + } + throw new IOException("Unexpected argument type: " + obj.getClass().getName()); + } + else + { + throw new IOException("First operator must be d0 or d1"); + } + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java index 2a9bf0eb31f..b9d805a022c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java @@ -19,6 +19,9 @@ import java.awt.geom.GeneralPath; import java.io.IOException; import java.io.InputStream; +import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.fontbox.FontBoxFont; import org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.cos.COSArray; @@ -27,6 +30,7 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.ResourceCache; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding; import org.apache.pdfbox.pdmodel.font.encoding.Encoding; @@ -41,10 +45,16 @@ */ public class PDType3Font 
extends PDSimpleFont { + /** + * Log instance. + */ + private static final Log LOG = LogFactory.getLog(PDType3Font.class); + private PDResources resources; private COSDictionary charProcs; private Matrix fontMatrix; private BoundingBox fontBBox; + private final ResourceCache resourceCache; /** * Constructor. @@ -52,8 +62,20 @@ public class PDType3Font extends PDSimpleFont * @param fontDictionary The font dictionary according to the PDF specification. */ public PDType3Font(COSDictionary fontDictionary) throws IOException + { + this(fontDictionary, null); + } + + /** + * Constructor. + * + * @param fontDictionary The font dictionary according to the PDF specification. + * @param resourceCache Resource cache, can be null. + */ + public PDType3Font(COSDictionary fontDictionary, ResourceCache resourceCache) throws IOException { super(fontDictionary); + this.resourceCache = resourceCache; readEncoding(); } @@ -66,8 +88,20 @@ public String getName() @Override protected final void readEncoding() throws IOException { - COSDictionary encodingDict = (COSDictionary)dict.getDictionaryObject(COSName.ENCODING); - encoding = new DictionaryEncoding(encodingDict); + COSBase encodingBase = dict.getDictionaryObject(COSName.ENCODING); + if (encodingBase instanceof COSName) + { + COSName encodingName = (COSName) encodingBase; + encoding = Encoding.getInstance(encodingName); + if (encoding == null) + { + LOG.warn("Unknown encoding: " + encodingName.getName()); + } + } + else if (encodingBase instanceof COSDictionary) + { + encoding = new DictionaryEncoding((COSDictionary) encodingBase); + } glyphList = GlyphList.getAdobeGlyphList(); } @@ -94,8 +128,8 @@ public GeneralPath getPath(String name) throws IOException @Override public boolean hasGlyph(String name) throws IOException { - COSStream stream = (COSStream) getCharProcs().getDictionaryObject(COSName.getPDFName(name)); - return stream != null; + COSBase base = getCharProcs().getDictionaryObject(COSName.getPDFName(name)); + return base 
instanceof COSStream; } @Override @@ -116,9 +150,15 @@ public float getWidth(int code) throws IOException { int firstChar = dict.getInt(COSName.FIRST_CHAR, -1); int lastChar = dict.getInt(COSName.LAST_CHAR, -1); - if (getWidths().size() > 0 && code >= firstChar && code <= lastChar) + List widths = getWidths(); + if (!widths.isEmpty() && code >= firstChar && code <= lastChar) { - return getWidths().get(code - firstChar); + if (code - firstChar >= widths.size()) + { + return 0; + } + Float w = widths.get(code - firstChar); + return w == null ? 0 : w; } else { @@ -138,7 +178,8 @@ public float getWidth(int code) throws IOException public float getWidthFromFont(int code) throws IOException { PDType3CharProc charProc = getCharProc(code); - if (charProc == null) + if (charProc == null || charProc.getContentStream() == null || + charProc.getContentStream().getLength() == 0) { return 0; } @@ -160,11 +201,11 @@ public float getHeight(int code) throws IOException // the following values are all more or less accurate at least all are average // values. Maybe we'll find another way to get those value for every single glyph // in the future if needed - PDRectangle fontBBox = desc.getFontBoundingBox(); + PDRectangle bbox = desc.getFontBoundingBox(); float retval = 0; - if (fontBBox != null) + if (bbox != null) { - retval = fontBBox.getHeight() / 2; + retval = bbox.getHeight() / 2; } if (retval == 0) { @@ -204,10 +245,10 @@ public Matrix getFontMatrix() { if (fontMatrix == null) { - COSArray array = (COSArray) dict.getDictionaryObject(COSName.FONT_MATRIX); - if (array != null) + COSBase base = dict.getDictionaryObject(COSName.FONT_MATRIX); + if (base instanceof COSArray) { - fontMatrix = new Matrix(array); + fontMatrix = new Matrix((COSArray) base); } else { @@ -224,6 +265,12 @@ public boolean isDamaged() return false; } + @Override + public boolean isStandard14() + { + return false; + } + /** * Returns the optional resources of the type3 stream. 
* @@ -233,10 +280,10 @@ public PDResources getResources() { if (resources == null) { - COSDictionary resources = (COSDictionary) dict.getDictionaryObject(COSName.RESOURCES); - if (resources != null) + COSBase base = dict.getDictionaryObject(COSName.RESOURCES); + if (base instanceof COSDictionary) { - this.resources = new PDResources(resources); + this.resources = new PDResources((COSDictionary) base, resourceCache); } } return resources; @@ -249,11 +296,11 @@ public PDResources getResources() */ public PDRectangle getFontBBox() { - COSArray rect = (COSArray) dict.getDictionaryObject(COSName.FONT_BBOX); + COSBase base = dict.getDictionaryObject(COSName.FONT_BBOX); PDRectangle retval = null; - if(rect != null) + if (base instanceof COSArray) { - retval = new PDRectangle(rect); + retval = new PDRectangle((COSArray) base); } return retval; } @@ -314,7 +361,7 @@ public COSDictionary getCharProcs() { if (charProcs == null) { - charProcs = (COSDictionary) dict.getDictionaryObject(COSName.CHAR_PROCS); + charProcs = dict.getCOSDictionary(COSName.CHAR_PROCS); } return charProcs; } @@ -328,14 +375,13 @@ public COSDictionary getCharProcs() public PDType3CharProc getCharProc(int code) { String name = getEncoding().getName(code); - if (!name.equals(".notdef")) + if (getCharProcs() == null) + { + return null; + } + COSStream stream = getCharProcs().getCOSStream(COSName.getPDFName(name)); + if (stream != null) { - COSStream stream; - stream = (COSStream)getCharProcs().getDictionaryObject(COSName.getPDFName(name)); - if (stream == null) - { - return null; - } return new PDType3CharProc(this, stream); } return null; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDVectorFont.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDVectorFont.java index 21680d8264f..7b011fc44b3 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDVectorFont.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDVectorFont.java @@ -28,17 +28,17 @@ public 
interface PDVectorFont { /** - * Returns the glyph path for the given character code. + * Returns the glyph path for the given character code in a PDF. * - * @param code character code + * @param code character code in a PDF. Not to be confused with unicode. * @throws java.io.IOException if the font could not be read */ GeneralPath getPath(int code) throws IOException; - + /** - * Returns true if this font contains a glyph for the given character code. + * Returns true if this font contains a glyph for the given character code in a PDF. * - * @param code character code + * @param code character code in a PDF. Not to be confused with unicode. */ boolean hasGlyph(int code) throws IOException; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/Standard14Fonts.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/Standard14Fonts.java index a523c08fa72..4ab81f1277c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/Standard14Fonts.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/Standard14Fonts.java @@ -17,12 +17,11 @@ package org.apache.pdfbox.pdmodel.font; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; -import java.net.URL; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.Map; import java.util.Set; import org.apache.fontbox.afm.AFMParser; @@ -36,131 +35,193 @@ */ final class Standard14Fonts { + /** + * Contains all base names and alias names for the known fonts. + * For base fonts both the key and the value will be the base name. + * For aliases, the key is an alias, and the value is a base name. + * We want a single lookup in the map to find the font both by a base name or an alias. + */ + private static final Map ALIASES = new HashMap(38); + + /** + * Contains the font metrics for the base fonts. + * The key is a base font name, value is a FontMetrics instance. 
+ * Metrics are loaded into this map on demand, only if needed. + * @see #getAFM + */ + private static final Map FONTS = new HashMap(14); + + static + { + // the 14 standard fonts + mapName("Courier-Bold"); + mapName("Courier-BoldOblique"); + mapName("Courier"); + mapName("Courier-Oblique"); + mapName("Helvetica"); + mapName("Helvetica-Bold"); + mapName("Helvetica-BoldOblique"); + mapName("Helvetica-Oblique"); + mapName("Symbol"); + mapName("Times-Bold"); + mapName("Times-BoldItalic"); + mapName("Times-Italic"); + mapName("Times-Roman"); + mapName("ZapfDingbats"); + + // alternative names from Adobe Supplement to the ISO 32000 + mapName("CourierCourierNew", "Courier"); + mapName("CourierNew", "Courier"); + mapName("CourierNew,Italic", "Courier-Oblique"); + mapName("CourierNew,Bold", "Courier-Bold"); + mapName("CourierNew,BoldItalic", "Courier-BoldOblique"); + mapName("Arial", "Helvetica"); + mapName("Arial,Italic", "Helvetica-Oblique"); + mapName("Arial,Bold", "Helvetica-Bold"); + mapName("Arial,BoldItalic", "Helvetica-BoldOblique"); + mapName("TimesNewRoman", "Times-Roman"); + mapName("TimesNewRoman,Italic", "Times-Italic"); + mapName("TimesNewRoman,Bold", "Times-Bold"); + mapName("TimesNewRoman,BoldItalic", "Times-BoldItalic"); + + // Acrobat treats these fonts as "standard 14" too (at least Acrobat preflight says so) + mapName("Symbol,Italic", "Symbol"); + mapName("Symbol,Bold", "Symbol"); + mapName("Symbol,BoldItalic", "Symbol"); + mapName("Times", "Times-Roman"); + mapName("Times,Italic", "Times-Italic"); + mapName("Times,Bold", "Times-Bold"); + mapName("Times,BoldItalic", "Times-BoldItalic"); + + // PDFBOX-3457: PDF.js file bug864847.pdf + mapName("ArialMT", "Helvetica"); + mapName("Arial-ItalicMT", "Helvetica-Oblique"); + mapName("Arial-BoldMT", "Helvetica-Bold"); + mapName("Arial-BoldItalicMT", "Helvetica-BoldOblique"); + } + private Standard14Fonts() { } - private static final Set STANDARD_14_NAMES = new HashSet(34); - private static final Map 
STANDARD_14_MAPPING = new HashMap(34); - private static final Map STANDARD14_AFM_MAP = new HashMap(34); - static + /** + * Loads the metrics for the base font specified by name. Metric file must exist in the pdfbox + * jar under /org/apache/pdfbox/resources/afm/ + * + * @param fontName one of the standard 14 font names for which to lod the metrics. + * @throws IOException if no metrics exist for that font. + */ + private static void loadMetrics(String fontName) throws IOException { - try + String resourceName = "/org/apache/pdfbox/resources/afm/" + fontName + ".afm"; + InputStream resourceAsStream = PDType1Font.class.getResourceAsStream(resourceName); + if (resourceAsStream == null) { - addAFM("Courier-Bold"); - addAFM("Courier-BoldOblique"); - addAFM("Courier"); - addAFM("Courier-Oblique"); - addAFM("Helvetica"); - addAFM("Helvetica-Bold"); - addAFM("Helvetica-BoldOblique"); - addAFM("Helvetica-Oblique"); - addAFM("Symbol"); - addAFM("Times-Bold"); - addAFM("Times-BoldItalic"); - addAFM("Times-Italic"); - addAFM("Times-Roman"); - addAFM("ZapfDingbats"); - - // alternative names from Adobe Supplement to the ISO 32000 - addAFM("CourierCourierNew", "Courier"); - addAFM("CourierNew", "Courier"); - addAFM("CourierNew,Italic", "Courier-Oblique"); - addAFM("CourierNew,Bold", "Courier-Bold"); - addAFM("CourierNew,BoldItalic", "Courier-BoldOblique"); - addAFM("Arial", "Helvetica"); - addAFM("Arial,Italic", "Helvetica-Oblique"); - addAFM("Arial,Bold", "Helvetica-Bold"); - addAFM("Arial,BoldItalic", "Helvetica-BoldOblique"); - addAFM("TimesNewRoman", "Times-Roman"); - addAFM("TimesNewRoman,Italic", "Times-Italic"); - addAFM("TimesNewRoman,Bold", "Times-Bold"); - addAFM("TimesNewRoman,BoldItalic", "Times-BoldItalic"); - - // Acrobat treats these fonts as "standard 14" too (at least Acrobat preflight says so) - addAFM("Symbol,Italic", "Symbol"); - addAFM("Symbol,Bold", "Symbol"); - addAFM("Symbol,BoldItalic", "Symbol"); - addAFM("Times", "Times-Roman"); - 
addAFM("Times,Italic", "Times-Italic"); - addAFM("Times,Bold", "Times-Bold"); - addAFM("Times,BoldItalic", "Times-BoldItalic"); + throw new IOException("resource '" + resourceName + "' not found"); } - catch (IOException e) + InputStream afmStream = new BufferedInputStream(resourceAsStream); + try { - throw new RuntimeException(e); + AFMParser parser = new AFMParser(afmStream); + FontMetrics metric = parser.parse(true); + FONTS.put(fontName, metric); } + finally + { + afmStream.close(); + } } - private static void addAFM(String fontName) throws IOException + /** + * Adds a standard font name to the map of known aliases, to simplify the logic of finding + * font metrics by name. We want a single lookup in the map to find the font both by a base name or + * an alias. + * + * @see #getAFM + * @param baseName the base name of the font; must be one of the 14 standard fonts + */ + private static void mapName(String baseName) { - addAFM(fontName, fontName); + ALIASES.put(baseName, baseName); } - private static void addAFM(String fontName, String afmName) throws IOException + /** + * Adds an alias name for a standard font to the map of known aliases to the map of aliases + * (alias as key, standard name as value). We want a single lookup in the map to find the font + * both by a base name or an alias. + * + * @param alias an alias for the font + * @param baseName the base name of the font; must be one of the 14 standard fonts + */ + private static void mapName(String alias, String baseName) { - STANDARD_14_NAMES.add(fontName); - STANDARD_14_MAPPING.put(fontName, afmName); + ALIASES.put(alias, baseName); + } - if (STANDARD14_AFM_MAP.containsKey(afmName)) + /** + * Returns the metrics for font specified by fontName. Loads the font metrics if not already + * loaded. 
+ * + * @param fontName name of font; either a base name or alias + * @return the font metrics or null if the name is not one of the known names + * @throws IllegalArgumentException if no metrics exist for that font. + */ + public static FontMetrics getAFM(String fontName) + { + String baseName = ALIASES.get(fontName); + if (baseName == null) { - STANDARD14_AFM_MAP.put(fontName, STANDARD14_AFM_MAP.get(afmName)); + return null; } - String resourceName = "org/apache/pdfbox/resources/afm/" + afmName + ".afm"; - URL url = PDType1Font.class.getClassLoader().getResource(resourceName); - if (url != null) + if (FONTS.get(baseName) == null) { - InputStream afmStream = url.openStream(); - try + synchronized (FONTS) { - AFMParser parser = new AFMParser(afmStream); - FontMetrics metric = parser.parse(true); - STANDARD14_AFM_MAP.put(fontName, metric); + if (FONTS.get(baseName) == null) + { + try + { + loadMetrics(baseName); + } + catch (IOException ex) + { + throw new IllegalArgumentException(ex); + } + } } - finally - { - afmStream.close(); - } - } - else - { - throw new IOException(resourceName + " not found"); } - } - /** - * Returns the AFM for the given font. - * @param baseName base name of font - */ - public static FontMetrics getAFM(String baseName) - { - return STANDARD14_AFM_MAP.get(baseName); + return FONTS.get(baseName); } /** - * Returns true if the given font name a Standard 14 font. - * @param baseName base name of font + * Returns true if the given font name is one of the known names, including alias. + * + * @param fontName the name of font, either a base name or alias + * @return true if the name is one of the known names */ - public static boolean containsName(String baseName) + public static boolean containsName(String fontName) { - return STANDARD_14_NAMES.contains(baseName); + return ALIASES.containsKey(fontName); } /** - * Returns the set of Standard 14 font names, including additional names. + * Returns the set of known font names, including aliases. 
*/ public static Set getNames() { - return Collections.unmodifiableSet(STANDARD_14_NAMES); + return Collections.unmodifiableSet(ALIASES.keySet()); } /** - * Returns the name of the actual font which the given font name maps to. - * @param baseName base name of font + * Returns the base name of the font which the given font name maps to. + * + * @param fontName name of font, either a base name or an alias + * @return the base name or null if this is not one of the known names */ - public static String getMappedFontName(String baseName) + public static String getMappedFontName(String fontName) { - return STANDARD_14_MAPPING.get(baseName); + return ALIASES.get(fontName); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java index a7df741a2e9..76386fda19f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java @@ -26,6 +26,7 @@ import java.util.Map; import java.util.TreeMap; import org.apache.pdfbox.util.Charsets; +import org.apache.pdfbox.util.Hex; /** * Writes ToUnicode Mapping Files. @@ -37,6 +38,11 @@ final class ToUnicodeWriter private final Map cidToUnicode = new TreeMap(); private int wMode; + /** + * To test corner case of PDFBOX-4302. + */ + static final int MAX_ENTRIES_PER_OPERATOR = 100; + /** * Creates a new ToUnicode CMap writer. */ @@ -144,25 +150,28 @@ public void writeTo(OutputStream out) throws IOException dstPrev = text; } - // limit of 100 entries per operator - int batchCount = (int)Math.ceil(srcFrom.size() / 100.0); + // limit entries per operator + int batchCount = (int) Math.ceil(srcFrom.size() / + (double) MAX_ENTRIES_PER_OPERATOR); for (int batch = 0; batch < batchCount; batch++) { - int count = batch == batchCount - 1 ? srcFrom.size() % 100 : 100; + int count = batch == batchCount - 1 ? 
+ srcFrom.size() - MAX_ENTRIES_PER_OPERATOR * batch : + MAX_ENTRIES_PER_OPERATOR; writer.write(count + " beginbfrange\n"); for (int j = 0; j < count; j++) { - int index = batch * 100 + j; + int index = batch * MAX_ENTRIES_PER_OPERATOR + j; writer.write('<'); - writer.write(toHex(srcFrom.get(index))); + writer.write(Hex.getChars(srcFrom.get(index).shortValue())); writer.write("> "); writer.write('<'); - writer.write(toHex(srcTo.get(index))); + writer.write(Hex.getChars(srcTo.get(index).shortValue())); writer.write("> "); - writer.write("<"); - writer.write(stringToHex(dstString.get(index))); + writer.write('<'); + writer.write(Hex.getCharsUTF16BE(dstString.get(index))); writer.write(">\n"); } writeLine(writer, "endbfrange\n"); @@ -182,20 +191,4 @@ private void writeLine(BufferedWriter writer, String text) throws IOException writer.write(text); writer.write('\n'); } - - private String toHex(int num) - { - return String.format("%04X", num); - } - - private String stringToHex(String text) - { - // use of non-BMP code points requires PDF 1.5 or later, otherwise we're limited to UCS-2 - StringBuilder sb = new StringBuilder(); - for (byte b : text.getBytes(Charsets.UTF_16BE)) - { - sb.append(String.format("%02X", b)); - } - return sb.toString(); - } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/TrueTypeEmbedder.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/TrueTypeEmbedder.java index 54a8a48e2cd..c502f9c159b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/TrueTypeEmbedder.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/TrueTypeEmbedder.java @@ -17,6 +17,7 @@ package org.apache.pdfbox.pdmodel.font; +import java.awt.geom.GeneralPath; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -26,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.fontbox.ttf.CmapLookup; import org.apache.fontbox.ttf.CmapSubtable; 
import org.apache.fontbox.ttf.HeaderTable; import org.apache.fontbox.ttf.HorizontalHeaderTable; @@ -50,31 +52,24 @@ abstract class TrueTypeEmbedder implements Subsetter { private static final int ITALIC = 1; - private static final int OBLIQUE = 256; + private static final int OBLIQUE = 512; private static final String BASE25 = "BCDEFGHIJKLMNOPQRSTUVWXYZ"; private final PDDocument document; protected TrueTypeFont ttf; protected PDFontDescriptor fontDescriptor; - protected final CmapSubtable cmap; - private final Set subsetCodePoints = new HashSet(); - private final boolean embedSubset; - + /** - * Creates a new TrueType font for embedding. + * For API backwards compatibility. + * + * @deprecated */ - TrueTypeEmbedder(PDDocument document, COSDictionary dict, InputStream ttfStream, - boolean embedSubset) throws IOException - { - this.document = document; - this.embedSubset = embedSubset; - - buildFontFile2(ttfStream); - dict.setName(COSName.BASE_FONT, ttf.getName()); + @Deprecated + protected final CmapSubtable cmap; - // choose a Unicode "cmap" - cmap = ttf.getUnicodeCmap(); - } + protected final CmapLookup cmapLookup; + private final Set subsetCodePoints = new HashSet(); + private final boolean embedSubset; /** * Creates a new TrueType font for embedding. 
@@ -87,16 +82,48 @@ abstract class TrueTypeEmbedder implements Subsetter this.ttf = ttf; fontDescriptor = createFontDescriptor(ttf); + if (!isEmbeddingPermitted(ttf)) + { + throw new IOException("This font does not permit embedding"); + } + + if (!embedSubset) + { + // full embedding + + // TrueType collections are not supported + InputStream is = ttf.getOriginalData(); + byte[] b = new byte[4]; + is.mark(b.length); + if (is.read(b) == b.length && new String(b).equals("ttcf")) + { + is.close(); + throw new IOException("Full embedding of TrueType font collections not supported"); + } + if (is.markSupported()) + { + is.reset(); + } + else + { + is.close(); + is = ttf.getOriginalData(); + } + PDStream stream = new PDStream(document, is, COSName.FLATE_DECODE); + stream.getCOSObject().setLong(COSName.LENGTH1, ttf.getOriginalDataSize()); + fontDescriptor.setFontFile2(stream); + } + dict.setName(COSName.BASE_FONT, ttf.getName()); // choose a Unicode "cmap" cmap = ttf.getUnicodeCmap(); + cmapLookup = ttf.getUnicodeCmapLookup(); } public void buildFontFile2(InputStream ttfStream) throws IOException { PDStream stream = new PDStream(document, ttfStream, COSName.FLATE_DECODE); - stream.getCOSObject().setInt(COSName.LENGTH1, stream.toByteArray().length); // as the stream was closed within the PDStream constructor, we have to recreate it InputStream input = null; @@ -117,27 +144,26 @@ public void buildFontFile2(InputStream ttfStream) throws IOException { IOUtils.closeQuietly(input); } - + stream.getCOSObject().setLong(COSName.LENGTH1, ttf.getOriginalDataSize()); fontDescriptor.setFontFile2(stream); } /** * Returns true if the fsType in the OS/2 table permits embedding. 
*/ - private boolean isEmbeddingPermitted(TrueTypeFont ttf) throws IOException + boolean isEmbeddingPermitted(TrueTypeFont ttf) throws IOException { if (ttf.getOS2Windows() != null) { int fsType = ttf.getOS2Windows().getFsType(); - int exclusive = fsType & 0x8; // bits 0-3 are a set of exclusive bits - - if ((exclusive & OS2WindowsMetricsTable.FSTYPE_RESTRICTED) == - OS2WindowsMetricsTable.FSTYPE_RESTRICTED) + int maskedFsType = fsType & 0x000F; + // PDFBOX-5191: don't check the bit because permissions are exclusive + if (maskedFsType == OS2WindowsMetricsTable.FSTYPE_RESTRICTED) { // restricted License embedding return false; } - else if ((exclusive & OS2WindowsMetricsTable.FSTYPE_BITMAP_ONLY) == + else if ((fsType & OS2WindowsMetricsTable.FSTYPE_BITMAP_ONLY) == OS2WindowsMetricsTable.FSTYPE_BITMAP_ONLY) { // bitmap embedding only @@ -173,15 +199,22 @@ private PDFontDescriptor createFontDescriptor(TrueTypeFont ttf) throws IOExcepti fd.setFontName(ttf.getName()); OS2WindowsMetricsTable os2 = ttf.getOS2Windows(); + if (os2 == null) + { + throw new IOException("os2 table is missing in font " + ttf.getName()); + } PostScriptTable post = ttf.getPostScript(); + if (post == null) + { + throw new IOException("post table is missing in font " + ttf.getName()); + } // Flags fd.setFixedPitch(post.getIsFixedPitch() > 0 || ttf.getHorizontalHeader().getNumberOfHMetrics() == 1); int fsSelection = os2.getFsSelection(); - fd.setItalic((fsSelection & ITALIC) == fsSelection || - (fsSelection & OBLIQUE) == fsSelection); + fd.setItalic(((fsSelection & (ITALIC | OBLIQUE)) != 0)); switch (os2.getFamilyClass()) { @@ -195,6 +228,8 @@ private PDFontDescriptor createFontDescriptor(TrueTypeFont ttf) throws IOExcepti case OS2WindowsMetricsTable.FAMILY_CLASS_SCRIPTS: fd.setScript(true); break; + default: + break; } fd.setFontWeight(os2.getWeightClass()); @@ -228,11 +263,26 @@ private PDFontDescriptor createFontDescriptor(TrueTypeFont ttf) throws IOExcepti } else { - // estimate by summing the 
typographical +ve ascender and -ve descender - fd.setCapHeight((os2.getTypoAscender() + os2.getTypoDescender()) * scaling); - - // estimate by halving the typographical ascender - fd.setXHeight(os2.getTypoAscender() / 2.0f * scaling); + GeneralPath capHPath = ttf.getPath("H"); + if (capHPath != null) + { + fd.setCapHeight(Math.round(capHPath.getBounds2D().getMaxY()) * scaling); + } + else + { + // estimate by summing the typographical +ve ascender and -ve descender + fd.setCapHeight((os2.getTypoAscender() + os2.getTypoDescender()) * scaling); + } + GeneralPath xPath = ttf.getPath("x"); + if (xPath != null) + { + fd.setXHeight(Math.round(xPath.getBounds2D().getMaxY()) * scaling); + } + else + { + // estimate by halving the typographical ascender + fd.setXHeight(os2.getTypoAscender() / 2.0f * scaling); + } } // StemV - there's no true TTF equivalent of this, so we estimate it @@ -243,7 +293,10 @@ private PDFontDescriptor createFontDescriptor(TrueTypeFont ttf) throws IOExcepti /** * Returns the FontBox font. + * + * @deprecated */ + @Deprecated public TrueTypeFont getTrueTypeFont() { return ttf; @@ -291,7 +344,7 @@ public void subset() throws IOException tables.add("gasp"); // set the GIDs to subset - TTFSubsetter subsetter = new TTFSubsetter(getTrueTypeFont(), tables); + TTFSubsetter subsetter = new TTFSubsetter(ttf, tables); subsetter.addAll(subsetCodePoints); // calculate deterministic tag based on the chosen subset diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/DictionaryEncoding.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/DictionaryEncoding.java index a56f24d2cbe..856a13c3ac9 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/DictionaryEncoding.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/DictionaryEncoding.java @@ -38,6 +38,9 @@ public class DictionaryEncoding extends Encoding /** * Creates a new DictionaryEncoding for embedding. 
+ * + * @param baseEncoding + * @param differences */ public DictionaryEncoding(COSName baseEncoding, COSArray differences) { @@ -88,10 +91,11 @@ public DictionaryEncoding(COSDictionary fontEncoding, boolean isNonSymbolic, Enc encoding = fontEncoding; Encoding base = null; - if (encoding.containsKey(COSName.BASE_ENCODING)) + boolean hasBaseEncoding = encoding.containsKey(COSName.BASE_ENCODING); + if (hasBaseEncoding) { COSName name = encoding.getCOSName(COSName.BASE_ENCODING); - base = Encoding.getInstance(name); // may be null + base = Encoding.getInstance(name); // null when the name is invalid } if (base == null) @@ -110,6 +114,8 @@ public DictionaryEncoding(COSDictionary fontEncoding, boolean isNonSymbolic, Enc } else { + // triggering this error indicates a bug in PDFBox. Every font should always have + // a built-in encoding, if not, we parsed it incorrectly. throw new IllegalArgumentException("Symbolic fonts must have a built-in " + "encoding"); } @@ -125,11 +131,16 @@ public DictionaryEncoding(COSDictionary fontEncoding, boolean isNonSymbolic, Enc private void applyDifferences() { // now replace with the differences - COSArray differences = (COSArray)encoding.getDictionaryObject( COSName.DIFFERENCES ); + COSBase base = encoding.getDictionaryObject(COSName.DIFFERENCES); + if (!(base instanceof COSArray)) + { + return; + } + COSArray diffArray = (COSArray) base; int currentIndex = -1; - for( int i=0; differences != null && i getDifferences() return differences; } - /** - * Convert this standard java object to a COS object. - * - * @return The cos object that matches this Java object. 
- */ + @Override public COSBase getCOSObject() { return encoding; @@ -173,6 +180,11 @@ public COSBase getCOSObject() @Override public String getEncodingName() { + if (baseEncoding == null) + { + // In type 3 the /Differences array shall specify the complete character encoding + return "differences"; + } return baseEncoding.getEncodingName() + " with differences"; } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/Encoding.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/Encoding.java index ea7dce2bcdb..b88589ef71c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/Encoding.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/Encoding.java @@ -18,9 +18,8 @@ import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.Map; -import java.util.Set; + import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.COSObjectable; @@ -51,20 +50,30 @@ else if (COSName.MAC_ROMAN_ENCODING.equals(name)) { return MacRomanEncoding.INSTANCE; } + else if (COSName.MAC_EXPERT_ENCODING.equals(name)) + { + return MacExpertEncoding.INSTANCE; + } else { return null; } } + /** + * code-to-name map. Derived classes should not modify the map after class construction. + */ protected final Map codeToName = new HashMap(250); + + /** + * name-to-code map. Derived classes should not modify the map after class construction. + */ protected final Map inverted = new HashMap(250); - private Set names; /** - * Returns an unmodifiable view of the code -> name mapping. + * Returns an unmodifiable view of the code -> name mapping. * - * @return the code -> name map + * @return the code -> name map */ public Map getCodeToNameMap() { @@ -72,10 +81,10 @@ public Map getCodeToNameMap() } /** - * Returns an unmodifiable view of the name -> code mapping. More than one name may map to + * Returns an unmodifiable view of the name -> code mapping. 
More than one name may map to * the same code. * - * @return the name -> code map + * @return the name -> code map */ public Map getNameToCodeMap() { @@ -83,7 +92,10 @@ public Map getNameToCodeMap() } /** - * This will add a character encoding. + * This will add a character encoding. An already existing mapping is preserved when creating + * the reverse mapping. Should only be used during construction of the class. + * + * @see #overwrite(int, String) * * @param code character code * @param name PostScript glyph name @@ -91,7 +103,35 @@ public Map getNameToCodeMap() protected void add(int code, String name) { codeToName.put(code, name); + if (!inverted.containsKey(name)) + { + inverted.put(name, code); + } + } + + /** + * This will add a character encoding. An already existing mapping is overwritten when creating + * the reverse mapping. + * + * @see Encoding#add(int, String) + * + * @param code character code + * @param name PostScript glyph name + */ + protected void overwrite(int code, String name) + { + // remove existing reverse mapping first + String oldName = codeToName.get(code); + if (oldName != null) + { + Integer oldCode = inverted.get(oldName); + if (oldCode != null && oldCode == code) + { + inverted.remove(oldName); + } + } inverted.put(name, code); + codeToName.put(code, name); } /** @@ -101,14 +141,7 @@ protected void add(int code, String name) */ public boolean contains(String name) { - // we have to wait until all add() calls are done before building the name cache - // otherwise /Differences won't be accounted for - if (names == null) - { - names = new HashSet(codeToName.size()); - names.addAll(codeToName.values()); - } - return names.contains(name); + return inverted.containsKey(name); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/GlyphList.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/GlyphList.java index 1c9a950a8c6..4613f9e00ce 100644 --- 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/GlyphList.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/GlyphList.java @@ -19,12 +19,14 @@ import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.io.IOUtils; /** * PostScript glyph list, maps glyph names to sequences of Unicode characters. @@ -45,16 +47,25 @@ public final class GlyphList */ private static GlyphList load(String filename, int numberOfEntries) { - ClassLoader loader = GlyphList.class.getClassLoader(); - String path = "org/apache/pdfbox/resources/glyphlist/"; + String path = "/org/apache/pdfbox/resources/glyphlist/" + filename; + InputStream resourceAsStream = null; try { - return new GlyphList(loader.getResourceAsStream(path + filename), numberOfEntries); + resourceAsStream = GlyphList.class.getResourceAsStream(path); + if (resourceAsStream == null) + { + throw new IOException("GlyphList '" + path + "' not found"); + } + return new GlyphList(resourceAsStream, numberOfEntries); } catch (IOException e) { throw new RuntimeException(e); } + finally + { + IOUtils.closeQuietly(resourceAsStream); + } } static @@ -96,7 +107,7 @@ public static GlyphList getZapfDingbats() private final Map unicodeToName; // additional read/write cache for uniXXXX names - private final Map uniNameToUnicodeCache = new HashMap(); + private final Map uniNameToUnicodeCache = new ConcurrentHashMap(); /** * Creates a new GlyphList from a glyph list file. 
@@ -163,7 +174,15 @@ private void loadList(InputStream input) throws IOException nameToUnicode.put(name, string); // reverse mapping - if (!unicodeToName.containsKey(string)) + // PDFBOX-3884: take the various standard encodings as canonical, + // e.g. tilde over ilde + final boolean forceOverride = + WinAnsiEncoding.INSTANCE.contains(name) || + MacRomanEncoding.INSTANCE.contains(name) || + MacExpertEncoding.INSTANCE.contains(name) || + SymbolEncoding.INSTANCE.contains(name) || + ZapfDingbatsEncoding.INSTANCE.contains(name); + if (!unicodeToName.containsKey(string) || forceOverride) { unicodeToName.put(string, name); } @@ -282,7 +301,11 @@ else if (name.startsWith("u") && name.length() == 5) LOG.warn("Not a number in Unicode character name: " + name); } } - uniNameToUnicodeCache.put(name, unicode); + if (unicode != null) + { + // null value not allowed in ConcurrentHashMap + uniNameToUnicodeCache.put(name, unicode); + } } return unicode; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/MacExpertEncoding.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/MacExpertEncoding.java new file mode 100644 index 00000000000..1fa76f05a46 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/MacExpertEncoding.java @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.font.encoding; + +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSName; + +/** + * This is the Mac Expert encoding, a mapping of character codes to PostScript glyph names. + */ +public class MacExpertEncoding extends Encoding +{ + + private static final int CHAR_CODE = 0; + private static final int CHAR_NAME = 1; + + /** + * Table of octal character codes and their corresponding names. + */ + private static final Object[][] MAC_EXPERT_ENCODING_TABLE = { + {0276, "AEsmall"}, + {0207, "Aacutesmall"}, + {0211, "Acircumflexsmall"}, + {047, "Acutesmall"}, + {0212, "Adieresissmall"}, + {0210, "Agravesmall"}, + {0214, "Aringsmall"}, + {0141, "Asmall"}, + {0213, "Atildesmall"}, + {0363, "Brevesmall"}, + {0142, "Bsmall"}, + {0256, "Caronsmall"}, + {0215, "Ccedillasmall"}, + {0311, "Cedillasmall"}, + {0136, "Circumflexsmall"}, + {0143, "Csmall"}, + {0254, "Dieresissmall"}, + {0372, "Dotaccentsmall"}, + {0144, "Dsmall"}, + {0216, "Eacutesmall"}, + {0220, "Ecircumflexsmall"}, + {0221, "Edieresissmall"}, + {0217, "Egravesmall"}, + {0145, "Esmall"}, + {0104, "Ethsmall"}, + {0146, "Fsmall"}, + {0140, "Gravesmall"}, + {0147, "Gsmall"}, + {0150, "Hsmall"}, + {042, "Hungarumlautsmall"}, + {0222, "Iacutesmall"}, + {0224, "Icircumflexsmall"}, + {0225, "Idieresissmall"}, + {0223, "Igravesmall"}, + {0151, "Ismall"}, + {0152, "Jsmall"}, + {0153, "Ksmall"}, + {0302, "Lslashsmall"}, + {0154, "Lsmall"}, + {0364, "Macronsmall"}, + {0155, "Msmall"}, + {0156, "Nsmall"}, + {0226, "Ntildesmall"}, + {0317, "OEsmall"}, + {0227, "Oacutesmall"}, + {0231,
"Ocircumflexsmall"}, + {0232, "Odieresissmall"}, + {0362, "Ogoneksmall"}, + {0230, "Ogravesmall"}, + {0277, "Oslashsmall"}, + {0157, "Osmall"}, + {0233, "Otildesmall"}, + {0160, "Psmall"}, + {0161, "Qsmall"}, + {0373, "Ringsmall"}, + {0162, "Rsmall"}, + {0247, "Scaronsmall"}, + {0163, "Ssmall"}, + {0271, "Thornsmall"}, + {0176, "Tildesmall"}, + {0164, "Tsmall"}, + {0234, "Uacutesmall"}, + {0236, "Ucircumflexsmall"}, + {0237, "Udieresissmall"}, + {0235, "Ugravesmall"}, + {0165, "Usmall"}, + {0166, "Vsmall"}, + {0167, "Wsmall"}, + {0170, "Xsmall"}, + {0264, "Yacutesmall"}, + {0330, "Ydieresissmall"}, + {0171, "Ysmall"}, + {0275, "Zcaronsmall"}, + {0172, "Zsmall"}, + {046, "ampersandsmall"}, + {0201, "asuperior"}, + {0365, "bsuperior"}, + {0251, "centinferior"}, + {043, "centoldstyle"}, + {0202, "centsuperior"}, + {072, "colon"}, + {0173, "colonmonetary"}, + {054, "comma"}, + {0262, "commainferior"}, + {0370, "commasuperior"}, + {0266, "dollarinferior"}, + {044, "dollaroldstyle"}, + {045, "dollarsuperior"}, + {0353, "dsuperior"}, + {0245, "eightinferior"}, + {070, "eightoldstyle"}, + {0241, "eightsuperior"}, + {0344, "esuperior"}, + {0326, "exclamdownsmall"}, + {041, "exclamsmall"}, + {0126, "ff"}, + {0131, "ffi"}, + {0132, "ffl"}, + {0127, "fi"}, + {0320, "figuredash"}, + {0114, "fiveeighths"}, + {0260, "fiveinferior"}, + {065, "fiveoldstyle"}, + {0336, "fivesuperior"}, + {0130, "fl"}, + {0242, "fourinferior"}, + {064, "fouroldstyle"}, + {0335, "foursuperior"}, + {057, "fraction"}, + {055, "hyphen"}, + {0137, "hypheninferior"}, + {0321, "hyphensuperior"}, + {0351, "isuperior"}, + {0361, "lsuperior"}, + {0367, "msuperior"}, + {0273, "nineinferior"}, + {071, "nineoldstyle"}, + {0341, "ninesuperior"}, + {0366, "nsuperior"}, + {053, "onedotenleader"}, + {0112, "oneeighth"}, + {0174, "onefitted"}, + {0110, "onehalf"}, + {0301, "oneinferior"}, + {061, "oneoldstyle"}, + {0107, "onequarter"}, + {0332, "onesuperior"}, + {0116, "onethird"}, + {0257, "osuperior"}, + {0133, 
"parenleftinferior"}, + {050, "parenleftsuperior"}, + {0135, "parenrightinferior"}, + {051, "parenrightsuperior"}, + {056, "period"}, + {0263, "periodinferior"}, + {0371, "periodsuperior"}, + {0300, "questiondownsmall"}, + {077, "questionsmall"}, + {0345, "rsuperior"}, + {0175, "rupiah"}, + {073, "semicolon"}, + {0115, "seveneighths"}, + {0246, "seveninferior"}, + {067, "sevenoldstyle"}, + {0340, "sevensuperior"}, + {0244, "sixinferior"}, + {066, "sixoldstyle"}, + {0337, "sixsuperior"}, + {040, "space"}, + {0352, "ssuperior"}, + {0113, "threeeighths"}, + {0243, "threeinferior"}, + {063, "threeoldstyle"}, + {0111, "threequarters"}, + {075, "threequartersemdash"}, + {0334, "threesuperior"}, + {0346, "tsuperior"}, + {052, "twodotenleader"}, + {0252, "twoinferior"}, + {062, "twooldstyle"}, + {0333, "twosuperior"}, + {0117, "twothirds"}, + {0274, "zeroinferior"}, + {060, "zerooldstyle"}, + {0342, "zerosuperior"} + }; + + /** + * Singleton instance of this class. + */ + public static final MacExpertEncoding INSTANCE = new MacExpertEncoding(); + + /** + * Constructor. + */ + public MacExpertEncoding() + { + for (Object[] encodingEntry : MAC_EXPERT_ENCODING_TABLE) + { + add((Integer) encodingEntry[CHAR_CODE], encodingEntry[CHAR_NAME].toString()); + } + } + + @Override + public COSBase getCOSObject() + { + return COSName.MAC_EXPERT_ENCODING; + } + + @Override + public String getEncodingName() + { + return "MacExpertEncoding"; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/MacOSRomanEncoding.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/MacOSRomanEncoding.java index fb467f845be..b602a8e8708 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/MacOSRomanEncoding.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/MacOSRomanEncoding.java @@ -73,11 +73,7 @@ public MacOSRomanEncoding() } - /** - * Convert this standard java object to a COS object. 
- * - * @return The cos object that matches this Java object. - */ + @Override public COSBase getCOSObject() { return null; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/MacRomanEncoding.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/MacRomanEncoding.java index aac797c415b..33c3976cabe 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/MacRomanEncoding.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/MacRomanEncoding.java @@ -242,7 +242,7 @@ public class MacRomanEncoding extends Encoding {0172, "z"}, {060, "zero"}, // adding an additional mapping as defined in Appendix D of the pdf spec - {0312, "space"} + {0312, "nbspace"} }; /** @@ -263,11 +263,7 @@ public MacRomanEncoding() } } - /** - * Convert this standard java object to a COS object. - * - * @return The cos object that matches this Java object. - */ + @Override public COSBase getCOSObject() { return COSName.MAC_ROMAN_ENCODING; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/StandardEncoding.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/StandardEncoding.java index d3d676ff564..ab49f5b6279 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/StandardEncoding.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/StandardEncoding.java @@ -204,11 +204,7 @@ public StandardEncoding() } } - /** - * Convert this standard java object to a COS object. - * - * @return The cos object that matches this Java object. 
- */ + @Override public COSBase getCOSObject() { return COSName.STANDARD_ENCODING; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/SymbolEncoding.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/SymbolEncoding.java new file mode 100644 index 00000000000..779b32bf454 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/SymbolEncoding.java @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.font.encoding; + +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSName; + +/** + * This is the Symbol encoding, a mapping of character codes to PostScript glyph names. + */ +public class SymbolEncoding extends Encoding +{ + + private static final int CHAR_CODE = 0; + private static final int CHAR_NAME = 1; + + /** + * Table of octal character codes and their corresponding names.
+ */ + private static final Object[][] SYMBOL_ENCODING_TABLE = { + {0101, "Alpha"}, + {0102, "Beta"}, + {0103, "Chi"}, + {0104, "Delta"}, + {0105, "Epsilon"}, + {0110, "Eta"}, + {0240, "Euro"}, + {0107, "Gamma"}, + {0301, "Ifraktur"}, + {0111, "Iota"}, + {0113, "Kappa"}, + {0114, "Lambda"}, + {0115, "Mu"}, + {0116, "Nu"}, + {0127, "Omega"}, + {0117, "Omicron"}, + {0106, "Phi"}, + {0120, "Pi"}, + {0131, "Psi"}, + {0302, "Rfraktur"}, + {0122, "Rho"}, + {0123, "Sigma"}, + {0124, "Tau"}, + {0121, "Theta"}, + {0125, "Upsilon"}, + {0241, "Upsilon1"}, + {0130, "Xi"}, + {0132, "Zeta"}, + {0300, "aleph"}, + {0141, "alpha"}, + {0046, "ampersand"}, + {0320, "angle"}, + {0341, "angleleft"}, + {0361, "angleright"}, + {0273, "approxequal"}, + {0253, "arrowboth"}, + {0333, "arrowdblboth"}, + {0337, "arrowdbldown"}, + {0334, "arrowdblleft"}, + {0336, "arrowdblright"}, + {0335, "arrowdblup"}, + {0257, "arrowdown"}, + {0276, "arrowhorizex"}, + {0254, "arrowleft"}, + {0256, "arrowright"}, + {0255, "arrowup"}, + {0275, "arrowvertex"}, + {0052, "asteriskmath"}, + {0174, "bar"}, + {0142, "beta"}, + {0173, "braceleft"}, + {0175, "braceright"}, + {0354, "bracelefttp"}, + {0355, "braceleftmid"}, + {0356, "braceleftbt"}, + {0374, "bracerighttp"}, + {0375, "bracerightmid"}, + {0376, "bracerightbt"}, + {0357, "braceex"}, + {0133, "bracketleft"}, + {0135, "bracketright"}, + {0351, "bracketlefttp"}, + {0352, "bracketleftex"}, + {0353, "bracketleftbt"}, + {0371, "bracketrighttp"}, + {0372, "bracketrightex"}, + {0373, "bracketrightbt"}, + {0267, "bullet"}, + {0277, "carriagereturn"}, + {0143, "chi"}, + {0304, "circlemultiply"}, + {0305, "circleplus"}, + {0247, "club"}, + {0072, "colon"}, + {0054, "comma"}, + {0100, "congruent"}, + {0343, "copyrightsans"}, + {0323, "copyrightserif"}, + {0260, "degree"}, + {0144, "delta"}, + {0250, "diamond"}, + {0270, "divide"}, + {0327, "dotmath"}, + {0070, "eight"}, + {0316, "element"}, + {0274, "ellipsis"}, + {0306, "emptyset"}, + {0145, "epsilon"}, + {0075, 
"equal"}, + {0272, "equivalence"}, + {0150, "eta"}, + {0041, "exclam"}, + {0044, "existential"}, + {0065, "five"}, + {0246, "florin"}, + {0064, "four"}, + {0244, "fraction"}, + {0147, "gamma"}, + {0321, "gradient"}, + {0076, "greater"}, + {0263, "greaterequal"}, + {0251, "heart"}, + {0245, "infinity"}, + {0362, "integral"}, + {0363, "integraltp"}, + {0364, "integralex"}, + {0365, "integralbt"}, + {0307, "intersection"}, + {0151, "iota"}, + {0153, "kappa"}, + {0154, "lambda"}, + {0074, "less"}, + {0243, "lessequal"}, + {0331, "logicaland"}, + {0330, "logicalnot"}, + {0332, "logicalor"}, + {0340, "lozenge"}, + {0055, "minus"}, + {0242, "minute"}, + {0155, "mu"}, + {0264, "multiply"}, + {0071, "nine"}, + {0317, "notelement"}, + {0271, "notequal"}, + {0313, "notsubset"}, + {0156, "nu"}, + {0043, "numbersign"}, + {0167, "omega"}, + {0166, "omega1"}, + {0157, "omicron"}, + {0061, "one"}, + {0050, "parenleft"}, + {0051, "parenright"}, + {0346, "parenlefttp"}, + {0347, "parenleftex"}, + {0350, "parenleftbt"}, + {0366, "parenrighttp"}, + {0367, "parenrightex"}, + {0370, "parenrightbt"}, + {0266, "partialdiff"}, + {0045, "percent"}, + {0056, "period"}, + {0136, "perpendicular"}, + {0146, "phi"}, + {0152, "phi1"}, + {0160, "pi"}, + {0053, "plus"}, + {0261, "plusminus"}, + {0325, "product"}, + {0314, "propersubset"}, + {0311, "propersuperset"}, + {0265, "proportional"}, + {0171, "psi"}, + {0077, "question"}, + {0326, "radical"}, + {0140, "radicalex"}, + {0315, "reflexsubset"}, + {0312, "reflexsuperset"}, + {0342, "registersans"}, + {0322, "registerserif"}, + {0162, "rho"}, + {0262, "second"}, + {0073, "semicolon"}, + {0067, "seven"}, + {0163, "sigma"}, + {0126, "sigma1"}, + {0176, "similar"}, + {0066, "six"}, + {0057, "slash"}, + {0040, "space"}, + {0252, "spade"}, + {0047, "suchthat"}, + {0345, "summation"}, + {0164, "tau"}, + {0134, "therefore"}, + {0161, "theta"}, + {0112, "theta1"}, + {0063, "three"}, + {0344, "trademarksans"}, + {0324, "trademarkserif"}, + {0062, "two"}, 
+ {0137, "underscore"}, + {0310, "union"}, + {0042, "universal"}, + {0165, "upsilon"}, + {0303, "weierstrass"}, + {0170, "xi"}, + {0060, "zero"}, + {0172, "zeta"} + }; + + /** + * Singleton instance of this class. + */ + public static final SymbolEncoding INSTANCE = new SymbolEncoding(); + + /** + * Constructor. + */ + public SymbolEncoding() + { + for (Object[] encodingEntry : SYMBOL_ENCODING_TABLE) + { + add((Integer) encodingEntry[CHAR_CODE], encodingEntry[CHAR_NAME].toString()); + } + } + + @Override + public COSBase getCOSObject() + { + return COSName.getPDFName("SymbolEncoding"); + } + + @Override + public String getEncodingName() + { + return "SymbolEncoding"; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java index 41a2eb4999a..73754e1a261 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java @@ -251,8 +251,8 @@ public class WinAnsiEncoding extends Encoding {0236, "zcaron"}, {060, "zero"}, // adding some additional mappings as defined in Appendix D of the pdf spec - {0240, "space"}, - {0255, "hyphen"} + {0240, "nbspace"}, + {0255, "sfthyphen"} }; /** @@ -283,11 +283,7 @@ public WinAnsiEncoding() } } - /** - * Convert this standard java object to a COS object. - * - * @return The cos object that matches this Java object. 
- */ + @Override public COSBase getCOSObject() { return COSName.WIN_ANSI_ENCODING; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/ZapfDingbatsEncoding.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/ZapfDingbatsEncoding.java new file mode 100644 index 00000000000..d7ec2953a4b --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/ZapfDingbatsEncoding.java @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.font.encoding; + +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSName; + +/** + * This is an interface to a text encoder. + */ +public class ZapfDingbatsEncoding extends Encoding +{ + + private static final int CHAR_CODE = 0; + private static final int CHAR_NAME = 1; + + /** + * Table of octal character codes and their corresponding names. 
+ */ + private static final Object[][] ZAPFDINGBATS_ENCODING_TABLE = { + {040, "space"}, + {041, "a1"}, + {042, "a2"}, + {043, "a202"}, + {044, "a3"}, + {045, "a4"}, + {046, "a5"}, + {047, "a119"}, + {050, "a118"}, + {051, "a117"}, + {052, "a11"}, + {053, "a12"}, + {054, "a13"}, + {055, "a14"}, + {056, "a15"}, + {057, "a16"}, + {060, "a105"}, + {061, "a17"}, + {062, "a18"}, + {063, "a19"}, + {064, "a20"}, + {065, "a21"}, + {066, "a22"}, + {067, "a23"}, + {070, "a24"}, + {071, "a25"}, + {072, "a26"}, + {073, "a27"}, + {074, "a28"}, + {075, "a6"}, + {076, "a7"}, + {077, "a8"}, + {0100, "a9"}, + {0101, "a10"}, + {0102, "a29"}, + {0103, "a30"}, + {0104, "a31"}, + {0105, "a32"}, + {0106, "a33"}, + {0107, "a34"}, + {0110, "a35"}, + {0111, "a36"}, + {0112, "a37"}, + {0113, "a38"}, + {0114, "a39"}, + {0115, "a40"}, + {0116, "a41"}, + {0117, "a42"}, + {0120, "a43"}, + {0121, "a44"}, + {0122, "a45"}, + {0123, "a46"}, + {0124, "a47"}, + {0125, "a48"}, + {0126, "a49"}, + {0127, "a50"}, + {0130, "a51"}, + {0131, "a52"}, + {0132, "a53"}, + {0133, "a54"}, + {0134, "a55"}, + {0135, "a56"}, + {0136, "a57"}, + {0137, "a58"}, + {0140, "a59"}, + {0141, "a60"}, + {0142, "a61"}, + {0143, "a62"}, + {0144, "a63"}, + {0145, "a64"}, + {0146, "a65"}, + {0147, "a66"}, + {0150, "a67"}, + {0151, "a68"}, + {0152, "a69"}, + {0153, "a70"}, + {0154, "a71"}, + {0155, "a72"}, + {0156, "a73"}, + {0157, "a74"}, + {0160, "a203"}, + {0161, "a75"}, + {0162, "a204"}, + {0163, "a76"}, + {0164, "a77"}, + {0165, "a78"}, + {0166, "a79"}, + {0167, "a81"}, + {0170, "a82"}, + {0171, "a83"}, + {0172, "a84"}, + {0173, "a97"}, + {0174, "a98"}, + {0175, "a99"}, + {0176, "a100"}, + {0241, "a101"}, + {0242, "a102"}, + {0243, "a103"}, + {0244, "a104"}, + {0245, "a106"}, + {0246, "a107"}, + {0247, "a108"}, + {0250, "a112"}, + {0251, "a111"}, + {0252, "a110"}, + {0253, "a109"}, + {0254, "a120"}, + {0255, "a121"}, + {0256, "a122"}, + {0257, "a123"}, + {0260, "a124"}, + {0261, "a125"}, + {0262, "a126"}, + {0263, "a127"}, + 
{0264, "a128"}, + {0265, "a129"}, + {0266, "a130"}, + {0267, "a131"}, + {0270, "a132"}, + {0271, "a133"}, + {0272, "a134"}, + {0273, "a135"}, + {0274, "a136"}, + {0275, "a137"}, + {0276, "a138"}, + {0277, "a139"}, + {0300, "a140"}, + {0301, "a141"}, + {0302, "a142"}, + {0303, "a143"}, + {0304, "a144"}, + {0305, "a145"}, + {0306, "a146"}, + {0307, "a147"}, + {0310, "a148"}, + {0311, "a149"}, + {0312, "a150"}, + {0313, "a151"}, + {0314, "a152"}, + {0315, "a153"}, + {0316, "a154"}, + {0317, "a155"}, + {0320, "a156"}, + {0321, "a157"}, + {0322, "a158"}, + {0323, "a159"}, + {0324, "a160"}, + {0325, "a161"}, + {0326, "a163"}, + {0327, "a164"}, + {0330, "a196"}, + {0331, "a165"}, + {0332, "a192"}, + {0333, "a166"}, + {0334, "a167"}, + {0335, "a168"}, + {0336, "a169"}, + {0337, "a170"}, + {0340, "a171"}, + {0341, "a172"}, + {0342, "a173"}, + {0343, "a162"}, + {0344, "a174"}, + {0345, "a175"}, + {0346, "a176"}, + {0347, "a177"}, + {0350, "a178"}, + {0351, "a179"}, + {0352, "a193"}, + {0353, "a180"}, + {0354, "a199"}, + {0355, "a181"}, + {0356, "a200"}, + {0357, "a182"}, + {0361, "a201"}, + {0362, "a183"}, + {0363, "a184"}, + {0364, "a197"}, + {0365, "a185"}, + {0366, "a194"}, + {0367, "a198"}, + {0370, "a186"}, + {0371, "a195"}, + {0372, "a187"}, + {0373, "a188"}, + {0374, "a189"}, + {0375, "a190"}, + {0376, "a191"} + }; + + /** + * Singleton instance of this class. + */ + public static final ZapfDingbatsEncoding INSTANCE = new ZapfDingbatsEncoding(); + + /** + * Constructor. 
+ */ + public ZapfDingbatsEncoding() + { + for (Object[] encodingEntry : ZAPFDINGBATS_ENCODING_TABLE) + { + add((Integer) encodingEntry[CHAR_CODE], encodingEntry[CHAR_NAME].toString()); + } + } + + @Override + public COSBase getCOSObject() + { + return COSName.getPDFName("ZapfDingbatsEncoding"); + } + + @Override + public String getEncodingName() + { + return "ZapfDingbatsEncoding"; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/package.html index 15f426055de..a5c46b3b4cb 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/PDLineDashPattern.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/PDLineDashPattern.java index d36cb15da71..f6c92c0c634 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/PDLineDashPattern.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/PDLineDashPattern.java @@ -20,7 +20,6 @@ import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSInteger; -import org.apache.pdfbox.pdmodel.common.COSArrayList; import org.apache.pdfbox.pdmodel.common.COSObjectable; import java.util.Arrays; @@ -60,7 +59,9 @@ public PDLineDashPattern(COSArray array, int phase) public COSBase getCOSObject() { COSArray cos = new COSArray(); - cos.add(COSArrayList.converterToCOSArray(Arrays.asList(array))); + COSArray patternArray = new COSArray(); + patternArray.setFloatArray(array); + cos.add(patternArray); cos.add(COSInteger.get(phase)); return cos; } @@ -77,10 +78,16 @@ public int getPhase() /** * Returns the dash array. - * @return the dash array + * @return the dash array, never null. 
*/ public float[] getDashArray() { return array.clone(); } + + @Override + public String toString() + { + return "PDLineDashPattern{array=" + Arrays.toString(array) + ", phase=" + phase + "}"; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/BlendComposite.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/BlendComposite.java index e8d33c60b0c..3b4af50dbc3 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/BlendComposite.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/BlendComposite.java @@ -1,225 +1,270 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.pdmodel.graphics.blend; - -import java.awt.AlphaComposite; -import java.awt.Composite; -import java.awt.CompositeContext; -import java.awt.RenderingHints; -import java.awt.color.ColorSpace; -import java.awt.image.ColorModel; -import java.awt.image.Raster; -import java.awt.image.WritableRaster; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -/** - * AWT composite for blend modes. 
- * - * @author Kühn & Weyh Software, GmbH - */ -public final class BlendComposite implements Composite -{ - /** - * Log instance. - */ - private static final Log LOG = LogFactory.getLog(BlendComposite.class); - - /** - * Creates a blend composite - * - * @param blendMode Desired blend mode - * @param constantAlpha Constant alpha, must be in the inclusive range - * [0.0...1.0] or it will be clipped. - */ - public static Composite getInstance(BlendMode blendMode, float constantAlpha) - { - if (blendMode == BlendMode.NORMAL) - { - if (constantAlpha < 0) - { - LOG.warn("using 0 instead of incorrect Alpha " + constantAlpha); - constantAlpha = 0; - } - else if (constantAlpha > 1) - { - LOG.warn("using 1 instead of incorrect Alpha " + constantAlpha); - constantAlpha = 1; - } - return AlphaComposite.getInstance(AlphaComposite.SRC_OVER, constantAlpha); - } - else - { - return new BlendComposite(blendMode, constantAlpha); - } - } - - // TODO - non-separable blending modes - - private final BlendMode blendMode; - private final float constantAlpha; - - private BlendComposite(BlendMode blendMode, float constantAlpha) - { - super(); - this.blendMode = blendMode; - this.constantAlpha = constantAlpha; - } - - @Override - public CompositeContext createContext(ColorModel srcColorModel, ColorModel dstColorModel, - RenderingHints hints) - { - return new BlendCompositeContext(srcColorModel, dstColorModel, hints); - } - - class BlendCompositeContext implements CompositeContext - { - private final ColorModel srcColorModel; - private final ColorModel dstColorModel; - private final RenderingHints hints; - - BlendCompositeContext(ColorModel srcColorModel, ColorModel dstColorModel, - RenderingHints hints) - { - this.srcColorModel = srcColorModel; - this.dstColorModel = dstColorModel; - this.hints = hints; - } - - @Override - public void dispose() - { - // nothing needed - } - - @Override - public void compose(Raster src, Raster dstIn, WritableRaster dstOut) - { - int x0 = src.getMinX(); - 
int y0 = src.getMinY(); - int width = Math.min(Math.min(src.getWidth(), dstIn.getWidth()), dstOut.getWidth()); - int height = Math.min(Math.min(src.getHeight(), dstIn.getHeight()), dstOut.getHeight()); - int x1 = x0 + width; - int y1 = y0 + height; - int dstInXShift = dstIn.getMinX() - x0; - int dstInYShift = dstIn.getMinY() - y0; - int dstOutXShift = dstOut.getMinX() - x0; - int dstOutYShift = dstOut.getMinY() - y0; - - ColorSpace srcColorSpace = srcColorModel.getColorSpace(); - int numSrcColorComponents = srcColorModel.getNumColorComponents(); - int numSrcComponents = src.getNumBands(); - boolean srcHasAlpha = (numSrcComponents > numSrcColorComponents); - ColorSpace dstColorSpace = dstColorModel.getColorSpace(); - int numDstColorComponents = dstColorModel.getNumColorComponents(); - int numDstComponents = dstIn.getNumBands(); - boolean dstHasAlpha = (numDstComponents > numDstColorComponents); - - int colorSpaceType = dstColorSpace.getType(); - boolean subtractive = (colorSpaceType != ColorSpace.TYPE_RGB) - && (colorSpaceType != ColorSpace.TYPE_GRAY); - - boolean blendModeIsSeparable = blendMode instanceof SeparableBlendMode; - SeparableBlendMode separableBlendMode = blendModeIsSeparable ? - (SeparableBlendMode) blendMode : null; - - boolean needsColorConversion = !srcColorSpace.equals(dstColorSpace); - - Object srcPixel = null; - Object dstPixel = null; - float[] srcComponents = new float[numSrcComponents]; - float[] dstComponents = new float[numDstComponents]; - - float[] srcColor = new float[numSrcColorComponents]; - float[] srcConverted; - - for (int y = y0; y < y1; y++) - { - for (int x = x0; x < x1; x++) - { - srcPixel = src.getDataElements(x, y, srcPixel); - dstPixel = dstIn.getDataElements(dstInXShift + x, dstInYShift + y, dstPixel); - - srcComponents = srcColorModel.getNormalizedComponents(srcPixel, srcComponents, - 0); - dstComponents = dstColorModel.getNormalizedComponents(dstPixel, dstComponents, - 0); - - float srcAlpha = srcHasAlpha ? 
srcComponents[numSrcColorComponents] : 1.0f; - float dstAlpha = dstHasAlpha ? dstComponents[numDstColorComponents] : 1.0f; - - srcAlpha = srcAlpha * constantAlpha; - - float resultAlpha = dstAlpha + srcAlpha - srcAlpha * dstAlpha; - float srcAlphaRatio = (resultAlpha > 0) ? srcAlpha / resultAlpha : 0; - - // convert color - System.arraycopy(srcComponents, 0, srcColor, 0, numSrcColorComponents); - if (needsColorConversion) - { - // TODO - very very slow - Hash results??? - float[] cieXYZ = srcColorSpace.toCIEXYZ(srcColor); - srcConverted = dstColorSpace.fromCIEXYZ(cieXYZ); - } - else - { - srcConverted = srcColor; - } - - if (separableBlendMode != null) - { - for (int k = 0; k < numDstColorComponents; k++) - { - float srcValue = srcConverted[k]; - float dstValue = dstComponents[k]; - - if (subtractive) - { - srcValue = 1 - srcValue; - dstValue = 1 - dstValue; - } - - float value = separableBlendMode.blendChannel(srcValue, dstValue); - value = srcValue + dstAlpha * (value - srcValue); - value = dstValue + srcAlphaRatio * (value - dstValue); - - if (subtractive) - { - value = 1 - value; - } - - dstComponents[k] = value; - } - } - else - { - // TODO - nonseparable modes - } - - if (dstHasAlpha) - { - dstComponents[numDstColorComponents] = resultAlpha; - } - - dstPixel = dstColorModel.getDataElements(dstComponents, 0, dstPixel); - dstOut.setDataElements(dstOutXShift + x, dstOutYShift + y, dstPixel); - } - } - } - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.graphics.blend; + +import java.awt.AlphaComposite; +import java.awt.Composite; +import java.awt.CompositeContext; +import java.awt.RenderingHints; +import java.awt.color.ColorSpace; +import java.awt.image.ColorModel; +import java.awt.image.Raster; +import java.awt.image.WritableRaster; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * AWT composite for blend modes. + * + * @author Kühn & Weyh Software GmbH + */ +public final class BlendComposite implements Composite +{ + /** + * Log instance. + */ + private static final Log LOG = LogFactory.getLog(BlendComposite.class); + + /** + * Creates a blend composite + * + * @param blendMode Desired blend mode + * @param constantAlpha Constant alpha, must be in the inclusive range + * [0.0...1.0] or it will be clipped. + * @return a blend composite. 
+ */ + public static Composite getInstance(BlendMode blendMode, float constantAlpha) + { + if (constantAlpha < 0) + { + LOG.warn("using 0 instead of incorrect Alpha " + constantAlpha); + constantAlpha = 0; + } + else if (constantAlpha > 1) + { + LOG.warn("using 1 instead of incorrect Alpha " + constantAlpha); + constantAlpha = 1; + } + if (blendMode == BlendMode.NORMAL) + { + return AlphaComposite.getInstance(AlphaComposite.SRC_OVER, constantAlpha); + } + else + { + return new BlendComposite(blendMode, constantAlpha); + } + } + + private final BlendMode blendMode; + private final float constantAlpha; + + private BlendComposite(BlendMode blendMode, float constantAlpha) + { + this.blendMode = blendMode; + this.constantAlpha = constantAlpha; + } + + @Override + public CompositeContext createContext(ColorModel srcColorModel, ColorModel dstColorModel, + RenderingHints hints) + { + return new BlendCompositeContext(srcColorModel, dstColorModel); + } + + class BlendCompositeContext implements CompositeContext + { + private final ColorModel srcColorModel; + private final ColorModel dstColorModel; + + BlendCompositeContext(ColorModel srcColorModel, ColorModel dstColorModel) + { + this.srcColorModel = srcColorModel; + this.dstColorModel = dstColorModel; + } + + @Override + public void dispose() + { + // nothing needed + } + + @Override + public void compose(Raster src, Raster dstIn, WritableRaster dstOut) + { + int x0 = src.getMinX(); + int y0 = src.getMinY(); + int width = Math.min(Math.min(src.getWidth(), dstIn.getWidth()), dstOut.getWidth()); + int height = Math.min(Math.min(src.getHeight(), dstIn.getHeight()), dstOut.getHeight()); + int x1 = x0 + width; + int y1 = y0 + height; + int dstInXShift = dstIn.getMinX() - x0; + int dstInYShift = dstIn.getMinY() - y0; + int dstOutXShift = dstOut.getMinX() - x0; + int dstOutYShift = dstOut.getMinY() - y0; + + ColorSpace srcColorSpace = srcColorModel.getColorSpace(); + int numSrcColorComponents = 
srcColorModel.getNumColorComponents(); + int numSrcComponents = src.getNumBands(); + boolean srcHasAlpha = (numSrcComponents > numSrcColorComponents); + ColorSpace dstColorSpace = dstColorModel.getColorSpace(); + int numDstColorComponents = dstColorModel.getNumColorComponents(); + int numDstComponents = dstIn.getNumBands(); + boolean dstHasAlpha = (numDstComponents > numDstColorComponents); + + int srcColorSpaceType = srcColorSpace.getType(); + int dstColorSpaceType = dstColorSpace.getType(); + boolean subtractive = (dstColorSpaceType != ColorSpace.TYPE_RGB) + && (dstColorSpaceType != ColorSpace.TYPE_GRAY); + + boolean blendModeIsSeparable = blendMode instanceof SeparableBlendMode; + SeparableBlendMode separableBlendMode = blendModeIsSeparable ? + (SeparableBlendMode) blendMode : null; + NonSeparableBlendMode nonSeparableBlendMode = !blendModeIsSeparable ? + (NonSeparableBlendMode) blendMode : null; + + boolean needsColorConversion = !srcColorSpace.equals(dstColorSpace); + + Object srcPixel = null; + Object dstPixel = null; + float[] srcComponents = new float[numSrcComponents]; + // PDFBOX-3501 let getNormalizedComponents allocate to avoid + // ArrayIndexOutOfBoundsException for bitonal target + float[] dstComponents = null; + + float[] srcColor = new float[numSrcColorComponents]; + float[] srcConverted; + float[] dstConverted; + float[] rgbResult = blendModeIsSeparable ? null : new float[dstHasAlpha ? 4 : 3]; + + for (int y = y0; y < y1; y++) + { + for (int x = x0; x < x1; x++) + { + srcPixel = src.getDataElements(x, y, srcPixel); + dstPixel = dstIn.getDataElements(dstInXShift + x, dstInYShift + y, dstPixel); + + srcComponents = srcColorModel.getNormalizedComponents(srcPixel, srcComponents, + 0); + dstComponents = dstColorModel.getNormalizedComponents(dstPixel, dstComponents, + 0); + + float srcAlpha = srcHasAlpha ? srcComponents[numSrcColorComponents] : 1.0f; + float dstAlpha = dstHasAlpha ? 
dstComponents[numDstColorComponents] : 1.0f; + + srcAlpha = srcAlpha * constantAlpha; + + float resultAlpha = dstAlpha + srcAlpha - srcAlpha * dstAlpha; + float srcAlphaRatio = (resultAlpha > 0) ? srcAlpha / resultAlpha : 0; + + if (separableBlendMode != null) + { + // convert color + System.arraycopy(srcComponents, 0, srcColor, 0, numSrcColorComponents); + if (needsColorConversion) + { + // TODO - very very slow - Hash results??? + float[] cieXYZ = srcColorSpace.toCIEXYZ(srcColor); + srcConverted = dstColorSpace.fromCIEXYZ(cieXYZ); + } + else + { + srcConverted = srcColor; + } + + for (int k = 0; k < numDstColorComponents; k++) + { + float srcValue = srcConverted[k]; + float dstValue = dstComponents[k]; + + if (subtractive) + { + srcValue = 1 - srcValue; + dstValue = 1 - dstValue; + } + + float value = separableBlendMode.blendChannel(srcValue, dstValue); + value = srcValue + dstAlpha * (value - srcValue); + value = dstValue + srcAlphaRatio * (value - dstValue); + + if (subtractive) + { + value = 1 - value; + } + + dstComponents[k] = value; + } + } + else + { + // Nonseparable blend modes are computed in RGB color space. + // TODO - CMYK color spaces need special treatment. 
+ + if (srcColorSpaceType == ColorSpace.TYPE_RGB) + { + srcConverted = srcComponents; + } + else + { + srcConverted = srcColorSpace.toRGB(srcComponents); + } + + if (dstColorSpaceType == ColorSpace.TYPE_RGB) + { + dstConverted = dstComponents; + } + else + { + dstConverted = dstColorSpace.toRGB(dstComponents); + } + + nonSeparableBlendMode.blend(srcConverted, dstConverted, rgbResult); + + for (int k = 0; k < 3; k++) + { + float srcValue = srcConverted[k]; + float dstValue = dstConverted[k]; + float value = rgbResult[k]; + value = Math.max(Math.min(value, 1.0f), 0.0f); + value = srcValue + dstAlpha * (value - srcValue); + value = dstValue + srcAlphaRatio * (value - dstValue); + rgbResult[k] = value; + } + + if (dstColorSpaceType == ColorSpace.TYPE_RGB) + { + System.arraycopy(rgbResult, 0, dstComponents, 0, dstComponents.length); + } + else + { + float[] temp = dstColorSpace.fromRGB(rgbResult); + System.arraycopy(temp, 0, dstComponents, 0, + Math.min(dstComponents.length, temp.length)); + } + } + + if (dstHasAlpha) + { + dstComponents[numDstColorComponents] = resultAlpha; + } + + dstPixel = dstColorModel.getDataElements(dstComponents, 0, dstPixel); + dstOut.setDataElements(dstOutXShift + x, dstOutYShift + y, dstPixel); + } + } + } + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/BlendMode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/BlendMode.java index c36d2b18e85..c23b0cefc2d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/BlendMode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/BlendMode.java @@ -1,213 +1,434 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.pdmodel.graphics.blend; - -import org.apache.pdfbox.cos.COSArray; -import org.apache.pdfbox.cos.COSBase; -import org.apache.pdfbox.cos.COSName; - -import java.util.HashMap; -import java.util.Map; - -/** - * Blend mode. - * - * @author Kühn & Weyh Software, GmbH - */ -public abstract class BlendMode -{ - /** - * Determines the blend mode from the BM entry in the COS ExtGState. - * - * @param cosBlendMode name or array - * @return blending mode - */ - public static BlendMode getInstance(COSBase cosBlendMode) - { - BlendMode result = null; - if (cosBlendMode instanceof COSName) - { - result = BLEND_MODES.get((COSName)cosBlendMode); - } - else if (cosBlendMode instanceof COSArray) - { - COSArray cosBlendModeArray = (COSArray) cosBlendMode; - for (int i = 0; i < cosBlendModeArray.size(); i++) - { - result = BLEND_MODES.get(cosBlendModeArray.get(i)); - if (result != null) - { - break; - } - } - } - - if (result != null) - { - return result; - } - return BlendMode.COMPATIBLE; - } - - public static final SeparableBlendMode NORMAL = new SeparableBlendMode() - { - @Override - public float blendChannel(float srcValue, float dstValue) - { - return srcValue; - } - }; - - public static final SeparableBlendMode COMPATIBLE = NORMAL; - - public static final SeparableBlendMode MULTIPLY = new SeparableBlendMode() - { - @Override - public float blendChannel(float srcValue, 
float dstValue) - { - return srcValue * dstValue; - } - }; - - public static final SeparableBlendMode SCREEN = new SeparableBlendMode() - { - @Override - public float blendChannel(float srcValue, float dstValue) - { - return srcValue + dstValue - srcValue * dstValue; - } - }; - - public static final SeparableBlendMode OVERLAY = new SeparableBlendMode() - { - @Override - public float blendChannel(float srcValue, float dstValue) - { - return (dstValue <= 0.5) ? 2 * dstValue * srcValue : 2 * (srcValue + dstValue - srcValue - * dstValue) - 1; - } - }; - - public static final SeparableBlendMode DARKEN = new SeparableBlendMode() - { - @Override - public float blendChannel(float srcValue, float dstValue) - { - return Math.min(srcValue, dstValue); - } - }; - - public static final SeparableBlendMode LIGHTEN = new SeparableBlendMode() - { - @Override - public float blendChannel(float srcValue, float dstValue) - { - return Math.max(srcValue, dstValue); - } - }; - - public static final SeparableBlendMode COLOR_DODGE = new SeparableBlendMode() - { - @Override - public float blendChannel(float srcValue, float dstValue) - { - return (srcValue < 1) ? Math.min(1, dstValue / (1 - srcValue)) : 1; - } - }; - - public static final SeparableBlendMode COLOR_BURN = new SeparableBlendMode() - { - @Override - public float blendChannel(float srcValue, float dstValue) - { - return (srcValue > 0) ? 1 - Math.min(1, (1 - dstValue) / srcValue) : 0; - } - }; - - public static final SeparableBlendMode HARD_LIGHT = new SeparableBlendMode() - { - @Override - public float blendChannel(float srcValue, float dstValue) - { - return (srcValue <= 0.5) ? 
2 * dstValue * srcValue : - 2 * (srcValue + dstValue - srcValue * dstValue) - 1; - } - }; - - public static final SeparableBlendMode SOFT_LIGHT = new SeparableBlendMode() - { - @Override - public float blendChannel(float srcValue, float dstValue) - { - if (srcValue <= 0.5) - { - return dstValue - (1 - 2 * srcValue) * dstValue * (1 - dstValue); - } - else - { - float d = (dstValue <= 0.25) ? ((16 * dstValue - 12) * dstValue + 4) * dstValue - : (float) Math .sqrt(dstValue); - return dstValue + (2 * srcValue - 1) * (d - dstValue); - } - } - }; - - public static final SeparableBlendMode DIFFERENCE = new SeparableBlendMode() - { - @Override - public float blendChannel(float srcValue, float dstValue) - { - return Math.abs(dstValue - srcValue); - } - }; - - public static final SeparableBlendMode EXCLUSION = new SeparableBlendMode() - { - @Override - public float blendChannel(float srcValue, float dstValue) - { - return dstValue + srcValue - 2 * dstValue * srcValue; - } - }; - - // this map *must* come after the declarations above, otherwise its values will be null - private static final Map BLEND_MODES = createBlendModeMap(); - - private static Map createBlendModeMap() - { - Map map = new HashMap(13); - map.put(COSName.NORMAL, BlendMode.NORMAL); - map.put(COSName.COMPATIBLE, BlendMode.COMPATIBLE); - map.put(COSName.MULTIPLY, BlendMode.MULTIPLY); - map.put(COSName.SCREEN, BlendMode.SCREEN); - map.put(COSName.OVERLAY, BlendMode.OVERLAY); - map.put(COSName.DARKEN, BlendMode.DARKEN); - map.put(COSName.LIGHTEN, BlendMode.LIGHTEN); - map.put(COSName.COLOR_DODGE, BlendMode.COLOR_DODGE); - map.put(COSName.COLOR_BURN, BlendMode.COLOR_BURN); - map.put(COSName.HARD_LIGHT, BlendMode.HARD_LIGHT); - map.put(COSName.SOFT_LIGHT, BlendMode.SOFT_LIGHT); - map.put(COSName.DIFFERENCE, BlendMode.DIFFERENCE); - map.put(COSName.EXCLUSION, BlendMode.EXCLUSION); - // TODO - non-separable blending modes - return map; - } - - BlendMode() - { - } -} +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.graphics.blend; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSName; + +import java.util.HashMap; +import java.util.Map; + +/** + * Blend mode. + * + * @author Kühn & Weyh Software GmbH + */ +public abstract class BlendMode +{ + public static final SeparableBlendMode NORMAL = new SeparableBlendMode() + { + @Override + public float blendChannel(float srcValue, float dstValue) + { + return srcValue; + } + }; + + public static final SeparableBlendMode COMPATIBLE = NORMAL; + + public static final SeparableBlendMode MULTIPLY = new SeparableBlendMode() + { + @Override + public float blendChannel(float srcValue, float dstValue) + { + return srcValue * dstValue; + } + }; + + public static final SeparableBlendMode SCREEN = new SeparableBlendMode() + { + @Override + public float blendChannel(float srcValue, float dstValue) + { + return srcValue + dstValue - srcValue * dstValue; + } + }; + + public static final SeparableBlendMode OVERLAY = new SeparableBlendMode() + { + @Override + public float blendChannel(float srcValue, float dstValue) + { + return (dstValue <= 0.5) ? 
2 * dstValue * srcValue : 2 * (srcValue + dstValue - srcValue + * dstValue) - 1; + } + }; + + public static final SeparableBlendMode DARKEN = new SeparableBlendMode() + { + @Override + public float blendChannel(float srcValue, float dstValue) + { + return Math.min(srcValue, dstValue); + } + }; + + public static final SeparableBlendMode LIGHTEN = new SeparableBlendMode() + { + @Override + public float blendChannel(float srcValue, float dstValue) + { + return Math.max(srcValue, dstValue); + } + }; + + public static final SeparableBlendMode COLOR_DODGE = new SeparableBlendMode() + { + @Override + public float blendChannel(float srcValue, float dstValue) + { + // See PDF 2.0 specification + if (dstValue == 0) + { + return 0; + } + if (dstValue >= 1 - srcValue) + { + return 1; + } + return dstValue / (1 - srcValue); + } + }; + + public static final SeparableBlendMode COLOR_BURN = new SeparableBlendMode() + { + @Override + public float blendChannel(float srcValue, float dstValue) + { + // See PDF 2.0 specification + if (dstValue == 1) + { + return 1; + } + if (1 - dstValue >= srcValue) + { + return 0; + } + return 1 - (1 - dstValue) / srcValue; + } + }; + + public static final SeparableBlendMode HARD_LIGHT = new SeparableBlendMode() + { + @Override + public float blendChannel(float srcValue, float dstValue) + { + return (srcValue <= 0.5) ? 2 * dstValue * srcValue : + 2 * (srcValue + dstValue - srcValue * dstValue) - 1; + } + }; + + public static final SeparableBlendMode SOFT_LIGHT = new SeparableBlendMode() + { + @Override + public float blendChannel(float srcValue, float dstValue) + { + if (srcValue <= 0.5) + { + return dstValue - (1 - 2 * srcValue) * dstValue * (1 - dstValue); + } + else + { + float d = (dstValue <= 0.25) ? 
((16 * dstValue - 12) * dstValue + 4) * dstValue + : (float) Math .sqrt(dstValue); + return dstValue + (2 * srcValue - 1) * (d - dstValue); + } + } + }; + + public static final SeparableBlendMode DIFFERENCE = new SeparableBlendMode() + { + @Override + public float blendChannel(float srcValue, float dstValue) + { + return Math.abs(dstValue - srcValue); + } + }; + + public static final SeparableBlendMode EXCLUSION = new SeparableBlendMode() + { + @Override + public float blendChannel(float srcValue, float dstValue) + { + return dstValue + srcValue - 2 * dstValue * srcValue; + } + }; + + public static final NonSeparableBlendMode HUE = new NonSeparableBlendMode() + { + @Override + public void blend(float[] srcValues, float[] dstValues, float[] result) + { + float[] temp = new float[3]; + getSaturationRGB(dstValues, srcValues, temp); + getLuminosityRGB(dstValues, temp, result); + } + }; + + public static final NonSeparableBlendMode SATURATION = new NonSeparableBlendMode() + { + @Override + public void blend(float[] srcValues, float[] dstValues, float[] result) + { + getSaturationRGB(srcValues, dstValues, result); + } + }; + + public static final NonSeparableBlendMode COLOR = new NonSeparableBlendMode() + { + @Override + public void blend(float[] srcValues, float[] dstValues, float[] result) + { + getLuminosityRGB(dstValues, srcValues, result); + } + }; + + public static final NonSeparableBlendMode LUMINOSITY = new NonSeparableBlendMode() + { + @Override + public void blend(float[] srcValues, float[] dstValues, float[] result) + { + getLuminosityRGB(srcValues, dstValues, result); + } + }; + + // these maps *must* come after the declarations above, otherwise its values will be null + private static final Map BLEND_MODES = createBlendModeMap(); + private static final Map BLEND_MODE_NAMES = createBlendModeNamesMap(); + + BlendMode() + { + } + + /** + * Determines the blend mode from the BM entry in the COS ExtGState. 
+ * + * @param cosBlendMode name or array + * @return blending mode + */ + public static BlendMode getInstance(COSBase cosBlendMode) + { + BlendMode result = null; + if (cosBlendMode instanceof COSName) + { + result = BLEND_MODES.get(cosBlendMode); + } + else if (cosBlendMode instanceof COSArray) + { + COSArray cosBlendModeArray = (COSArray) cosBlendMode; + for (int i = 0; i < cosBlendModeArray.size(); i++) + { + result = BLEND_MODES.get(cosBlendModeArray.getObject(i)); + if (result != null) + { + break; + } + } + } + + if (result != null) + { + return result; + } + return BlendMode.NORMAL; + } + + /** + * Determines the blend mode name from the BM object. + * + * @param bm Blend mode. + * @return name of blend mode. + */ + public static COSName getCOSName(BlendMode bm) + { + return BLEND_MODE_NAMES.get(bm); + } + + private static int get255Value(float val) + { + return (int) Math.floor(val >= 1.0 ? 255 : val * 255.0); + } + + private static void getSaturationRGB(float[] srcValues, float[] dstValues, float[] result) + { + int minb; + int maxb; + int mins; + int maxs; + int y; + int scale; + int r; + int g; + int b; + + int rd = get255Value(dstValues[0]); + int gd = get255Value(dstValues[1]); + int bd = get255Value(dstValues[2]); + int rs = get255Value(srcValues[0]); + int gs = get255Value(srcValues[1]); + int bs = get255Value(srcValues[2]); + + minb = Math.min(rd, Math.min(gd, bd)); + maxb = Math.max(rd, Math.max(gd, bd)); + if (minb == maxb) + { + /* backdrop has zero saturation, avoid divide by 0 */ + result[0] = gd / 255.0f; + result[1] = gd / 255.0f; + result[2] = gd / 255.0f; + return; + } + + mins = Math.min(rs, Math.min(gs, bs)); + maxs = Math.max(rs, Math.max(gs, bs)); + + scale = ((maxs - mins) << 16) / (maxb - minb); + y = (rd * 77 + gd * 151 + bd * 28 + 0x80) >> 8; + r = y + ((((rd - y) * scale) + 0x8000) >> 16); + g = y + ((((gd - y) * scale) + 0x8000) >> 16); + b = y + ((((bd - y) * scale) + 0x8000) >> 16); + + if (((r | g | b) & 0x100) == 0x100) + { + 
int scalemin; + int scalemax; + int min; + int max; + + min = Math.min(r, Math.min(g, b)); + max = Math.max(r, Math.max(g, b)); + + if (min < 0) + { + scalemin = (y << 16) / (y - min); + } + else + { + scalemin = 0x10000; + } + + if (max > 255) + { + scalemax = ((255 - y) << 16) / (max - y); + } + else + { + scalemax = 0x10000; + } + + scale = Math.min(scalemin, scalemax); + r = y + (((r - y) * scale + 0x8000) >> 16); + g = y + (((g - y) * scale + 0x8000) >> 16); + b = y + (((b - y) * scale + 0x8000) >> 16); + } + result[0] = r / 255.0f; + result[1] = g / 255.0f; + result[2] = b / 255.0f; + } + + private static void getLuminosityRGB(float[] srcValues, float[] dstValues, float[] result) + { + int delta; + int scale; + int r; + int g; + int b; + int y; + int rd = get255Value(dstValues[0]); + int gd = get255Value(dstValues[1]); + int bd = get255Value(dstValues[2]); + int rs = get255Value(srcValues[0]); + int gs = get255Value(srcValues[1]); + int bs = get255Value(srcValues[2]); + delta = ((rs - rd) * 77 + (gs - gd) * 151 + (bs - bd) * 28 + 0x80) >> 8; + r = rd + delta; + g = gd + delta; + b = bd + delta; + + if (((r | g | b) & 0x100) == 0x100) + { + y = (rs * 77 + gs * 151 + bs * 28 + 0x80) >> 8; + if (delta > 0) + { + int max; + max = Math.max(r, Math.max(g, b)); + scale = max == y ? 0 : ((255 - y) << 16) / (max - y); + } + else + { + int min; + min = Math.min(r, Math.min(g, b)); + scale = y == min ? 
0 : (y << 16) / (y - min); + } + r = y + (((r - y) * scale + 0x8000) >> 16); + g = y + (((g - y) * scale + 0x8000) >> 16); + b = y + (((b - y) * scale + 0x8000) >> 16); + } + result[0] = r / 255.0f; + result[1] = g / 255.0f; + result[2] = b / 255.0f; + } + + private static Map createBlendModeMap() + { + Map map = new HashMap(13); + map.put(COSName.NORMAL, BlendMode.NORMAL); + // BlendMode.COMPATIBLE should not be used + map.put(COSName.COMPATIBLE, BlendMode.NORMAL); + map.put(COSName.MULTIPLY, BlendMode.MULTIPLY); + map.put(COSName.SCREEN, BlendMode.SCREEN); + map.put(COSName.OVERLAY, BlendMode.OVERLAY); + map.put(COSName.DARKEN, BlendMode.DARKEN); + map.put(COSName.LIGHTEN, BlendMode.LIGHTEN); + map.put(COSName.COLOR_DODGE, BlendMode.COLOR_DODGE); + map.put(COSName.COLOR_BURN, BlendMode.COLOR_BURN); + map.put(COSName.HARD_LIGHT, BlendMode.HARD_LIGHT); + map.put(COSName.SOFT_LIGHT, BlendMode.SOFT_LIGHT); + map.put(COSName.DIFFERENCE, BlendMode.DIFFERENCE); + map.put(COSName.EXCLUSION, BlendMode.EXCLUSION); + map.put(COSName.HUE, BlendMode.HUE); + map.put(COSName.SATURATION, BlendMode.SATURATION); + map.put(COSName.LUMINOSITY, BlendMode.LUMINOSITY); + map.put(COSName.COLOR, BlendMode.COLOR); + return map; + } + + private static Map createBlendModeNamesMap() + { + Map map = new HashMap(13); + map.put(BlendMode.NORMAL, COSName.NORMAL); + // BlendMode.COMPATIBLE should not be used + map.put(BlendMode.COMPATIBLE, COSName.NORMAL); + map.put(BlendMode.MULTIPLY, COSName.MULTIPLY); + map.put(BlendMode.SCREEN, COSName.SCREEN); + map.put(BlendMode.OVERLAY, COSName.OVERLAY); + map.put(BlendMode.DARKEN, COSName.DARKEN); + map.put(BlendMode.LIGHTEN, COSName.LIGHTEN); + map.put(BlendMode.COLOR_DODGE, COSName.COLOR_DODGE); + map.put(BlendMode.COLOR_BURN, COSName.COLOR_BURN); + map.put(BlendMode.HARD_LIGHT, COSName.HARD_LIGHT); + map.put(BlendMode.SOFT_LIGHT, COSName.SOFT_LIGHT); + map.put(BlendMode.DIFFERENCE, COSName.DIFFERENCE); + map.put(BlendMode.EXCLUSION, COSName.EXCLUSION); 
+ map.put(BlendMode.HUE, COSName.HUE); + map.put(BlendMode.SATURATION, COSName.SATURATION); + map.put(BlendMode.LUMINOSITY, COSName.LUMINOSITY); + map.put(BlendMode.COLOR, COSName.COLOR); + return map; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/NonSeparableBlendMode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/NonSeparableBlendMode.java index 7ab7262dfdd..1e40abcaaa3 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/NonSeparableBlendMode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/NonSeparableBlendMode.java @@ -19,7 +19,7 @@ /** * Non-separable blend mode (supports blend function). * - * @author Kühn & Weyh Software, GmbH + * @author Kühn & Weyh Software GmbH */ public abstract class NonSeparableBlendMode extends BlendMode { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/SeparableBlendMode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/SeparableBlendMode.java index 0fdaf02a95e..c525fb79c9b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/SeparableBlendMode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/SeparableBlendMode.java @@ -19,7 +19,7 @@ /** * Separable blend mode (support blendChannel) * - * @author Kühn & Weyh Software, GmbH + * @author Kühn & Weyh Software GmbH */ public abstract class SeparableBlendMode extends BlendMode { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/SoftMaskPaint.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/SoftMaskPaint.java deleted file mode 100644 index af874148b3e..00000000000 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/blend/SoftMaskPaint.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.pdmodel.graphics.blend; - -import java.awt.Graphics; -import java.awt.Paint; -import java.awt.PaintContext; -import java.awt.Point; -import java.awt.Rectangle; -import java.awt.RenderingHints; -import java.awt.Transparency; -import java.awt.geom.AffineTransform; -import java.awt.geom.Rectangle2D; -import java.awt.image.BufferedImage; -import java.awt.image.ColorConvertOp; -import java.awt.image.ColorModel; -import java.awt.image.ComponentColorModel; -import java.awt.image.DataBuffer; -import java.awt.image.Raster; -import java.awt.image.WritableRaster; -import java.io.IOException; - -/** - * AWT Paint that adds a soft mask to the alpha channel of the existing parent paint. If the parent - * paint does not have an alpha channel, a new raster is created. - * - * @author Kühn & Weyh Software, GmbH - */ -public final class SoftMaskPaint implements Paint -{ - private final Paint parentPaint; - private final Raster softMaskRaster; - - /** - * Applies the soft mask to the parent. 
- */ - public SoftMaskPaint(Paint parentPaint, Raster softMaskRaster) - { - this.parentPaint = parentPaint; - this.softMaskRaster = softMaskRaster; - } - - @Override - public int getTransparency() - { - return Transparency.TRANSLUCENT; - } - - @Override - public PaintContext createContext(ColorModel cm, Rectangle deviceBounds, - Rectangle2D userBounds, AffineTransform at, RenderingHints hints) - { - try - { - PaintContext parentContext = parentPaint.createContext(null, deviceBounds, userBounds, - at, hints); - return new Context(parentContext); - } - catch (IOException e) - { - return null; // context cannot be created - } - } - - private class Context implements PaintContext - { - private final PaintContext parentContext; - private final ColorModel colorModel; - private final int numColorComponents; - private final ColorModel parentColorModel; - - Context(PaintContext parentContext) throws IOException - { - this.parentContext = parentContext; - parentColorModel = parentContext.getColorModel(); - if (parentContext.getColorModel().hasAlpha()) - { - colorModel = parentColorModel; - } - else - { - colorModel = new ComponentColorModel(parentContext.getColorModel() - .getColorSpace(), true, false, Transparency.OPAQUE, DataBuffer.TYPE_BYTE); - } - numColorComponents = colorModel.getNumColorComponents(); - } - - @Override - public ColorModel getColorModel() - { - return colorModel; - } - - @Override - public Raster getRaster(int x, int y, int w, int h) - { - Raster parentRaster = parentContext.getRaster(x, y, w, h); - - // getRaster can return the raster with origin (0,0) even if we applied for (x,y) - int parentMinX = parentRaster.getMinX(); - int parentMinY = parentRaster.getMinY(); - - WritableRaster result; - if (parentRaster instanceof WritableRaster) - { - if (parentColorModel.equals(colorModel)) - { - result = parentRaster.createCompatibleWritableRaster(); - result.setDataElements(-parentMinX, -parentMinY, parentRaster); - } - else - { - BufferedImage parentImage = 
new BufferedImage(parentColorModel, - (WritableRaster) parentRaster, - parentColorModel.isAlphaPremultiplied(), null); - result = Raster.createWritableRaster( - colorModel.createCompatibleSampleModel(w, h), new Point(0, 0)); - BufferedImage resultImage = new BufferedImage(colorModel, result, false, null); - Graphics graphics = resultImage.getGraphics(); - graphics.drawImage(parentImage, 0, 0, null); - graphics.dispose(); - } - } - else - { - result = Raster.createInterleavedRaster(DataBuffer.TYPE_BYTE, w, h, getColorModel() - .getNumComponents(), new Point(0, 0)); - ColorConvertOp colorConvertOp = new ColorConvertOp( - parentColorModel.getColorSpace(), colorModel.getColorSpace(), null); - colorConvertOp.filter(parentRaster, result); - } - - int softMaskMinX = softMaskRaster.getMinX(); - int softMaskMinY = softMaskRaster.getMinY(); - int softMaskMaxX = softMaskMinX + softMaskRaster.getWidth(); - int softMaskMaxY = softMaskMinY + softMaskRaster.getHeight(); - - for (int j = 0; j < h; j++) - { - for (int i = 0; i < w; i++) - { - int rx = x + i; - int ry = y + j; - - int alpha; - if ((rx >= softMaskMinX) && (rx < softMaskMaxX) && (ry >= softMaskMinY) - && (ry < softMaskMaxY)) - { - alpha = softMaskRaster.getSample(rx, ry, 0); - } - else - { - alpha = 0; - } - alpha = alpha * result.getSample(i, j, numColorComponents) / 255; - result.setSample(i, j, numColorComponents, alpha); - } - } - - return result; - } - - @Override - public void dispose() - { - // do nothing - } - } -} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCIEBasedColorSpace.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCIEBasedColorSpace.java index 5d0a603c90e..78699969787 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCIEBasedColorSpace.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCIEBasedColorSpace.java @@ -1,80 +1,88 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or 
more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.pdmodel.graphics.color; - -import java.awt.image.BufferedImage; -import java.awt.image.WritableRaster; -import java.io.IOException; - -/** - * CIE-based colour spaces specify colours in a way that is independent of the characteristics - * of any particular output device. They are based on an international standard for colour - * specification created by the Commission Internationale de l'Éclairage (CIE). - * - * @author John Hewson - */ -public abstract class PDCIEBasedColorSpace extends PDColorSpace -{ - // - // WARNING: this method is performance sensitive, modify with care! - // - @Override - public BufferedImage toRGBImage(WritableRaster raster) throws IOException - { - // This method calls toRGB to convert images one pixel at a time. For matrix-based - // CIE color spaces this is fast enough. However, it should not be used with any - // color space which uses an ICC Profile as it will be far too slow. 
- - int width = raster.getWidth(); - int height = raster.getHeight(); - - BufferedImage rgbImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); - WritableRaster rgbRaster = rgbImage.getRaster(); - - // always three components: ABC - float[] abc = new float[3]; - for (int y = 0; y < height; y++) - { - for (int x = 0; x < width; x++) - { - raster.getPixel(x, y, abc); - - // 0..255 -> 0..1 - abc[0] /= 255; - abc[1] /= 255; - abc[2] /= 255; - - float[] rgb = toRGB(abc); - - // 0..1 -> 0..255 - rgb[0] *= 255; - rgb[1] *= 255; - rgb[2] *= 255; - - rgbRaster.setPixel(x, y, rgb); - } - } - - return rgbImage; - } - - @Override - public String toString() - { - return getName(); // TODO return more info - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.graphics.color; + +import java.awt.image.BufferedImage; +import java.awt.image.WritableRaster; +import java.io.IOException; + +/** + * CIE-based colour spaces specify colours in a way that is independent of the characteristics + * of any particular output device. They are based on an international standard for colour + * specification created by the Commission Internationale de l'Éclairage (CIE). 
+ * + * @author John Hewson + */ +public abstract class PDCIEBasedColorSpace extends PDColorSpace +{ + // + // WARNING: this method is performance sensitive, modify with care! + // + @Override + public BufferedImage toRGBImage(WritableRaster raster) throws IOException + { + // This method calls toRGB to convert images one pixel at a time. For matrix-based + // CIE color spaces this is fast enough. However, it should not be used with any + // color space which uses an ICC Profile as it will be far too slow. + + int width = raster.getWidth(); + int height = raster.getHeight(); + + BufferedImage rgbImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); + WritableRaster rgbRaster = rgbImage.getRaster(); + + // always three components: ABC + float[] abc = new float[3]; + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + raster.getPixel(x, y, abc); + + // 0..255 -> 0..1 + abc[0] /= 255; + abc[1] /= 255; + abc[2] /= 255; + + float[] rgb = toRGB(abc); + + // 0..1 -> 0..255 + rgb[0] *= 255; + rgb[1] *= 255; + rgb[2] *= 255; + + rgbRaster.setPixel(x, y, rgb); + } + } + + return rgbImage; + } + + @Override + public BufferedImage toRawImage(WritableRaster raster) throws IOException + { + // There is no direct equivalent of a CIE colorspace in Java. So we can + // not do anything here. 
+ return null; + } + + @Override + public String toString() + { + return getName(); // TODO return more info + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCIEDictionaryBasedColorSpace.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCIEDictionaryBasedColorSpace.java index 3cd53776df5..d651aa26654 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCIEDictionaryBasedColorSpace.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCIEDictionaryBasedColorSpace.java @@ -17,7 +17,6 @@ import java.awt.color.ColorSpace; import org.apache.pdfbox.cos.COSArray; -import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSName; @@ -135,35 +134,30 @@ public final PDTristimulus getBlackPoint() } /** - * This will set the whitepoint tristimulus. As this is a required field - * this null should not be passed into this function. + * This will set the whitepoint tristimulus. As this is a required field, null should not be + * passed into this function. * - * @param whitepoint the whitepoint tristimulus + * @param whitepoint the whitepoint tristimulus. + * @throws IllegalArgumentException if null is passed as argument. */ public void setWhitePoint(PDTristimulus whitepoint) { - COSBase wpArray = whitepoint.getCOSObject(); - if (wpArray != null) + if (whitepoint == null) { - dictionary.setItem(COSName.WHITE_POINT, wpArray); + throw new IllegalArgumentException("Whitepoint may not be null"); } + dictionary.setItem(COSName.WHITE_POINT, whitepoint); fillWhitepointCache(whitepoint); } /** - * This will set the BlackPoint tristimulus. As this is a required field - * this null should not be passed into this function. + * This will set the BlackPoint tristimulus. 
* * @param blackpoint the BlackPoint tristimulus */ public void setBlackPoint(PDTristimulus blackpoint) { - COSBase bpArray = null; - if (blackpoint != null) - { - bpArray = blackpoint.getCOSObject(); - } - dictionary.setItem(COSName.BLACK_POINT, bpArray); + dictionary.setItem(COSName.BLACK_POINT, blackpoint); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCalGray.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCalGray.java index ae1e52a2ce2..db0b3c7831d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCalGray.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCalGray.java @@ -16,6 +16,8 @@ */ package org.apache.pdfbox.pdmodel.graphics.color; +import java.util.HashMap; +import java.util.Map; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSName; @@ -31,6 +33,11 @@ public final class PDCalGray extends PDCIEDictionaryBasedColorSpace { private final PDColor initialColor = new PDColor(new float[] { 0 }, this); + + // PDFBOX-4119: cache the results for much improved performance + // cached values MUST be cloned, because they are modified by the caller. + // this can be observed in rendering of PDFBOX-1724 + private final Map map1 = new HashMap(); /** * Create a new CalGray color space. 
@@ -77,13 +84,20 @@ public PDColor getInitialColor() @Override public float[] toRGB(float[] value) { - // see implementation of toRGB in PDCabRGB, and PDFBOX-2971 + // see implementation of toRGB in PDCalRGB, and PDFBOX-2971 if (wpX == 1 && wpY == 1 && wpZ == 1) { float a = value[0]; + float[] result = map1.get(a); + if (result != null) + { + return result.clone(); + } float gamma = getGamma(); float powAG = (float) Math.pow(a, gamma); - return convXYZtoRGB(powAG, powAG, powAG); + result = convXYZtoRGB(powAG, powAG, powAG); + map1.put(a, result.clone()); + return result; } else { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCalRGB.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCalRGB.java index 710360bca32..3521bc37889 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCalRGB.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCalRGB.java @@ -174,7 +174,18 @@ public final void setMatrix(Matrix matrix) COSArray matrixArray = null; if(matrix != null) { - matrixArray = matrix.toCOSArray(); + // We can't use matrix.toCOSArray(), as it only returns a subset of the matrix + float[][] values = matrix.getValues(); + matrixArray = new COSArray(); + matrixArray.add(new COSFloat(values[0][0])); + matrixArray.add(new COSFloat(values[0][1])); + matrixArray.add(new COSFloat(values[0][2])); + matrixArray.add(new COSFloat(values[1][0])); + matrixArray.add(new COSFloat(values[1][1])); + matrixArray.add(new COSFloat(values[1][2])); + matrixArray.add(new COSFloat(values[2][0])); + matrixArray.add(new COSFloat(values[2][1])); + matrixArray.add(new COSFloat(values[2][2])); } dictionary.setItem(COSName.MATRIX, matrixArray); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDColor.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDColor.java index 4ee0d384f8e..18da9a0b021 100644 --- 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDColor.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDColor.java @@ -109,11 +109,19 @@ public PDColor(float[] components, COSName patternName, PDColorSpace colorSpace) /** * Returns the components of this color value. - * @return the components of this color value + * @return the components of this color value, never null. */ public float[] getComponents() { - return components.clone(); + if (colorSpace instanceof PDPattern || colorSpace == null) + { + // colorspace of the pattern color isn't known, so just clone + // null colorspace can happen with empty annotation color + // see PDFBOX-3351-538928-p4.pdf + return components.clone(); + } + // PDFBOX-4279: copyOf instead of clone in case array is too small + return Arrays.copyOf(components, colorSpace.getNumberOfComponents()); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDColorSpace.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDColorSpace.java index 9e97807acc7..d1b1e4b4b47 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDColorSpace.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDColorSpace.java @@ -16,22 +16,25 @@ */ package org.apache.pdfbox.pdmodel.graphics.color; -import org.apache.pdfbox.cos.COSBase; -import org.apache.pdfbox.cos.COSArray; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSObject; -import org.apache.pdfbox.pdmodel.MissingResourceException; -import org.apache.pdfbox.pdmodel.PDResources; -import org.apache.pdfbox.pdmodel.common.COSObjectable; - +import java.awt.Graphics; import java.awt.Transparency; import java.awt.image.BufferedImage; import java.awt.image.ColorConvertOp; import java.awt.image.ComponentColorModel; import java.awt.image.WritableRaster; -import java.io.IOException; import java.awt.color.ColorSpace; import java.awt.image.ColorModel; +import 
java.io.IOException; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSObject; +import org.apache.pdfbox.pdmodel.MissingResourceException; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.pdmodel.ResourceCache; /** * A color space specifies how the colours of graphics objects will be painted on the page. @@ -42,7 +45,7 @@ public abstract class PDColorSpace implements COSObjectable { /** - * Creates a color space space given a name or array. + * Creates a color space given a name or array. * @param colorSpace the color space COS object * @return a new color space * @throws IOException if the color space is unknown or cannot be created @@ -88,7 +91,7 @@ public static PDColorSpace create(COSBase colorSpace, { if (colorSpace instanceof COSObject) { - return create(((COSObject) colorSpace).getObject(), resources); + return createFromCOSObject((COSObject) colorSpace, resources); } else if (colorSpace instanceof COSName) { @@ -153,7 +156,16 @@ else if (resources != null) else if (colorSpace instanceof COSArray) { COSArray array = (COSArray)colorSpace; - COSName name = (COSName)array.getObject(0); + if (array.size() == 0) + { + throw new IOException("Colorspace array is empty"); + } + COSBase base = array.getObject(0); + if (!(base instanceof COSName)) + { + throw new IOException("First element in colorspace array must be a name"); + } + COSName name = (COSName) base; // TODO cache these returned color spaces? 
@@ -171,7 +183,7 @@ else if (name == COSName.DEVICEN) } else if (name == COSName.INDEXED) { - return new PDIndexed(array); + return new PDIndexed(array, resources); } else if (name == COSName.SEPARATION) { @@ -179,7 +191,7 @@ else if (name == COSName.SEPARATION) } else if (name == COSName.ICCBASED) { - return new PDICCBased(array); + return PDICCBased.create(array, resources); } else if (name == COSName.LAB) { @@ -208,12 +220,40 @@ else if (name == COSName.DEVICECMYK || throw new IOException("Invalid color space kind: " + name); } } + else if (colorSpace instanceof COSDictionary && + ((COSDictionary) colorSpace).containsKey(COSName.COLORSPACE)) + { + // PDFBOX-4833: dictionary with /ColorSpace entry + return create(((COSDictionary) colorSpace).getDictionaryObject(COSName.COLORSPACE), resources, wasDefault); + } else { throw new IOException("Expected a name or array but got: " + colorSpace); } } + private static PDColorSpace createFromCOSObject(COSObject colorSpace, PDResources resources) + throws IOException + { + PDColorSpace cs; + if (resources != null && resources.getResourceCache() != null) + { + ResourceCache resourceCache = resources.getResourceCache(); + cs = resourceCache.getColorSpace(colorSpace); + if (cs != null) + { + return cs; + } + } + cs = create(colorSpace.getObject(), resources); + if (resources != null && resources.getResourceCache() != null && cs != null) + { + ResourceCache resourceCache = resources.getResourceCache(); + resourceCache.put(colorSpace, cs); + } + return cs; + } + // array for the given parameters protected COSArray array; @@ -258,6 +298,34 @@ else if (name == COSName.DEVICECMYK || */ public abstract BufferedImage toRGBImage(WritableRaster raster) throws IOException; + /** + * Returns the image in this colorspace or null. No conversion is performed. + * + * For special colorspaces like PDSeparation the image is returned in the gray colorspace. + * For undefined colorspaces like DeviceCMYK/DeviceRGB and DeviceGray null is returned. 
+ * + * You can always fall back to {@link #toRGBImage(WritableRaster)} if this returns null. + * + * @param raster the source raster + * @return a buffered image in this colorspace. Or null if it is not possible to extract + * that image with the original colorspace without conversion. + */ + public abstract BufferedImage toRawImage(WritableRaster raster) throws IOException; + + /** + * Returns the given raster as BufferedImage with the given awtColorSpace using a + * ComponentColorModel. + * @param raster the source raster + * @param awtColorSpace the AWT colorspace + * @return a BufferedImage in this colorspace + */ + protected final BufferedImage toRawImage(WritableRaster raster, ColorSpace awtColorSpace) + { + ColorModel colorModel = new ComponentColorModel(awtColorSpace, + false, false, Transparency.OPAQUE, raster.getDataBuffer().getDataType()); + return new BufferedImage(colorModel, raster, false, null); + } + /** * Returns the (A)RGB equivalent of the given raster, using the given AWT color space * to perform the conversion. @@ -278,6 +346,21 @@ protected BufferedImage toRGBImageAWT(WritableRaster raster, ColorSpace colorSpa BufferedImage src = new BufferedImage(colorModel, raster, false, null); BufferedImage dest = new BufferedImage(raster.getWidth(), raster.getHeight(), BufferedImage.TYPE_INT_RGB); + if (src.getWidth() == 1 || src.getHeight() == 1) + { + // PDFBOX-5051: ColorConvertOp is too slow for tiny images. But we can't use drawImage() + // for all images because it is slower when used for all images. + // Re quality & speed: the quality gold standard is setRGB(getRGB()) but this is also + // the slowest. drawImage() is identical in quality (but faster) except for ICC based + // images with 1 component. ColorConvertOp is fastest except for small images, there's + // somehow a slowness "price" paid per call and the quality is slightly flawed sometimes, + // and rendering hints are ignored. + // All the above tested with jdk8 and LCMS. 
+ Graphics g2d = dest.getGraphics(); + g2d.drawImage(src, 0, 0, null); + g2d.dispose(); + return dest; + } ColorConvertOp op = new ColorConvertOp(null); op.filter(src, dest); return dest; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceCMYK.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceCMYK.java index 6272cc32fef..13f83db867c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceCMYK.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceCMYK.java @@ -16,13 +16,16 @@ */ package org.apache.pdfbox.pdmodel.graphics.color; -import java.net.URL; +import java.util.Arrays; + import org.apache.pdfbox.cos.COSName; +import java.awt.color.ColorSpace; import java.awt.color.ICC_ColorSpace; import java.awt.color.ICC_Profile; import java.awt.image.BufferedImage; import java.awt.image.WritableRaster; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; @@ -44,7 +47,9 @@ public class PDDeviceCMYK extends PDDeviceColorSpace } private final PDColor initialColor = new PDColor(new float[] { 0, 0, 0, 1 }, this); - private volatile ICC_ColorSpace awtColorSpace; + private ICC_ColorSpace awtColorSpace; + private volatile boolean initDone = false; + private boolean usePureJavaCMYKConversion = false; protected PDDeviceCMYK() { @@ -56,14 +61,14 @@ protected PDDeviceCMYK() protected void init() throws IOException { // no need to synchronize this check as it is atomic - if (awtColorSpace != null) + if (initDone) { return; } synchronized (this) { // we might have been waiting for another thread, so check again - if (awtColorSpace != null) + if (initDone) { return; } @@ -79,6 +84,11 @@ protected void init() throws IOException // condition caused by lazy initialization of the color transform, so we perform // an initial color conversion while we're still in a static context, see PDFBOX-2184 awtColorSpace.toRGB(new float[] { 0, 0, 0, 0 }); 
+ usePureJavaCMYKConversion = System + .getProperty("org.apache.pdfbox.rendering.UsePureJavaCMYKConversion") != null; + + // Assignment to volatile must be the LAST statement in this block! + initDone = true; } } @@ -89,17 +99,11 @@ protected ICC_Profile getICCProfile() throws IOException // Instead, the "ISO Coated v2 300% (basICColor)" is used, which // is an open alternative to the "ISO Coated v2 300% (ECI)" profile. - String name = "org/apache/pdfbox/resources/icc/ISOcoated_v2_300_bas.icc"; - - URL url = PDDeviceCMYK.class.getClassLoader().getResource(name); - if (url == null) - { - throw new IOException("Error loading resource: " + name); - } + String name = "/org/apache/pdfbox/resources/icc/ISOcoated_v2_300_bas.icc"; - InputStream input = url.openStream(); - ICC_Profile iccProfile = ICC_Profile.getInstance(input); - input.close(); + InputStream is = new BufferedInputStream(PDDeviceCMYK.class.getResourceAsStream(name)); + ICC_Profile iccProfile = ICC_Profile.getInstance(is); + is.close(); return iccProfile; } @@ -135,10 +139,72 @@ public float[] toRGB(float[] value) throws IOException return awtColorSpace.toRGB(value); } + @Override + public BufferedImage toRawImage(WritableRaster raster) throws IOException + { + // Device CMYK is not specified, as it's the colors of whatever device you use. 
+ // The user should fallback to the RGB image + return null; + } + @Override public BufferedImage toRGBImage(WritableRaster raster) throws IOException { init(); return toRGBImageAWT(raster, awtColorSpace); } + + @Override + protected BufferedImage toRGBImageAWT(WritableRaster raster, ColorSpace colorSpace) + { + if (usePureJavaCMYKConversion) + { + BufferedImage dest = new BufferedImage(raster.getWidth(), raster.getHeight(), + BufferedImage.TYPE_INT_RGB); + ColorSpace destCS = dest.getColorModel().getColorSpace(); + WritableRaster destRaster = dest.getRaster(); + float[] srcValues = new float[4]; + float[] lastValues = new float[] { -1.0f, -1.0f, -1.0f, -1.0f }; + float[] destValues = new float[3]; + int startX = raster.getMinX(); + int startY = raster.getMinY(); + int endX = raster.getWidth() + startX; + int endY = raster.getHeight() + startY; + for (int x = startX; x < endX; x++) + { + for (int y = startY; y < endY; y++) + { + raster.getPixel(x, y, srcValues); + // check if the last value can be reused + if (!Arrays.equals(lastValues, srcValues)) + { + lastValues[0] = srcValues[0]; + srcValues[0] = srcValues[0] / 255f; + + lastValues[1] = srcValues[1]; + srcValues[1] = srcValues[1] / 255f; + + lastValues[2] = srcValues[2]; + srcValues[2] = srcValues[2] / 255f; + + lastValues[3] = srcValues[3]; + srcValues[3] = srcValues[3] / 255f; + + // use CIEXYZ as intermediate format to optimize the color conversion + destValues = destCS.fromCIEXYZ(colorSpace.toCIEXYZ(srcValues)); + for (int k = 0; k < destValues.length; k++) + { + destValues[k] = destValues[k] * 255f; + } + } + destRaster.setPixel(x, y, destValues); + } + } + return dest; + } + else + { + return super.toRGBImageAWT(raster, colorSpace); + } + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceGray.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceGray.java index 5c55cf0c088..d310cbfc0cc 100644 --- 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceGray.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceGray.java @@ -69,6 +69,14 @@ public float[] toRGB(float[] value) return new float[] { value[0], value[0], value[0] }; } + @Override + public BufferedImage toRawImage(WritableRaster raster) throws IOException + { + // DeviceGray is whatever the output device chooses. We have no Java colorspace + // for this. + return null; + } + @Override public BufferedImage toRGBImage(WritableRaster raster) throws IOException { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceN.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceN.java index 2870ee8962d..468565f93ec 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceN.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceN.java @@ -24,6 +24,7 @@ import java.awt.image.Raster; import java.awt.image.WritableRaster; import java.io.IOException; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -140,34 +141,32 @@ private void initColorConversionCache() throws IOException // spot colorants spotColorSpaces = new PDSeparation[numColorants]; - if (attributes.getColorants() != null) - { - // spot color spaces - Map spotColorants = attributes.getColorants(); - // map each colorant to the corresponding spot color space - for (int c = 0; c < numColorants; c++) + // spot color spaces + Map spotColorants = attributes.getColorants(); + + // map each colorant to the corresponding spot color space + for (int c = 0; c < numColorants; c++) + { + String name = colorantNames.get(c); + PDSeparation spot = spotColorants.get(name); + if (spot != null) { - String name = colorantNames.get(c); - PDSeparation spot = spotColorants.get(name); - if (spot != null) - { - // spot colorant - spotColorSpaces[c] = spot; + // spot colorant + spotColorSpaces[c] = spot; - // 
spot colors may replace process colors with same name - // providing that the subtype is not NChannel. - if (!isNChannel()) - { - colorantToComponent[c] = -1; - } - } - else + // spot colors may replace process colors with same name + // providing that the subtype is not NChannel. + if (!isNChannel()) { - // process colorant - spotColorSpaces[c] = null; + colorantToComponent[c] = -1; } } + else + { + // process colorant + spotColorSpaces[c] = null; + } } } @@ -281,6 +280,11 @@ else if (spotColorSpaces[c] == null) // private BufferedImage toRGBWithTintTransform(WritableRaster raster) throws IOException { + // cache color mappings + Map map1 = new HashMap(); + String key = null; + StringBuilder keyBuilder = new StringBuilder(); + int width = raster.getWidth(); int height = raster.getHeight(); @@ -296,27 +300,38 @@ private BufferedImage toRGBWithTintTransform(WritableRaster raster) throws IOExc for (int x = 0; x < width; x++) { raster.getPixel(x, y, src); - - int[] intSrc = new int[numSrcComponents]; - raster.getPixel(x, y, intSrc); - + // use a string representation as key + keyBuilder.append(src[0]); + for (int s = 1; s < numSrcComponents; s++) + { + keyBuilder.append('#').append(src[s]); + } + key = keyBuilder.toString(); + keyBuilder.setLength(0); + int[] pxl = map1.get(key); + if (pxl != null) + { + rgbRaster.setPixel(x, y, pxl); + continue; + } // scale to 0..1 for (int s = 0; s < numSrcComponents; s++) { src[s] = src[s] / 255; } - // convert to alternate color space via tint transform float[] result = tintTransform.eval(src); // convert from alternate color space to RGB float[] rgbFloat = alternateColorSpace.toRGB(result); - - for (int s = 0; s < 3; s++) - { - // scale to 0..255 - rgb[s] = (int) (rgbFloat[s] * 255f); - } + + // scale to 0..255 + rgb[0] = (int) (rgbFloat[0] * 255f); + rgb[1] = (int) (rgbFloat[1] * 255f); + rgb[2] = (int) (rgbFloat[2] * 255f); + + // must clone because rgb is reused + map1.put(key, rgb.clone()); rgbRaster.setPixel(x, y, rgb); } 
@@ -345,7 +360,8 @@ private float[] toRGBWithAttributes(float[] value) throws IOException for (int c = 0; c < numColorants; c++) { PDColorSpace componentColorSpace; - if (colorantToComponent[c] >= 0) + boolean isProcessColorant = colorantToComponent[c] >= 0; + if (isProcessColorant) { // process color componentColorSpace = processColorSpace; @@ -363,13 +379,12 @@ else if (spotColorSpaces[c] == null) } // get the single component - boolean isProcessColorant = colorantToComponent[c] >= 0; float[] componentSamples = new float[componentColorSpace.getNumberOfComponents()]; - int componentIndex = colorantToComponent[c]; if (isProcessColorant) { // process color + int componentIndex = colorantToComponent[c]; componentSamples[componentIndex] = value[c]; } else @@ -402,6 +417,13 @@ private float[] toRGBWithTintTransform(float[] value) throws IOException return alternateColorSpace.toRGB(altValue); } + @Override + public BufferedImage toRawImage(WritableRaster raster) + { + // We don't know how to convert that. + return null; + } + /** * Returns true if this color space has the NChannel subtype. * @return true if subtype is NChannel diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceNAttributes.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceNAttributes.java index 2322d6fd135..785c0e12f1e 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceNAttributes.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceNAttributes.java @@ -64,13 +64,13 @@ public COSDictionary getCOSDictionary() /** * Returns a map of colorants and their associated Separation color space. - * @return map of colorants to color spaces + * @return map of colorants to color spaces, never null. 
* @throws IOException If there is an error reading a color space */ public Map getColorants() throws IOException { Map actuals = new HashMap(); - COSDictionary colorants = (COSDictionary)dictionary.getDictionaryObject(COSName.COLORANTS); + COSDictionary colorants = dictionary.getCOSDictionary(COSName.COLORANTS); if(colorants == null) { colorants = new COSDictionary(); @@ -90,7 +90,7 @@ public Map getColorants() throws IOException */ public PDDeviceNProcess getProcess() { - COSDictionary process = (COSDictionary)dictionary.getDictionaryObject(COSName.PROCESS); + COSDictionary process = dictionary.getCOSDictionary(COSName.PROCESS); if (process == null) { return null; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceNProcess.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceNProcess.java index 247767040f8..6417c1b7787 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceNProcess.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceNProcess.java @@ -81,12 +81,12 @@ public PDColorSpace getColorSpace() throws IOException */ public List getComponents() { - List components = new ArrayList(); - COSArray cosComponents = (COSArray)dictionary.getDictionaryObject(COSName.COMPONENTS); + COSArray cosComponents = dictionary.getCOSArray(COSName.COMPONENTS); if (cosComponents == null) { - return components; + return new ArrayList(0); } + List components = new ArrayList(cosComponents.size()); for (COSBase name : cosComponents) { components.add(((COSName)name).getName()); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceRGB.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceRGB.java index 54c8fe057b2..d69535cf66e 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceRGB.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDDeviceRGB.java @@ -16,13 +16,10 @@ */ 
package org.apache.pdfbox.pdmodel.graphics.color; -import java.awt.Transparency; -import java.awt.color.ColorSpace; import java.awt.image.BufferedImage; -import java.awt.image.ColorModel; -import java.awt.image.ComponentColorModel; import java.awt.image.WritableRaster; import java.io.IOException; + import org.apache.pdfbox.cos.COSName; /** @@ -38,36 +35,9 @@ public final class PDDeviceRGB extends PDDeviceColorSpace public static final PDDeviceRGB INSTANCE = new PDDeviceRGB(); private final PDColor initialColor = new PDColor(new float[] { 0, 0, 0 }, this); - private volatile ColorSpace awtColorSpace; - - private PDDeviceRGB() - { - } - /** - * Lazy setting of the AWT color space due to JDK race condition. - */ - private void init() + private PDDeviceRGB() { - // no need to synchronize this check as it is atomic - if (awtColorSpace != null) - { - return; - } - synchronized (this) - { - // we might have been waiting for another thread, so check again - if (awtColorSpace != null) - { - return; - } - awtColorSpace = ColorSpace.getInstance(ColorSpace.CS_sRGB); - - // there is a JVM bug which results in a CMMException which appears to be a race - // condition caused by lazy initialization of the color transform, so we perform - // an initial color conversion while we're still synchronized, see PDFBOX-2184 - awtColorSpace.toRGB(new float[] { 0, 0, 0, 0 }); - } } @Override @@ -77,7 +47,7 @@ public String getName() } /** - * @inheritDoc + * {@inheritDoc} */ @Override public int getNumberOfComponents() @@ -100,17 +70,28 @@ public PDColor getInitialColor() @Override public float[] toRGB(float[] value) { - init(); - return awtColorSpace.toRGB(value); + return value; } @Override public BufferedImage toRGBImage(WritableRaster raster) throws IOException { - init(); - ColorModel colorModel = new ComponentColorModel(awtColorSpace, - false, false, Transparency.OPAQUE, raster.getDataBuffer().getDataType()); + // + // WARNING: this method is performance sensitive, modify with care! 
+ // + // Please read PDFBOX-3854 and PDFBOX-2092 and look at the related commits first. + // The current code returns TYPE_INT_RGB images which prevents slowness due to threads + // blocking each other when TYPE_CUSTOM images are used. + BufferedImage image = new BufferedImage(raster.getWidth(), raster.getHeight(), BufferedImage.TYPE_INT_RGB); + image.setData(raster); + return image; + } - return new BufferedImage(colorModel, raster, false, null); + @Override + public BufferedImage toRawImage(WritableRaster raster) throws IOException + { + // Device RGB is not specified, as it's the colors of whatever device you use. The user + // should use the toRGBImage(). + return null; } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDICCBased.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDICCBased.java index 96f8b7c2dd5..be615365cf3 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDICCBased.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDICCBased.java @@ -16,38 +16,39 @@ */ package org.apache.pdfbox.pdmodel.graphics.color; +import java.awt.Transparency; +import java.awt.color.CMMException; import java.awt.color.ColorSpace; +import java.awt.color.ICC_ColorSpace; +import java.awt.color.ICC_Profile; +import java.awt.color.ProfileDataException; +import java.awt.image.BufferedImage; +import java.awt.image.ComponentColorModel; +import java.awt.image.DataBuffer; +import java.awt.image.WritableRaster; +import java.io.IOException; +import java.io.InputStream; import java.util.Arrays; +import java.util.List; +import java.util.StringTokenizer; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; - import 
org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.COSArrayList; import org.apache.pdfbox.pdmodel.common.PDRange; import org.apache.pdfbox.pdmodel.common.PDStream; - -import java.awt.Color; -import java.awt.color.CMMException; -import java.awt.color.ICC_ColorSpace; -import java.awt.color.ICC_Profile; -import java.awt.color.ProfileDataException; -import java.awt.image.BufferedImage; - -import java.awt.image.WritableRaster; -import java.io.InputStream; -import java.io.IOException; - -import java.util.List; import org.apache.pdfbox.util.Charsets; /** - * ICCBased colour spaces are based on a cross-platform colour profile as defined by the + * ICCBased color spaces are based on a cross-platform color profile as defined by the * International Color Consortium (ICC). * * @author Ben Litchfield @@ -63,6 +64,37 @@ public final class PDICCBased extends PDCIEBasedColorSpace private PDColorSpace alternateColorSpace; private ICC_ColorSpace awtColorSpace; private PDColor initialColor; + private boolean isRGB = false; + // allows to force using alternate color space instead of ICC color space for performance + // reasons with LittleCMS (LCMS), see PDFBOX-4309 + // WARNING: do not activate this in a conforming reader + private boolean useOnlyAlternateColorSpace = false; + private static final boolean IS_KCMS; + + static + { + String cmmProperty = System.getProperty("sun.java2d.cmm"); + boolean result = false; + if (!isMinJdk8()) + { + // always KCMS but class has different name + result = true; + } + else if ("sun.java2d.cmm.kcms.KcmsServiceProvider".equals(cmmProperty)) + { + try + { + Class.forName("sun.java2d.cmm.kcms.KcmsServiceProvider"); + result = true; + } + catch (ClassNotFoundException e) + { + // KCMS not available + } + } + // else maybe KCMS was available, but not wished + IS_KCMS = result; + } /** * Creates a new ICC color space with an empty stream. 
@@ -79,16 +111,71 @@ public PDICCBased(PDDocument doc) /** * Creates a new ICC color space using the PDF array. * - * @param iccArray the ICC stream object - * @throws java.io.IOException if there is an error reading the ICC profile. + * @param iccArray the ICC stream object. + * @throws IOException if there is an error reading the ICC profile or if the parameter is + * invalid. + * + * @deprecated This will be private in 3.0. Please use + * {@link PDICCBased#create(org.apache.pdfbox.cos.COSArray, org.apache.pdfbox.pdmodel.PDResources)} + * instead, which supports caching. */ + @Deprecated public PDICCBased(COSArray iccArray) throws IOException { + checkArray(iccArray); + useOnlyAlternateColorSpace = System + .getProperty("org.apache.pdfbox.rendering.UseAlternateInsteadOfICCColorSpace") != null; array = iccArray; stream = new PDStream((COSStream) iccArray.getObject(1)); loadICCProfile(); } + /** + * Creates a new ICC color space using the PDF array, optionally using a resource cache. + * + * @param iccArray the ICC stream object. + * @param resources resources to use as cache, or null for no caching. + * @return an ICC color space. + * @throws IOException if there is an error reading the ICC profile or if the parameter is + * invalid. 
+ */ + public static PDICCBased create(COSArray iccArray, PDResources resources) throws IOException + { + checkArray(iccArray); + COSBase base = iccArray.get(1); + COSObject indirect = null; + if (base instanceof COSObject) + { + indirect = (COSObject) base; + } + if (indirect != null && resources != null && resources.getResourceCache() != null) + { + PDColorSpace space = resources.getResourceCache().getColorSpace(indirect); + if (space instanceof PDICCBased) + { + return (PDICCBased) space; + } + } + PDICCBased space = new PDICCBased(iccArray); + if (indirect != null && resources != null && resources.getResourceCache() != null) + { + resources.getResourceCache().put(indirect, space); + } + return space; + } + + private static void checkArray(COSArray iccArray) throws IOException + { + if (iccArray.size() < 2) + { + throw new IOException("ICCBased colorspace array must have two elements"); + } + if (!(iccArray.getObject(1) instanceof COSStream)) + { + throw new IOException("ICCBased colorspace array must have a stream as second element"); + } + } + @Override public String getName() { @@ -109,6 +196,18 @@ public PDStream getPDStream() */ private void loadICCProfile() throws IOException { + if (useOnlyAlternateColorSpace) + { + try + { + fallbackToAlternateColorSpace(null); + return; + } + catch ( IOException e ) + { + LOG.warn("Error initializing alternate color space: " + e.getLocalizedMessage()); + } + } InputStream input = null; try { @@ -116,46 +215,66 @@ private void loadICCProfile() throws IOException // if the embedded profile is sRGB then we can use Java's built-in profile, which // results in a large performance gain as it's our native color space, see PDFBOX-2587 - ICC_Profile profile = ICC_Profile.getInstance(input); - if (is_sRGB(profile)) + ICC_Profile profile; + synchronized (LOG) { - awtColorSpace = (ICC_ColorSpace)ColorSpace.getInstance(ColorSpace.CS_sRGB); - iccProfile = awtColorSpace.getProfile(); - } - else - { - awtColorSpace = new 
ICC_ColorSpace(profile); - iccProfile = profile; - } + profile = ICC_Profile.getInstance(input); + if (is_sRGB(profile)) + { + isRGB = true; + awtColorSpace = (ICC_ColorSpace) ColorSpace.getInstance(ColorSpace.CS_sRGB); + iccProfile = awtColorSpace.getProfile(); + } + else + { + profile = ensureDisplayProfile(profile); + awtColorSpace = new ICC_ColorSpace(profile); + iccProfile = profile; + } - // set initial colour - float[] initial = new float[getNumberOfComponents()]; - for (int c = 0; c < getNumberOfComponents(); c++) - { - initial[c] = Math.max(0, getRangeForComponent(c).getMin()); - } - initialColor = new PDColor(initial, this); + // set initial colour + float[] initial = new float[getNumberOfComponents()]; + for (int c = 0; c < getNumberOfComponents(); c++) + { + initial[c] = Math.max(0, getRangeForComponent(c).getMin()); + } + initialColor = new PDColor(initial, this); - // create a color in order to trigger a ProfileDataException - // or CMMException due to invalid profiles, see PDFBOX-1295 and PDFBOX-1740 - new Color(awtColorSpace, new float[getNumberOfComponents()], 1f); + if (IS_KCMS) + { + // do things that trigger a ProfileDataException + // or CMMException due to invalid profiles, see PDFBOX-1295 and PDFBOX-1740 (ü-file) + // or ArrayIndexOutOfBoundsException, see PDFBOX-3610 + // also triggers a ProfileDataException for PDFBOX-3549 with KCMS + awtColorSpace.toRGB(new float[getNumberOfComponents()]); + } + else + { + // PDFBOX-4015: this one triggers "CMMException: LCMS error 13" with LCMS + new ComponentColorModel(awtColorSpace, false, false, + Transparency.OPAQUE, DataBuffer.TYPE_BYTE); + } + } } - catch (RuntimeException e) + catch (ProfileDataException e) { - if (e instanceof ProfileDataException || - e instanceof CMMException || - e instanceof IllegalArgumentException) - { - // fall back to alternateColorSpace color space - awtColorSpace = null; - alternateColorSpace = getAlternateColorSpace(); - LOG.error("Can't read embedded ICC profile (" + 
e.getLocalizedMessage() + "), using alternate color space: " + alternateColorSpace.getName()); - initialColor = alternateColorSpace.getInitialColor(); - } - else - { - throw e; - } + fallbackToAlternateColorSpace(e); + } + catch (CMMException e) + { + fallbackToAlternateColorSpace(e); + } + catch (IllegalArgumentException e) + { + fallbackToAlternateColorSpace(e); + } + catch (ArrayIndexOutOfBoundsException e) + { + fallbackToAlternateColorSpace(e); + } + catch (IOException e) + { + fallbackToAlternateColorSpace(e); } finally { @@ -163,8 +282,25 @@ private void loadICCProfile() throws IOException } } + private void fallbackToAlternateColorSpace(Exception e) throws IOException + { + awtColorSpace = null; + alternateColorSpace = getAlternateColorSpace(); + if (alternateColorSpace.equals(PDDeviceRGB.INSTANCE)) + { + isRGB = true; + } + if ( e != null ) + { + LOG.warn("Can't read embedded ICC profile (" + e.getLocalizedMessage() + + "), using alternate color space: " + alternateColorSpace.getName()); + } + initialColor = alternateColorSpace.getInitialColor(); + } + /** - * Returns true if the given profile is represents sRGB. + * Returns true if the given profile represents sRGB. 
+ * (unreliable on the data of ColorSpace.CS_sRGB in openjdk) */ private boolean is_sRGB(ICC_Profile profile) { @@ -174,13 +310,44 @@ private boolean is_sRGB(ICC_Profile profile) return deviceModel.equals("sRGB"); } + // PDFBOX-4114: fix profile that has the wrong display class, + // as done by Harald Kuhr in twelvemonkeys JPEGImageReader.ensureDisplayProfile() + private static ICC_Profile ensureDisplayProfile(ICC_Profile profile) + { + if (profile.getProfileClass() != ICC_Profile.CLASS_DISPLAY) + { + byte[] profileData = profile.getData(); // Need to clone entire profile, due to a OpenJDK bug + + if (profileData[ICC_Profile.icHdrRenderingIntent] == ICC_Profile.icPerceptual) + { + LOG.warn("ICC profile is Perceptual, ignoring, treating as Display class"); + intToBigEndian(ICC_Profile.icSigDisplayClass, profileData, ICC_Profile.icHdrDeviceClass); + return ICC_Profile.getInstance(profileData); + } + } + return profile; + } + + private static void intToBigEndian(int value, byte[] array, int index) + { + array[index] = (byte) (value >> 24); + array[index + 1] = (byte) (value >> 16); + array[index + 2] = (byte) (value >> 8); + array[index + 3] = (byte) (value); + } + @Override public float[] toRGB(float[] value) throws IOException { + if (isRGB) + { + return value; + } if (awtColorSpace != null) { + // PDFBOX-2142: clamp bad values // WARNING: toRGB is very slow when used with LUT-based ICC profiles - return awtColorSpace.toRGB(value); + return awtColorSpace.toRGB(clampColors(awtColorSpace, value)); } else { @@ -188,6 +355,18 @@ public float[] toRGB(float[] value) throws IOException } } + private float[] clampColors(ICC_ColorSpace cs, float[] value) + { + float[] result = new float[value.length]; + for (int i = 0; i < value.length; ++i) + { + float minValue = cs.getMinValue(i); + float maxValue = cs.getMaxValue(i); + result[i] = value[i] < minValue ? minValue : (value[i] > maxValue ? 
maxValue : value[i]); + } + return result; + } + @Override public BufferedImage toRGBImage(WritableRaster raster) throws IOException { @@ -201,12 +380,34 @@ public BufferedImage toRGBImage(WritableRaster raster) throws IOException } } + @Override + public BufferedImage toRawImage(WritableRaster raster) throws IOException + { + if(awtColorSpace == null) + { + return alternateColorSpace.toRawImage(raster); + } + return toRawImage(raster, awtColorSpace); + } + @Override public int getNumberOfComponents() { if (numberOfComponents < 0) { numberOfComponents = stream.getCOSObject().getInt(COSName.N); + + // PDFBOX-4801 correct wrong /N values + if (iccProfile != null) + { + int numIccComponents = iccProfile.getNumComponents(); + if (numIccComponents != numberOfComponents) + { + LOG.warn("Using " + numIccComponents + " components from ICC profile info instead of " + + numberOfComponents + " components from /N entry"); + numberOfComponents = numIccComponents; + } + } } return numberOfComponents; } @@ -252,21 +453,19 @@ public PDColorSpace getAlternateColorSpace() throws IOException alternateArray = new COSArray(); int numComponents = getNumberOfComponents(); COSName csName; - if(numComponents == 1) - { - csName = COSName.DEVICEGRAY; - } - else if(numComponents == 3) - { - csName = COSName.DEVICERGB; - } - else if(numComponents == 4) + switch (numComponents) { - csName = COSName.DEVICECMYK; - } - else - { - throw new IOException("Unknown color space number of components:" + numComponents); + case 1: + csName = COSName.DEVICEGRAY; + break; + case 3: + csName = COSName.DEVICERGB; + break; + case 4: + csName = COSName.DEVICECMYK; + break; + default: + throw new IOException("Unknown color space number of components:" + numComponents); } alternateArray.add(csName); } @@ -317,9 +516,13 @@ public COSStream getMetadata() } /** - * Returns the type of the color space in the ICC profile. - * Will be one of {@code TYPE_GRAY}, {@code TYPE_RGB}, or {@code TYPE_CMYK}. 
- * @return an ICC color space type + * Returns the type of the color space in the ICC profile. If the ICC profile is invalid, the + * type of the alternate colorspace is returned, which will be one of + * {@link ColorSpace#TYPE_GRAY TYPE_GRAY}, {@link ColorSpace#TYPE_RGB TYPE_RGB}, + * {@link ColorSpace#TYPE_CMYK TYPE_CMYK}, or -1 if that one is invalid. + * + * @return an ICC color space type. See {@link ColorSpace#getType()} and the static values of + * {@link ColorSpace} for more details. */ public int getColorSpaceType() { @@ -327,34 +530,27 @@ public int getColorSpaceType() { return iccProfile.getColorSpaceType(); } - else + + // if the ICC Profile could not be read + switch (alternateColorSpace.getNumberOfComponents()) { - // if the ICC Profile could not be read - if (alternateColorSpace.getNumberOfComponents() == 1) - { - return ICC_ColorSpace.TYPE_GRAY; - } - else if (alternateColorSpace.getNumberOfComponents() == 3) - { - return ICC_ColorSpace.TYPE_RGB; - } - else if (alternateColorSpace.getNumberOfComponents() == 4) - { - return ICC_ColorSpace.TYPE_CMYK; - } - else - { + case 1: + return ColorSpace.TYPE_GRAY; + case 3: + return ColorSpace.TYPE_RGB; + case 4: + return ColorSpace.TYPE_CMYK; + default: // should not happen as all ICC color spaces in PDF must have 1,3, or 4 components return -1; - } } } /** * Sets the number of color components. * @param n the number of color components + * @deprecated it's probably not safe to use this, this method will be removed in 3.0. */ - // TODO it's probably not safe to use this @Deprecated public void setNumberOfComponents(int n) { @@ -364,10 +560,10 @@ public void setNumberOfComponents(int n) /** * Sets the list of alternateColorSpace color spaces. - * This should be a list of PDColorSpace objects. 
+ * * @param list the list of color space objects */ - public void setAlternateColorSpaces(List list) + public void setAlternateColorSpaces(List list) { COSArray altArray = null; if(list != null) @@ -409,9 +605,40 @@ public void setMetadata(COSStream metadata) stream.getCOSObject().setItem(COSName.METADATA, metadata); } + /** + * Internal accessor to support indexed raw images. + * @return true if this colorspace is sRGB. + */ + boolean isSRGB() + { + return isRGB; + } + @Override public String toString() { return getName() + "{numberOfComponents: " + getNumberOfComponents() + "}"; } + + private static boolean isMinJdk8() + { + // strategy from lucene-solr/lucene/core/src/java/org/apache/lucene/util/Constants.java + String version = System.getProperty("java.specification.version"); + final StringTokenizer st = new StringTokenizer(version, "."); + try + { + int major = Integer.parseInt(st.nextToken()); + int minor = 0; + if (st.hasMoreTokens()) + { + minor = Integer.parseInt(st.nextToken()); + } + return major > 1 || (major == 1 && minor >= 8); + } + catch (NumberFormatException nfe) + { + // maybe some new numbering scheme in the 22nd century + return true; + } + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDIndexed.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDIndexed.java index 6c7b0fac071..d3b6ec17f51 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDIndexed.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDIndexed.java @@ -18,7 +18,9 @@ import java.awt.Point; import java.awt.image.BufferedImage; +import java.awt.image.ColorModel; import java.awt.image.DataBuffer; +import java.awt.image.IndexColorModel; import java.awt.image.Raster; import java.awt.image.WritableRaster; import java.io.IOException; @@ -30,6 +32,7 @@ import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.cos.COSString; +import 
org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDStream; /** @@ -65,13 +68,27 @@ public PDIndexed() } /** - * Creates a new Indexed color space from the given PDF array. + * Creates a new indexed color space from the given PDF array. * @param indexedArray the array containing the indexed parameters + * @throws java.io.IOException */ public PDIndexed(COSArray indexedArray) throws IOException + { + this(indexedArray, null); + } + + /** + * Creates a new indexed color space from the given PDF array. + * @param indexedArray the array containing the indexed parameters + * @param resources the resources, can be null. Allows to use its cache for the colorspace. + * @throws java.io.IOException + */ + public PDIndexed(COSArray indexedArray, PDResources resources) throws IOException { array = indexedArray; - baseColorSpace = PDColorSpace.create(array.getObject(1)); + // don't call getObject(1), we want to pass a reference if possible + // to profit from caching (PDFBOX-4149) + baseColorSpace = PDColorSpace.create(array.get(1), resources); readColorTable(); initRgbColorTable(); } @@ -109,8 +126,17 @@ private void initRgbColorTable() throws IOException // convert the color table into a 1-row BufferedImage in the base color space, // using a writable raster for high performance - WritableRaster baseRaster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, - actualMaxIndex + 1, 1, numBaseComponents, new Point(0, 0)); + WritableRaster baseRaster; + try + { + baseRaster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, + actualMaxIndex + 1, 1, numBaseComponents, new Point(0, 0)); + } + catch (IllegalArgumentException ex) + { + // PDFBOX-4503: when stream is empty or null + throw new IOException(ex); + } int[] base = new int[numBaseComponents]; for (int i = 0, n = actualMaxIndex; i <= n; i++) @@ -186,6 +212,29 @@ public BufferedImage toRGBImage(WritableRaster raster) throws IOException return rgbImage; } + @Override + public BufferedImage 
toRawImage(WritableRaster raster) + { + // We can only convert sRGB index colorspaces, depending on the base colorspace + if (baseColorSpace instanceof PDICCBased && ((PDICCBased) baseColorSpace).isSRGB()) + { + byte[] r = new byte[colorTable.length]; + byte[] g = new byte[colorTable.length]; + byte[] b = new byte[colorTable.length]; + for (int i = 0; i < colorTable.length; i++) + { + r[i] = (byte) ((int) (colorTable[i][0] * 255) & 0xFF); + g[i] = (byte) ((int) (colorTable[i][1] * 255) & 0xFF); + b[i] = (byte) ((int) (colorTable[i][2] * 255) & 0xFF); + } + ColorModel colorModel = new IndexColorModel(8, colorTable.length, r, g, b); + return new BufferedImage(colorModel, raster, false, null); + } + + // We can't handle all other cases at the moment. + return null; + } + /** * Returns the base color space. * @return the base color space. @@ -202,7 +251,7 @@ private int getHival() } // reads the lookup table data from the array - private byte[] getLookupData() throws IOException + private void readLookupData() throws IOException { if (lookupData == null) { @@ -224,7 +273,6 @@ else if (lookupTable == null) throw new IOException("Error: Unknown type for lookup table " + lookupTable); } } - return lookupData; } // @@ -232,7 +280,8 @@ else if (lookupTable == null) // private void readColorTable() throws IOException { - byte[] lookupData = getLookupData(); + readLookupData(); + int maxIndex = Math.min(getHival(), 255); int numComponents = baseColorSpace.getNumberOfComponents(); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDJPXColorSpace.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDJPXColorSpace.java index f1760c637b4..4491400895f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDJPXColorSpace.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDJPXColorSpace.java @@ -1,93 +1,99 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor 
license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.pdmodel.graphics.color; - -import org.apache.pdfbox.cos.COSBase; - -import java.awt.color.ColorSpace; -import java.awt.image.BufferedImage; -import java.awt.image.WritableRaster; -import java.io.IOException; - -/** - * A color space embedded in a JPX file. - * This wraps the AWT ColorSpace which is obtained after JAI Image I/O reads a JPX stream. - * - * @author John Hewson - */ -public final class PDJPXColorSpace extends PDColorSpace -{ - private final ColorSpace awtColorSpace; - - /** - * Creates a new JPX color space from the given AWT color space. 
- * @param colorSpace AWT color space from a JPX image - */ - public PDJPXColorSpace(ColorSpace colorSpace) - { - this.awtColorSpace = colorSpace; - } - - @Override - public String getName() - { - return "JPX"; - } - - @Override - public int getNumberOfComponents() - { - return awtColorSpace.getNumComponents(); - } - - @Override - public float[] getDefaultDecode(int bitsPerComponent) - { - int n = getNumberOfComponents(); - float[] decode = new float[n * 2]; - for (int i = 0; i < n; i++) - { - decode[i * 2] = awtColorSpace.getMinValue(i); - decode[i * 2 + 1] = awtColorSpace.getMaxValue(i); - } - return decode; - } - - @Override - public PDColor getInitialColor() - { - throw new UnsupportedOperationException("JPX color spaces don't support drawing"); - } - - @Override - public float[] toRGB(float[] value) - { - throw new UnsupportedOperationException("JPX color spaces don't support drawing"); - } - - @Override - public BufferedImage toRGBImage(WritableRaster raster) throws IOException - { - return toRGBImageAWT(raster, awtColorSpace); - } - - @Override - public COSBase getCOSObject() - { - throw new UnsupportedOperationException("JPX color spaces don't have COS objects"); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.graphics.color; + +import org.apache.pdfbox.cos.COSBase; + +import java.awt.color.ColorSpace; +import java.awt.image.BufferedImage; +import java.awt.image.WritableRaster; +import java.io.IOException; + +/** + * A color space embedded in a JPX file. + * This wraps the AWT ColorSpace which is obtained after JAI Image I/O reads a JPX stream. + * + * @author John Hewson + */ +public final class PDJPXColorSpace extends PDColorSpace +{ + private final ColorSpace awtColorSpace; + + /** + * Creates a new JPX color space from the given AWT color space. + * @param colorSpace AWT color space from a JPX image + */ + public PDJPXColorSpace(ColorSpace colorSpace) + { + this.awtColorSpace = colorSpace; + } + + @Override + public String getName() + { + return "JPX"; + } + + @Override + public int getNumberOfComponents() + { + return awtColorSpace.getNumComponents(); + } + + @Override + public float[] getDefaultDecode(int bitsPerComponent) + { + int n = getNumberOfComponents(); + float[] decode = new float[n * 2]; + for (int i = 0; i < n; i++) + { + decode[i * 2] = awtColorSpace.getMinValue(i); + decode[i * 2 + 1] = awtColorSpace.getMaxValue(i); + } + return decode; + } + + @Override + public PDColor getInitialColor() + { + throw new UnsupportedOperationException("JPX color spaces don't support drawing"); + } + + @Override + public float[] toRGB(float[] value) + { + throw new UnsupportedOperationException("JPX color spaces don't support drawing"); + } + + @Override + public BufferedImage toRGBImage(WritableRaster raster) throws IOException + { + return toRGBImageAWT(raster, awtColorSpace); + } + + @Override + public BufferedImage toRawImage(WritableRaster raster) + { + return toRawImage(raster, this.awtColorSpace); + } + + @Override + public COSBase getCOSObject() + { + throw new UnsupportedOperationException("JPX color spaces don't have 
COS objects"); + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDLab.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDLab.java index 38ca50ba416..92243d67663 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDLab.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDLab.java @@ -20,6 +20,7 @@ import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.PDRange; + import java.awt.image.BufferedImage; import java.awt.image.WritableRaster; import java.io.IOException; @@ -69,10 +70,14 @@ public BufferedImage toRGBImage(WritableRaster raster) throws IOException BufferedImage rgbImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); WritableRaster rgbRaster = rgbImage.getRaster(); - float minA = getARange().getMin(); - float maxA = getARange().getMax(); - float minB = getBRange().getMin(); - float maxB = getBRange().getMax(); + PDRange aRange = getARange(); + PDRange bRange = getBRange(); + float minA = aRange.getMin(); + float maxA = aRange.getMax(); + float minB = bRange.getMin(); + float maxB = bRange.getMax(); + float deltaA = maxA - minA; + float deltaB = maxB - minB; // always three components: ABC float[] abc = new float[3]; @@ -89,8 +94,8 @@ public BufferedImage toRGBImage(WritableRaster raster) throws IOException // scale to range abc[0] *= 100; - abc[1] = minA + (abc[1] * (maxA - minA)); - abc[2] = minB + (abc[2] * (maxB - minB)); + abc[1] = minA + abc[1] * deltaA; + abc[2] = minB + abc[2] * deltaB; float[] rgb = toRGB(abc); @@ -106,6 +111,13 @@ public BufferedImage toRGBImage(WritableRaster raster) throws IOException return rgbImage; } + @Override + public BufferedImage toRawImage(WritableRaster raster) + { + // Not handled at the moment. 
+ return null; + } + @Override public float[] toRGB(float[] value) { @@ -147,7 +159,7 @@ public int getNumberOfComponents() public float[] getDefaultDecode(int bitsPerComponent) { PDRange a = getARange(); - PDRange b = getARange(); + PDRange b = getBRange(); return new float[] { 0, 100, a.getMin(), a.getMax(), b.getMin(), b.getMax() }; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java index ca166ac1595..57633457295 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDPattern.java @@ -20,6 +20,7 @@ import java.awt.image.WritableRaster; import java.io.IOException; +import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern; @@ -45,6 +46,8 @@ public final class PDPattern extends PDSpecialColorSpace public PDPattern(PDResources resources) { this.resources = resources; + array = new COSArray(); + array.add(COSName.PATTERN); } /** @@ -57,6 +60,9 @@ public PDPattern(PDResources resources, PDColorSpace colorSpace) { this.resources = resources; this.underlyingColorSpace = colorSpace; + array = new COSArray(); + array.add(COSName.PATTERN); + array.add(colorSpace); } @Override @@ -95,6 +101,12 @@ public BufferedImage toRGBImage(WritableRaster raster) throws IOException throw new UnsupportedOperationException(); } + @Override + public BufferedImage toRawImage(WritableRaster raster) throws IOException + { + throw new UnsupportedOperationException(); + } + /** * Returns the pattern for the given color. 
* diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDSeparation.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDSeparation.java index cad7c76c3a1..b2b43d14fdb 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDSeparation.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDSeparation.java @@ -17,6 +17,7 @@ package org.apache.pdfbox.pdmodel.graphics.color; import java.awt.Point; +import java.awt.color.ColorSpace; import java.awt.image.BufferedImage; import java.awt.image.DataBuffer; import java.awt.image.Raster; @@ -53,6 +54,14 @@ public class PDSeparation extends PDSpecialColorSpace private PDColorSpace alternateColorSpace = null; private PDFunction tintTransform = null; + /** + * Map used to speed up {@link #toRGB(float[])}. Note that this class contains three maps (this + * and the two in {@link #toRGBImage(java.awt.image.WritableRaster) } and {@link #toRGBImage2(java.awt.image.WritableRaster) + * }. The maps use different key intervals. This map here is needed for shading, which produce + * more than 256 different float values, which we cast to int so that the map can work. + */ + private Map toRGBMap = null; + /** * Creates a new Separation color space. 
*/ @@ -105,8 +114,20 @@ public PDColor getInitialColor() @Override public float[] toRGB(float[] value) throws IOException { + if (toRGBMap == null) + { + toRGBMap = new HashMap(); + } + int key = (int) (value[0] * 255); + float[] retval = toRGBMap.get(key); + if (retval != null) + { + return retval; + } float[] altColor = tintTransform.eval(value); - return alternateColorSpace.toRGB(altColor); + retval = alternateColorSpace.toRGB(altColor); + toRGBMap.put(key, retval); + return retval; } // @@ -115,6 +136,12 @@ public float[] toRGB(float[] value) throws IOException @Override public BufferedImage toRGBImage(WritableRaster raster) throws IOException { + if (alternateColorSpace instanceof PDLab) + { + // PDFBOX-3622 - regular converter fails for Lab colorspaces + return toRGBImage2(raster); + } + // use the tint transform to convert the sample into // the alternate color space (this is usually 1:many) WritableRaster altRaster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, @@ -134,7 +161,8 @@ public BufferedImage toRGBImage(WritableRaster raster) throws IOException for (int x = 0; x < width; x++) { raster.getPixel(x, y, samples); - int[] alt = calculatedValues.get(hash = Float.floatToIntBits(samples[0])); + hash = Float.floatToIntBits(samples[0]); + int[] alt = calculatedValues.get(hash); if (alt == null) { alt = new int[numAltComponents]; @@ -149,6 +177,41 @@ public BufferedImage toRGBImage(WritableRaster raster) throws IOException return alternateColorSpace.toRGBImage(altRaster); } + // converter that works without using super implementation of toRGBImage() + private BufferedImage toRGBImage2(WritableRaster raster) throws IOException + { + int width = raster.getWidth(); + int height = raster.getHeight(); + BufferedImage rgbImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); + WritableRaster rgbRaster = rgbImage.getRaster(); + float[] samples = new float[1]; + + Map calculatedValues = new HashMap(); + Integer hash; + for (int y = 0; y < height; 
y++) + { + for (int x = 0; x < width; x++) + { + raster.getPixel(x, y, samples); + hash = Float.floatToIntBits(samples[0]); + int[] rgb = calculatedValues.get(hash); + if (rgb == null) + { + samples[0] /= 255; + float[] altColor = tintTransform.eval(samples); + float[] fltab = alternateColorSpace.toRGB(altColor); + rgb = new int[3]; + rgb[0] = (int) (fltab[0] * 255); + rgb[1] = (int) (fltab[1] * 255); + rgb[2] = (int) (fltab[2] * 255); + calculatedValues.put(hash, rgb); + } + rgbRaster.setPixel(x, y, rgb); + } + } + return rgbImage; + } + protected void tintTransform(float[] samples, int[] alt) throws IOException { samples[0] /= 255; // 0..1 @@ -160,6 +223,12 @@ protected void tintTransform(float[] samples, int[] alt) throws IOException } } + @Override + public BufferedImage toRawImage(WritableRaster raster) + { + return toRawImage(raster, ColorSpace.getInstance(ColorSpace.CS_GRAY)); + } + /** * Returns the colorant name. * @return the name of the colorant diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/package.html index d16b62c0de8..619f867c82a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/form/PDFormXObject.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/form/PDFormXObject.java index 2d5f079ae5f..f05857c6fe5 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/form/PDFormXObject.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/form/PDFormXObject.java @@ -21,6 +21,7 @@ import java.io.InputStream; import org.apache.pdfbox.contentstream.PDContentStream; import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSName; @@ -30,6 +31,7 @@ import org.apache.pdfbox.pdmodel.ResourceCache; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDStream; +import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList; import org.apache.pdfbox.pdmodel.graphics.PDXObject; import org.apache.pdfbox.util.Matrix; @@ -145,19 +147,26 @@ public InputStream getContents() throws IOException } /** - * This will get the resources at this page and not look up the hierarchy. - * This attribute is inheritable, and findResources() should probably used. - * This will return null if no resources are available at this level. - * @return The resources at this level in the hierarchy. + * This will get the resources for this Form XObject. + * This will return null if no resources are available. + * + * @return The resources for this Form XObject. 
*/ @Override public PDResources getResources() { - COSDictionary resources = (COSDictionary) getCOSObject().getDictionaryObject(COSName.RESOURCES); + COSDictionary resources = getCOSObject().getCOSDictionary(COSName.RESOURCES); if (resources != null) { return new PDResources(resources, cache); } + if (getCOSObject().containsKey(COSName.RESOURCES)) + { + // PDFBOX-4372 if the resource key exists but has nothing, return empty resources, + // to avoid a self-reference (xobject form Fm0 contains "/Fm0 Do") + // See also the mention of PDFBOX-1359 in PDFStreamEngine + return new PDResources(); + } return null; } @@ -206,22 +215,13 @@ public void setBBox(PDRectangle bbox) } /** - * This will get the optional Matrix of an XObjectForm. It maps the form space to user space. + * This will get the optional matrix of an XObjectForm. It maps the form space to user space. * @return the form matrix if available, or the identity matrix. */ @Override public Matrix getMatrix() { - COSArray array = (COSArray) getCOSObject().getDictionaryObject(COSName.MATRIX); - if (array != null) - { - return new Matrix(array); - } - else - { - // default value is the identity matrix - return new Matrix(); - } + return Matrix.createMatrix(getCOSObject().getDictionaryObject(COSName.MATRIX)); } /** @@ -241,14 +241,15 @@ public void setMatrix(AffineTransform transform) } /** - * This will get the key of this XObjectForm in the structural parent tree. - * Required if the form XObject contains marked-content sequences that are - * structural content items. - * @return the integer key of the XObjectForm's entry in the structural parent tree + * This will get the key of this XObjectForm in the structural parent tree. Required if the form + * XObject contains marked-content sequences that are structural content items. + * + * @return the integer key of the XObjectForm's entry in the structural parent tree or -1 if + * there isn't any. 
*/ public int getStructParents() { - return getCOSObject().getInt(COSName.STRUCT_PARENTS, 0); + return getCOSObject().getInt(COSName.STRUCT_PARENTS); } /** @@ -259,4 +260,30 @@ public void setStructParents(int structParent) { getCOSObject().setInt(COSName.STRUCT_PARENTS, structParent); } + + /** + * This will get the optional content group or optional content membership dictionary. + * + * @return The optional content group or optional content membership dictionary or null if there + * is none. + */ + public PDPropertyList getOptionalContent() + { + COSBase base = getCOSObject().getDictionaryObject(COSName.OC); + if (base instanceof COSDictionary) + { + return PDPropertyList.create((COSDictionary) base); + } + return null; + } + + /** + * Sets the optional content group or optional content membership dictionary. + * + * @param oc The optional content group or optional content membership dictionary. + */ + public void setOptionalContent(PDPropertyList oc) + { + getCOSObject().setItem(COSName.OC, oc); + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/form/PDTransparencyGroupAttributes.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/form/PDTransparencyGroupAttributes.java index b30eebb41ca..2d9f9878fea 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/form/PDTransparencyGroupAttributes.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/form/PDTransparencyGroupAttributes.java @@ -20,19 +20,29 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.COSObjectable; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; /** * Transparency group attributes. 
* - * @author Kühn & Weyh Software, GmbH + * @author Kühn & Weyh Software GmbH */ public final class PDTransparencyGroupAttributes implements COSObjectable { private final COSDictionary dictionary; private PDColorSpace colorSpace; + /** + * Creates a group object with /Transparency subtype entry. + */ + public PDTransparencyGroupAttributes() + { + dictionary = new COSDictionary(); + dictionary.setItem(COSName.S, COSName.TRANSPARENCY); + } + /** * Creates a group object from a given dictionary * @param dic {@link COSDictionary} object @@ -49,15 +59,28 @@ public COSDictionary getCOSObject() } /** - * Returns the blending color space - * @return color space + * Returns the group color space or null if it isn't defined. + * + * @return the group color space. * @throws IOException */ public PDColorSpace getColorSpace() throws IOException { - if (colorSpace == null) + return getColorSpace(null); + } + + /** + * Returns the group color space or null if it isn't defined. + * + * @param resources useful for its cache. Can be null. + * @return the group color space. + * @throws IOException + */ + public PDColorSpace getColorSpace(PDResources resources) throws IOException + { + if (colorSpace == null && getCOSObject().containsKey(COSName.CS)) { - colorSpace = PDColorSpace.create(getCOSObject().getDictionaryObject(COSName.CS)); + colorSpace = PDColorSpace.create(getCOSObject().getDictionaryObject(COSName.CS), resources); } return colorSpace; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/form/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/form/package.html index 0d401708a8e..016c4611e66 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/form/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/form/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactory.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactory.java index 37ca491401b..b41255613b9 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactory.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactory.java @@ -1,386 +1,523 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.pdmodel.graphics.image; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; -import java.io.OutputStream; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.io.RandomAccess; -import org.apache.pdfbox.io.RandomAccessFile; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; - -/** - * Factory for creating a PDImageXObject containing a CCITT Fax compressed TIFF image. 
- * - * @author Ben Litchfield - * @author Paul King - */ -public final class CCITTFactory -{ - private CCITTFactory() - { - } - - /** - * Creates a new CCITT Fax compressed image XObject from the first image of a TIFF file. - * - * @param document the document to create the image as part of. - * @param reader the random access TIFF file which contains a suitable CCITT - * compressed image - * @return a new image XObject - * @throws IOException if there is an error reading the TIFF data. - * - * @deprecated Use {@link #createFromFile(PDDocument, File)} instead. - */ - @Deprecated - public static PDImageXObject createFromRandomAccess(PDDocument document, RandomAccess reader) - throws IOException - { - return createFromRandomAccessImpl(document, reader, 0); - } - - /** - * Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file. - * - * @param document the document to create the image as part of. - * @param reader the random access TIFF file which contains a suitable CCITT - * compressed image - * @param number TIFF image number, starting from 0 - * @return a new image XObject, or null if no such page - * @throws IOException if there is an error reading the TIFF data. - * - * @deprecated Use {@link #createFromFile(PDDocument, File, int)} instead. - */ - @Deprecated - public static PDImageXObject createFromRandomAccess(PDDocument document, RandomAccess reader, - int number) throws IOException - { - return createFromRandomAccessImpl(document, reader, number); - } - - /** - * Creates a new CCITT Fax compressed image XObject from the first image of a TIFF file. Only - * single-strip CCITT T4 or T6 compressed TIFF files are supported. If you're not sure what TIFF - * files you have, use - * {@link LosslessFactory#createFromImage(org.apache.pdfbox.pdmodel.PDDocument, java.awt.image.BufferedImage)} - * instead. - * - * @param document the document to create the image as part of. 
- * @param file the TIFF file which contains a suitable CCITT compressed image - * @return a new Image XObject - * @throws IOException if there is an error reading the TIFF data. - */ - public static PDImageXObject createFromFile(PDDocument document, File file) - throws IOException - { - return createFromRandomAccessImpl(document, new RandomAccessFile(file, "r"), 0); - } - - /** - * Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file. Only - * single-strip CCITT T4 or T6 compressed TIFF files are supported. If you're not sure what TIFF - * files you have, use - * {@link LosslessFactory#createFromImage(org.apache.pdfbox.pdmodel.PDDocument, java.awt.image.BufferedImage)} - * instead. - * - * @param document the document to create the image as part of. - * @param file the TIFF file which contains a suitable CCITT compressed image - * @param number TIFF image number, starting from 0 - * @return a new Image XObject - * @throws IOException if there is an error reading the TIFF data. - */ - public static PDImageXObject createFromFile(PDDocument document, File file, int number) - throws IOException - { - return createFromRandomAccessImpl(document, new RandomAccessFile(file, "r"), number); - } - - /** - * Creates a new CCITT Fax compressed image XObject from a TIFF file. - * - * @param document the document to create the image as part of. - * @param reader the random access TIFF file which contains a suitable CCITT - * compressed image - * @param number TIFF image number, starting from 0 - * @return a new Image XObject, or null if no such page - * @throws IOException if there is an error reading the TIFF data. 
- */ - private static PDImageXObject createFromRandomAccessImpl(PDDocument document, - RandomAccess reader, - int number) throws IOException - { - COSDictionary decodeParms = new COSDictionary(); - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - extractFromTiff(reader, bos, decodeParms, number); - if (bos.size() == 0) - { - return null; - } - ByteArrayInputStream encodedByteStream = new ByteArrayInputStream(bos.toByteArray()); - PDImageXObject pdImage = new PDImageXObject(document, - encodedByteStream, - COSName.CCITTFAX_DECODE, - decodeParms.getInt(COSName.COLUMNS), - decodeParms.getInt(COSName.ROWS), - 1, - PDDeviceGray.INSTANCE); - - COSDictionary dict = pdImage.getCOSObject(); - dict.setItem(COSName.DECODE_PARMS, decodeParms); - return pdImage; - } - - // extracts the CCITT stream from the TIFF file - private static void extractFromTiff(RandomAccess reader, OutputStream os, - COSDictionary params, int number) throws IOException - { - try - { - // First check the basic tiff header - reader.seek(0); - char endianess = (char) reader.read(); - if ((char) reader.read() != endianess) - { - throw new IOException("Not a valid tiff file"); - } - // ensure that endianess is either M or I - if (endianess != 'M' && endianess != 'I') - { - throw new IOException("Not a valid tiff file"); - } - int magicNumber = readshort(endianess, reader); - if (magicNumber != 42) - { - throw new IOException("Not a valid tiff file"); - } - - // Relocate to the first set of tags - int address = readlong(endianess, reader); - reader.seek(address); - - // If some higher page number is required, skip this page's tags, - // then read the next page's address - for (int i = 0; i < number; i++) - { - int numtags = readshort(endianess, reader); - if (numtags > 50) - { - throw new IOException("Not a valid tiff file"); - } - reader.seek(address + 2 + numtags * 12); - address = readlong(endianess, reader); - if (address == 0) - { - return; - } - reader.seek(address); - } - - int numtags = 
readshort(endianess, reader); - - // The number 50 is somewhat arbitary, it just stops us load up junk from somewhere - // and tramping on - if (numtags > 50) - { - throw new IOException("Not a valid tiff file"); - } - - // Loop through the tags, some will convert to items in the params dictionary - // Other point us to where to find the data stream. - // The only param which might change as a result of other TIFF tags is K, so - // we'll deal with that differently. - - // Default value to detect error - int k = -1000; - - int dataoffset = 0; - int datalength = 0; - - for (int i = 0; i < numtags; i++) - { - int tag = readshort(endianess, reader); - int type = readshort(endianess, reader); - int count = readlong(endianess, reader); - int val = readlong(endianess, reader); // See note - - // Note, we treated that value as a long. The value always occupies 4 bytes - // But it might only use the first byte or two. Depending on endianess we might - // need to correct. - // Note we ignore all other types, they are of little interest for PDFs/CCITT Fax - if (endianess == 'M') - { - switch (type) - { - case 1: - { - val = val >> 24; - break; // byte value - } - case 3: - { - val = val >> 16; - break; // short value - } - case 4: - { - break; // long value - } - default: - { - // do nothing - } - } - } - switch (tag) - { - case 256: - { - params.setInt(COSName.COLUMNS, val); - break; - } - case 257: - { - params.setInt(COSName.ROWS, val); - break; - } - case 259: - { - if (val == 4) - { - k = -1; - } - if (val == 3) - { - k = 0; - } - break; // T6/T4 Compression - } - case 262: - { - if (val == 1) - { - params.setBoolean(COSName.BLACK_IS_1, true); - } - break; - } - case 273: - { - if (count == 1) - { - dataoffset = val; - } - break; - } - case 279: - { - if (count == 1) - { - datalength = val; - } - break; - } - case 292: - { - if ((val & 1) != 0) - { - k = 50; // T4 2D - arbitary positive K value - } - // http://www.awaresystems.be/imaging/tiff/tifftags/t4options.html - 
if ((val & 4) != 0) - { - throw new IOException("CCITT Group 3 'uncompressed mode' is not supported"); - } - if ((val & 2) != 0) - { - throw new IOException("CCITT Group 3 'fill bits before EOL' is not supported"); - } - break; - } - case 324: - { - if (count == 1) - { - dataoffset = val; - } - break; - } - case 325: - { - if (count == 1) - { - datalength = val; - } - break; - } - default: - { - // do nothing - } - } - } - - if (k == -1000) - { - throw new IOException("First image in tiff is not CCITT T4 or T6 compressed"); - } - if (dataoffset == 0) - { - throw new IOException("First image in tiff is not a single tile/strip"); - } - - params.setInt(COSName.K, k); - - reader.seek(dataoffset); - - byte[] buf = new byte[8192]; - int amountRead; - while ((amountRead = reader.read(buf, 0, Math.min(8192, datalength))) > 0) - { - datalength -= amountRead; - os.write(buf, 0, amountRead); - } - - } - finally - { - os.close(); - } - } - - private static int readshort(char endianess, RandomAccess raf) throws IOException - { - if (endianess == 'I') - { - return raf.read() | (raf.read() << 8); - } - return (raf.read() << 8) | raf.read(); - } - - private static int readlong(char endianess, RandomAccess raf) throws IOException - { - if (endianess == 'I') - { - return raf.read() | (raf.read() << 8) | (raf.read() << 16) | (raf.read() << 24); - } - return (raf.read() << 24) | (raf.read() << 16) | (raf.read() << 8) | raf.read(); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.graphics.image; + +import java.awt.image.BufferedImage; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import javax.imageio.stream.MemoryCacheImageOutputStream; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.filter.Filter; +import org.apache.pdfbox.filter.FilterFactory; +import org.apache.pdfbox.io.RandomAccess; +import org.apache.pdfbox.io.RandomAccessBuffer; +import org.apache.pdfbox.io.RandomAccessFile; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; + +/** + * Factory for creating a PDImageXObject containing a CCITT Fax compressed TIFF image. + * + * @author Ben Litchfield + * @author Paul King + */ +public final class CCITTFactory +{ + private CCITTFactory() + { + } + + /** + * Creates a new CCITT group 4 (T6) compressed image XObject from a b/w BufferedImage. This + * compression technique usually results in smaller images than those produced by {@link LosslessFactory#createFromImage(PDDocument, BufferedImage) + * }. + * + * @param document the document to create the image as part of. + * @param image the image. + * @return a new image XObject. + * @throws IOException if there is an error creating the image. + * @throws IllegalArgumentException if the BufferedImage is not a b/w image. 
+ */ + public static PDImageXObject createFromImage(PDDocument document, BufferedImage image) + throws IOException + { + if (image.getType() != BufferedImage.TYPE_BYTE_BINARY && image.getColorModel().getPixelSize() != 1) + { + throw new IllegalArgumentException("Only 1-bit b/w images supported"); + } + + int height = image.getHeight(); + int width = image.getWidth(); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + MemoryCacheImageOutputStream mcios = new MemoryCacheImageOutputStream(bos); + + for (int y = 0; y < height; ++y) + { + for (int x = 0; x < width; ++x) + { + // flip bit to avoid having to set /BlackIs1 + mcios.writeBits(~(image.getRGB(x, y) & 1), 1); + } + if (mcios.getBitOffset() != 0) + { + mcios.writeBits(0, 8 - mcios.getBitOffset()); + } + } + mcios.flush(); + mcios.close(); + + return prepareImageXObject(document, bos.toByteArray(), width, height, PDDeviceGray.INSTANCE); + } + + /** + * Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file stored + * in a byte array. Only single-strip CCITT T4 or T6 compressed TIFF files are supported. If + * you're not sure what TIFF files you have, use + * {@link LosslessFactory#createFromImage(PDDocument, BufferedImage) } + * or {@link CCITTFactory#createFromImage(PDDocument, BufferedImage) } + * instead. + * + * @param document the document to create the image as part of. + * @param byteArray the TIFF file in a byte array which contains a suitable CCITT compressed + * image + * @return a new Image XObject + * @throws IOException if there is an error reading the TIFF data. + */ + public static PDImageXObject createFromByteArray(PDDocument document, byte[] byteArray) + throws IOException + { + return createFromByteArray(document, byteArray, 0); + } + + /** + * Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file stored + * in a byte array. Only single-strip CCITT T4 or T6 compressed TIFF files are supported. 
If + * you're not sure what TIFF files you have, use + * {@link LosslessFactory#createFromImage(PDDocument, BufferedImage) } + * or {@link CCITTFactory#createFromImage(PDDocument, BufferedImage) } + * instead. + * + * @param document the document to create the image as part of. + * @param byteArray the TIFF file in a byte array which contains a suitable CCITT compressed + * image + * @param number TIFF image number, starting from 0 + * @return a new Image XObject + * @throws IOException if there is an error reading the TIFF data. + */ + public static PDImageXObject createFromByteArray(PDDocument document, byte[] byteArray, int number) + throws IOException + { + RandomAccess raf = new RandomAccessBuffer(byteArray); + try + { + return createFromRandomAccessImpl(document, raf, number); + } + finally + { + raf.close(); + } + } + + private static PDImageXObject prepareImageXObject(PDDocument document, + byte[] byteArray, int width, int height, + PDColorSpace initColorSpace) throws IOException + { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + Filter filter = FilterFactory.INSTANCE.getFilter(COSName.CCITTFAX_DECODE); + COSDictionary dict = new COSDictionary(); + dict.setInt(COSName.COLUMNS, width); + dict.setInt(COSName.ROWS, height); + filter.encode(new ByteArrayInputStream(byteArray), baos, dict, 0); + + ByteArrayInputStream encodedByteStream = new ByteArrayInputStream(baos.toByteArray()); + PDImageXObject image = new PDImageXObject(document, encodedByteStream, COSName.CCITTFAX_DECODE, + width, height, 1, initColorSpace); + dict.setInt(COSName.K, -1); + image.getCOSObject().setItem(COSName.DECODE_PARMS, dict); + return image; + } + + /** + * Creates a new CCITT Fax compressed image XObject from the first image of a TIFF file. + * + * @param document the document to create the image as part of. 
+ * @param reader the random access TIFF file which contains a suitable CCITT + * compressed image + * @return a new image XObject + * @throws IOException if there is an error reading the TIFF data. + * + * @deprecated Use {@link #createFromFile(PDDocument, File)} instead. + */ + @Deprecated + public static PDImageXObject createFromRandomAccess(PDDocument document, RandomAccess reader) + throws IOException + { + return createFromRandomAccessImpl(document, reader, 0); + } + + /** + * Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file. + * + * @param document the document to create the image as part of. + * @param reader the random access TIFF file which contains a suitable CCITT + * compressed image + * @param number TIFF image number, starting from 0 + * @return a new image XObject, or null if no such page + * @throws IOException if there is an error reading the TIFF data. + * + * @deprecated Use {@link #createFromFile(PDDocument, File, int)} instead. + */ + @Deprecated + public static PDImageXObject createFromRandomAccess(PDDocument document, RandomAccess reader, + int number) throws IOException + { + return createFromRandomAccessImpl(document, reader, number); + } + + /** + * Creates a new CCITT Fax compressed image XObject from the first image of a TIFF file. Only + * single-strip CCITT T4 or T6 compressed TIFF files are supported. If you're not sure what TIFF + * files you have, use + * {@link LosslessFactory#createFromImage(org.apache.pdfbox.pdmodel.PDDocument, java.awt.image.BufferedImage)} + * or {@link CCITTFactory#createFromImage(PDDocument, BufferedImage) } + * instead. + * + * @param document the document to create the image as part of. + * @param file the TIFF file which contains a suitable CCITT compressed image + * @return a new Image XObject + * @throws IOException if there is an error reading the TIFF data. 
+ */ + public static PDImageXObject createFromFile(PDDocument document, File file) + throws IOException + { + return createFromFile(document, file, 0); + } + + /** + * Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file. Only + * single-strip CCITT T4 or T6 compressed TIFF files are supported. If you're not sure what TIFF + * files you have, use + * {@link LosslessFactory#createFromImage(PDDocument, BufferedImage) } + * or {@link CCITTFactory#createFromImage(PDDocument, BufferedImage) } + * instead. + * + * @param document the document to create the image as part of. + * @param file the TIFF file which contains a suitable CCITT compressed image + * @param number TIFF image number, starting from 0 + * @return a new Image XObject + * @throws IOException if there is an error reading the TIFF data. + */ + public static PDImageXObject createFromFile(PDDocument document, File file, int number) + throws IOException + { + RandomAccessFile raf = new RandomAccessFile(file, "r"); + try + { + return createFromRandomAccessImpl(document, raf, number); + } + finally + { + raf.close(); + } + } + + /** + * Creates a new CCITT Fax compressed image XObject from a TIFF file. + * + * @param document the document to create the image as part of. + * @param reader the random access TIFF file which contains a suitable CCITT + * compressed image + * @param number TIFF image number, starting from 0 + * @return a new Image XObject, or null if no such page + * @throws IOException if there is an error reading the TIFF data. 
+ */ + private static PDImageXObject createFromRandomAccessImpl(PDDocument document, + RandomAccess reader, + int number) throws IOException + { + COSDictionary decodeParms = new COSDictionary(); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + extractFromTiff(reader, bos, decodeParms, number); + if (bos.size() == 0) + { + return null; + } + ByteArrayInputStream encodedByteStream = new ByteArrayInputStream(bos.toByteArray()); + PDImageXObject pdImage = new PDImageXObject(document, + encodedByteStream, + COSName.CCITTFAX_DECODE, + decodeParms.getInt(COSName.COLUMNS), + decodeParms.getInt(COSName.ROWS), + 1, + PDDeviceGray.INSTANCE); + + COSDictionary dict = pdImage.getCOSObject(); + dict.setItem(COSName.DECODE_PARMS, decodeParms); + return pdImage; + } + + // extracts the CCITT stream from the TIFF file + private static void extractFromTiff(RandomAccess reader, OutputStream os, + COSDictionary params, int number) throws IOException + { + try + { + // First check the basic tiff header + reader.seek(0); + char endianess = (char) reader.read(); + if ((char) reader.read() != endianess) + { + throw new IOException("Not a valid tiff file"); + } + // ensure that endianess is either M or I + if (endianess != 'M' && endianess != 'I') + { + throw new IOException("Not a valid tiff file"); + } + int magicNumber = readshort(endianess, reader); + if (magicNumber != 42) + { + throw new IOException("Not a valid tiff file"); + } + + // Relocate to the first set of tags + int address = readlong(endianess, reader); + reader.seek(address); + + // If some higher page number is required, skip this page's tags, + // then read the next page's address + for (int i = 0; i < number; i++) + { + int numtags = readshort(endianess, reader); + if (numtags > 50) + { + throw new IOException("Not a valid tiff file"); + } + reader.seek(address + 2 + numtags * 12); + address = readlong(endianess, reader); + if (address == 0) + { + return; + } + reader.seek(address); + } + + int numtags = 
readshort(endianess, reader); + + // The number 50 is somewhat arbitrary, it just stops us load up junk from somewhere + // and tramping on + if (numtags > 50) + { + throw new IOException("Not a valid tiff file"); + } + + // Loop through the tags, some will convert to items in the params dictionary + // Other point us to where to find the data stream. + // The only param which might change as a result of other TIFF tags is K, so + // we'll deal with that differently. + + // Default value to detect error + int k = -1000; + + int dataoffset = 0; + int datalength = 0; + + for (int i = 0; i < numtags; i++) + { + int tag = readshort(endianess, reader); + int type = readshort(endianess, reader); + int count = readlong(endianess, reader); + int val; + // Note that when the type is shorter than 4 bytes, the rest can be garbage + // and must be ignored. E.g. short (2 bytes) from "01 00 38 32" (little endian) + // is 1, not 842530817 (seen in a real-life TIFF image). + switch (type) + { + case 1: // byte value + val = reader.read(); + reader.read(); + reader.read(); + reader.read(); + break; + case 3: // short value + val = readshort(endianess, reader); + reader.read(); + reader.read(); + break; + default: // long and other types + val = readlong(endianess, reader); + break; + } + switch (tag) + { + case 256: + { + params.setInt(COSName.COLUMNS, val); + break; + } + case 257: + { + params.setInt(COSName.ROWS, val); + break; + } + case 259: + { + if (val == 4) + { + k = -1; + } + if (val == 3) + { + k = 0; + } + break; // T6/T4 Compression + } + case 262: + { + if (val == 1) + { + params.setBoolean(COSName.BLACK_IS_1, true); + } + break; + } + case 266: + { + if (val != 1) + { + throw new IOException("FillOrder " + val + " is not supported"); + } + break; + } + case 273: + { + if (count == 1) + { + dataoffset = val; + } + break; + } + case 274: + { + // http://www.awaresystems.be/imaging/tiff/tifftags/orientation.html + if (val != 1) + { + throw new IOException("Orientation " 
+ val + " is not supported"); + } + break; + } + case 279: + { + if (count == 1) + { + datalength = val; + } + break; + } + case 292: + { + if ((val & 1) != 0) + { + // T4 2D - arbitrary positive K value + k = 50; + } + // http://www.awaresystems.be/imaging/tiff/tifftags/t4options.html + if ((val & 4) != 0) + { + throw new IOException("CCITT Group 3 'uncompressed mode' is not supported"); + } + if ((val & 2) != 0) + { + throw new IOException("CCITT Group 3 'fill bits before EOL' is not supported"); + } + break; + } + case 324: + { + if (count == 1) + { + dataoffset = val; + } + break; + } + case 325: + { + if (count == 1) + { + datalength = val; + } + break; + } + default: + { + // do nothing + } + } + } + + if (k == -1000) + { + throw new IOException("First image in tiff is not CCITT T4 or T6 compressed"); + } + if (dataoffset == 0) + { + throw new IOException("First image in tiff is not a single tile/strip"); + } + + params.setInt(COSName.K, k); + + reader.seek(dataoffset); + + byte[] buf = new byte[8192]; + int amountRead; + while ((amountRead = reader.read(buf, 0, Math.min(8192, datalength))) > 0) + { + datalength -= amountRead; + os.write(buf, 0, amountRead); + } + + } + finally + { + os.close(); + } + } + + private static int readshort(char endianess, RandomAccess raf) throws IOException + { + if (endianess == 'I') + { + return raf.read() | (raf.read() << 8); + } + return (raf.read() << 8) | raf.read(); + } + + private static int readlong(char endianess, RandomAccess raf) throws IOException + { + if (endianess == 'I') + { + return raf.read() | (raf.read() << 8) | (raf.read() << 16) | (raf.read() << 24); + } + return (raf.read() << 24) | (raf.read() << 16) | (raf.read() << 8) | raf.read(); + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/JPEGFactory.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/JPEGFactory.java index 5c592bb0d59..24887ecc843 100644 --- 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/JPEGFactory.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/JPEGFactory.java @@ -21,6 +21,7 @@ import java.awt.color.ICC_ColorSpace; import java.awt.image.BufferedImage; import java.awt.image.ColorConvertOp; +import java.awt.image.Raster; import java.awt.image.WritableRaster; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -32,12 +33,20 @@ import javax.imageio.ImageIO; import javax.imageio.ImageReader; import javax.imageio.ImageTypeSpecifier; +import javax.imageio.ImageWriteParam; import javax.imageio.ImageWriter; import javax.imageio.metadata.IIOMetadata; import javax.imageio.plugins.jpeg.JPEGImageWriteParam; import javax.imageio.stream.ImageInputStream; import javax.imageio.stream.ImageOutputStream; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.filter.MissingImageReaderException; import org.apache.pdfbox.io.IOUtils; @@ -54,6 +63,8 @@ */ public final class JPEGFactory { + private static final Log LOG = LogFactory.getLog(JPEGFactory.class); + private JPEGFactory() { } @@ -70,37 +81,80 @@ private JPEGFactory() */ public static PDImageXObject createFromStream(PDDocument document, InputStream stream) throws IOException + { + return createFromByteArray(document, IOUtils.toByteArray(stream)); + } + + /** + * Creates a new JPEG Image XObject from a byte array containing JPEG data. 
+ * + * @param document the document where the image will be created + * @param byteArray bytes of JPEG image + * @return a new Image XObject + * + * @throws IOException if the input stream cannot be read + */ + public static PDImageXObject createFromByteArray(PDDocument document, byte[] byteArray) + throws IOException { // copy stream - ByteArrayInputStream byteStream = new ByteArrayInputStream(IOUtils.toByteArray(stream)); + ByteArrayInputStream byteStream = new ByteArrayInputStream(byteArray); - // read image - BufferedImage awtImage = readJPEG(byteStream); - byteStream.reset(); + Dimensions meta = retrieveDimensions(byteStream); + + PDColorSpace colorSpace; + switch (meta.numComponents) + { + case 1: + colorSpace = PDDeviceGray.INSTANCE; + break; + case 3: + colorSpace = PDDeviceRGB.INSTANCE; + break; + case 4: + colorSpace = PDDeviceCMYK.INSTANCE; + break; + default: + throw new UnsupportedOperationException("number of data elements not supported: " + + meta.numComponents); + } - // create Image XObject from stream + // create PDImageXObject from stream PDImageXObject pdImage = new PDImageXObject(document, byteStream, - COSName.DCT_DECODE, awtImage.getWidth(), awtImage.getHeight(), - awtImage.getColorModel().getComponentSize(0), - getColorSpaceFromAWT(awtImage)); + COSName.DCT_DECODE, meta.width, meta.height, 8, colorSpace); - // no alpha - if (awtImage.getColorModel().hasAlpha()) + if (colorSpace instanceof PDDeviceCMYK) { - throw new UnsupportedOperationException("alpha channel not implemented"); + COSArray decode = new COSArray(); + decode.add(COSInteger.ONE); + decode.add(COSInteger.ZERO); + decode.add(COSInteger.ONE); + decode.add(COSInteger.ZERO); + decode.add(COSInteger.ONE); + decode.add(COSInteger.ZERO); + decode.add(COSInteger.ONE); + decode.add(COSInteger.ZERO); + pdImage.setDecode(decode); } return pdImage; } - private static BufferedImage readJPEG(InputStream stream) throws IOException + private static class Dimensions + { + private int width; + 
private int height; + private int numComponents; + } + + private static Dimensions retrieveDimensions(ByteArrayInputStream stream) throws IOException { // find suitable image reader - Iterator readers = ImageIO.getImageReadersByFormatName("JPEG"); + Iterator readers = ImageIO.getImageReadersByFormatName("JPEG"); ImageReader reader = null; while (readers.hasNext()) { - reader = (ImageReader) readers.next(); + reader = readers.next(); if (reader.canReadRaster()) { break; @@ -109,8 +163,8 @@ private static BufferedImage readJPEG(InputStream stream) throws IOException if (reader == null) { - throw new MissingImageReaderException("Cannot read JPEG image: " + - "a suitable JAI I/O image filter is not installed"); + throw new MissingImageReaderException( + "Cannot read JPEG image: a suitable JAI I/O image filter is not installed"); } ImageInputStream iis = null; @@ -119,8 +173,29 @@ private static BufferedImage readJPEG(InputStream stream) throws IOException iis = ImageIO.createImageInputStream(stream); reader.setInput(iis); + Dimensions meta = new Dimensions(); + meta.width = reader.getWidth(0); + meta.height = reader.getHeight(0); + // PDFBOX-4691: get from image metadata (faster because no decoding) + try + { + meta.numComponents = getNumComponentsFromImageMetadata(reader); + if (meta.numComponents != 0) + { + return meta; + } + LOG.warn("No image metadata, will decode image and use raster size"); + } + catch (IOException ex) + { + LOG.warn("Error reading image metadata, will decode image and use raster size", ex); + } + + // Old method: get from raster (slower) ImageIO.setUseCache(false); - return reader.read(0); + Raster raster = reader.readRaster(0, null); + meta.numComponents = raster.getNumDataElements(); + return meta; } finally { @@ -128,14 +203,55 @@ private static BufferedImage readJPEG(InputStream stream) throws IOException { iis.close(); } + stream.reset(); reader.dispose(); } } + private static int getNumComponentsFromImageMetadata(ImageReader reader) 
throws IOException + { + IIOMetadata imageMetadata = reader.getImageMetadata(0); + if (imageMetadata == null) + { + return 0; + } + Element root = (Element) imageMetadata.getAsTree("javax_imageio_jpeg_image_1.0"); + if (root == null) + { + return 0; + } + + try + { + XPath xpath = XPathFactory.newInstance().newXPath(); + String numFrameComponents = xpath.evaluate("markerSequence/sof/@numFrameComponents", root); + if (numFrameComponents.isEmpty()) + { + return 0; + } + return Integer.parseInt(numFrameComponents); + } + catch (NumberFormatException ex) + { + LOG.warn(ex.getMessage(), ex); + return 0; + } + catch (XPathExpressionException ex) + { + LOG.warn(ex.getMessage(), ex); + return 0; + } + } + /** - * Creates a new JPEG Image XObject from a Buffered Image. + * Creates a new JPEG PDImageXObject from a BufferedImage. + *

+ * Do not read a JPEG image from a stream/file and call this method; you'll get more speed and + * quality by calling {@link #createFromStream(org.apache.pdfbox.pdmodel.PDDocument, + * java.io.InputStream) createFromStream()} instead. + * * @param document the document where the image will be created - * @param image the buffered image to embed + * @param image the BufferedImage to embed * @return a new Image XObject * @throws IOException if the JPEG data cannot be written */ @@ -146,11 +262,18 @@ public static PDImageXObject createFromImage(PDDocument document, BufferedImage } /** - * Creates a new JPEG Image XObject from a Buffered Image and a given quality. - * The image will be created at 72 DPI. + * Creates a new JPEG PDImageXObject from a BufferedImage and a given quality. + *

+ * Do not read a JPEG image from a stream/file and call this method; you'll get more speed and + * quality by calling {@link #createFromStream(org.apache.pdfbox.pdmodel.PDDocument, + * java.io.InputStream) createFromStream()} instead. + * + * The image will be created with a dpi value of 72 to be stored in metadata. * @param document the document where the image will be created - * @param image the buffered image to embed - * @param quality the desired JPEG compression quality + * @param image the BufferedImage to embed + * @param quality The desired JPEG compression quality; between 0 (best + * compression) and 1 (best image quality). See + * {@link ImageWriteParam#setCompressionQuality(float)} for more details. * @return a new Image XObject * @throws IOException if the JPEG data cannot be written */ @@ -161,11 +284,19 @@ public static PDImageXObject createFromImage(PDDocument document, BufferedImage } /** - * Creates a new JPEG Image XObject from a Buffered Image, a given quality and DPI. + * Creates a new JPEG Image XObject from a BufferedImage, a given quality and dpi metadata. + *

+ * Do not read a JPEG image from a stream/file and call this method; you'll get more speed and + * quality by calling {@link #createFromStream(org.apache.pdfbox.pdmodel.PDDocument, + * java.io.InputStream) createFromStream()} instead. + * * @param document the document where the image will be created - * @param image the buffered image to embed - * @param quality the desired JPEG compression quality - * @param dpi the desired DPI (resolution) of the JPEG + * @param image the BufferedImage to embed + * @param quality The desired JPEG compression quality; between 0 (best + * compression) and 1 (best image quality). See + * {@link ImageWriteParam#setCompressionQuality(float)} for more details. + * @param dpi the desired dpi (resolution) value of the JPEG to be stored in metadata. This + * value has no influence on image content or size. * @return a new Image XObject * @throws IOException if the JPEG data cannot be written */ @@ -176,7 +307,7 @@ public static PDImageXObject createFromImage(PDDocument document, BufferedImage } // returns the alpha channel of an image - private static BufferedImage getAlphaImage(BufferedImage image) throws IOException + private static BufferedImage getAlphaImage(BufferedImage image) { if (!image.getColorModel().hasAlpha()) { @@ -199,7 +330,7 @@ private static BufferedImage getAlphaImage(BufferedImage image) throws IOExcepti return alphaImage; } - // Creates an Image XObject from a Buffered Image using JAI Image I/O + // Creates an Image XObject from a BufferedImage using JAI Image I/O private static PDImageXObject createJPEG(PDDocument document, BufferedImage image, float quality, int dpi) throws IOException { @@ -214,7 +345,7 @@ private static PDImageXObject createJPEG(PDDocument document, BufferedImage imag PDImageXObject pdImage = new PDImageXObject(document, byteStream, COSName.DCT_DECODE, awtColorImage.getWidth(), awtColorImage.getHeight(), - awtColorImage.getColorModel().getComponentSize(0), + 8, 
getColorSpaceFromAWT(awtColorImage)); // alpha -> soft mask @@ -227,6 +358,26 @@ private static PDImageXObject createJPEG(PDDocument document, BufferedImage imag return pdImage; } + private static ImageWriter getJPEGImageWriter() throws IOException + { + Iterator writers = ImageIO.getImageWritersBySuffix("jpeg"); + while (writers.hasNext()) + { + ImageWriter writer = writers.next(); + if (writer == null) + { + continue; + } + // PDFBOX-3566: avoid CLibJPEGImageWriter, which is not a JPEGImageWriteParam + if (writer.getDefaultWriteParam() instanceof JPEGImageWriteParam) + { + return writer; + } + writer.dispose(); + } + throw new IOException("No ImageWriter found for JPEG format"); + } + private static void encodeImageToJPEGStream(BufferedImage image, float quality, int dpi, OutputStream out) throws IOException { @@ -236,13 +387,13 @@ private static void encodeImageToJPEGStream(BufferedImage image, float quality, try { // find JAI writer - imageWriter = ImageIO.getImageWritersBySuffix("jpeg").next(); + imageWriter = getJPEGImageWriter(); ios = ImageIO.createImageOutputStream(out); imageWriter.setOutput(ios); // add compression - JPEGImageWriteParam jpegParam = (JPEGImageWriteParam)imageWriter.getDefaultWriteParam(); - jpegParam.setCompressionMode(JPEGImageWriteParam.MODE_EXPLICIT); + ImageWriteParam jpegParam = imageWriter.getDefaultWriteParam(); + jpegParam.setCompressionMode(ImageWriteParam.MODE_EXPLICIT); jpegParam.setCompressionQuality(quality); // add metadata diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactory.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactory.java index b2a60a73eb5..fb87aecff5e 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactory.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactory.java @@ -15,23 +15,33 @@ */ package org.apache.pdfbox.pdmodel.graphics.image; -import java.awt.Color; import 
java.awt.Transparency; +import java.awt.color.ColorSpace; +import java.awt.color.ICC_ColorSpace; +import java.awt.color.ICC_Profile; import java.awt.image.BufferedImage; -import java.awt.image.WritableRaster; +import java.awt.image.DataBuffer; +import java.awt.image.Raster; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; import javax.imageio.stream.MemoryCacheImageOutputStream; import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.filter.Filter; import org.apache.pdfbox.filter.FilterFactory; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceCMYK; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceColorSpace; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.graphics.color.PDICCBased; /** * Factory for creating a PDImageXObject containing a lossless compressed image. @@ -40,200 +50,189 @@ */ public final class LosslessFactory { + /** + * Internal, only for benchmark purpose + */ + static boolean usePredictorEncoder = true; + private LosslessFactory() { } - + /** - * Creates a new lossless encoded Image XObject from a Buffered Image. + * Creates a new lossless encoded image XObject from a BufferedImage. + *

+ * New for advanced users from 2.0.12 on:
+ * If you created your image with a non standard ICC colorspace, it will be + * preserved. (If you load images in java using ImageIO then no need to read + * this segment) However a new colorspace will be created for each image. So + * if you create a PDF with several such images, consider replacing the + * colorspace with a common object to save space. This is done with + * {@link PDImageXObject#getColorSpace()} and + * {@link PDImageXObject#setColorSpace(org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace) PDImageXObject.setColorSpace()} * * @param document the document where the image will be created - * @param image the buffered image to embed - * @return a new Image XObject + * @param image the BufferedImage to embed + * @return a new image XObject * @throws IOException if something goes wrong */ public static PDImageXObject createFromImage(PDDocument document, BufferedImage image) throws IOException { - int bpc; - PDDeviceColorSpace deviceColorSpace; - - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - int height = image.getHeight(); - int width = image.getWidth(); - - if ((image.getType() == BufferedImage.TYPE_BYTE_GRAY && image.getColorModel().getPixelSize() <= 8) - || (image.getType() == BufferedImage.TYPE_BYTE_BINARY && image.getColorModel().getPixelSize() == 1)) + if (isGrayImage(image)) { - MemoryCacheImageOutputStream mcios = new MemoryCacheImageOutputStream(bos); - - // grayscale images need one color per sample - bpc = image.getColorModel().getPixelSize(); - deviceColorSpace = PDDeviceGray.INSTANCE; - for (int y = 0; y < height; ++y) - { - for (int x = 0; x < width; ++x) - { - mcios.writeBits(image.getRGB(x, y) & 0xFF, bpc); - } - while (mcios.getBitOffset() != 0) - { - mcios.writeBit(0); - } - } - mcios.flush(); - mcios.close(); + return createFromGrayImage(image, document); } - else + + // We try to encode the image with predictor + if (usePredictorEncoder) { - // RGB - bpc = 8; - deviceColorSpace = PDDeviceRGB.INSTANCE; - for (int y = 
0; y < height; ++y) + PDImageXObject pdImageXObject = new PredictorEncoder(document, image).encode(); + if (pdImageXObject != null) { - for (int x = 0; x < width; ++x) + if (pdImageXObject.getColorSpace() == PDDeviceRGB.INSTANCE && + pdImageXObject.getBitsPerComponent() < 16 && + image.getWidth() * image.getHeight() <= 50 * 50) { - Color color = new Color(image.getRGB(x, y)); - bos.write(color.getRed()); - bos.write(color.getGreen()); - bos.write(color.getBlue()); + // also create classic compressed image, compare sizes + PDImageXObject pdImageXObjectClassic = createFromRGBImage(image, document); + if (pdImageXObjectClassic.getCOSObject().getLength() < + pdImageXObject.getCOSObject().getLength()) + { + pdImageXObject.getCOSObject().close(); + return pdImageXObjectClassic; + } + else + { + pdImageXObjectClassic.getCOSObject().close(); + } } + return pdImageXObject; } } - PDImageXObject pdImage = prepareImageXObject(document, bos.toByteArray(), - image.getWidth(), image.getHeight(), bpc, deviceColorSpace); - - // alpha -> soft mask - PDImage xAlpha = createAlphaFromARGBImage(document, image); - if (xAlpha != null) - { - pdImage.getCOSObject().setItem(COSName.SMASK, xAlpha); - } - - return pdImage; + // Fallback: We export the image as 8-bit sRGB and might lose color information + return createFromRGBImage(image, document); } - /** - * Creates a grayscale Flate encoded PDImageXObject from the alpha channel - * of an image. - * - * @param document the document where the image will be created. - * @param image an ARGB image. - * - * @return the alpha channel of an image as a grayscale image. - * - * @throws IOException if something goes wrong - */ - private static PDImageXObject createAlphaFromARGBImage(PDDocument document, BufferedImage image) - throws IOException + private static boolean isGrayImage(BufferedImage image) { - // this implementation makes the assumption that the raster values can be used 1:1 for - // the stream. 
- // Sadly the type of the databuffer is usually TYPE_INT and not TYPE_BYTE so we can't just - // save it directly - if (!image.getColorModel().hasAlpha()) + if (image.getTransparency() != Transparency.OPAQUE) { - return null; + return false; } - - // extract the alpha information - WritableRaster alphaRaster = image.getAlphaRaster(); - if (alphaRaster == null) + if (image.getType() == BufferedImage.TYPE_BYTE_GRAY && image.getColorModel().getPixelSize() <= 8) { - // happens sometimes (PDFBOX-2654) despite colormodel claiming to have alpha - return createAlphaFromARGBImage2(document, image); + return true; } + if (image.getType() == BufferedImage.TYPE_BYTE_BINARY && image.getColorModel().getPixelSize() == 1) + { + return true; + } + return false; + } + - int[] pixels = alphaRaster.getPixels(0, 0, - alphaRaster.getWidth(), - alphaRaster.getHeight(), - (int[]) null); - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - int bpc; - if (image.getTransparency() == Transparency.BITMASK) + // grayscale images need one color per sample + private static PDImageXObject createFromGrayImage(BufferedImage image, PDDocument document) + throws IOException + { + int height = image.getHeight(); + int width = image.getWidth(); + int[] rgbLineBuffer = new int[width]; + int bpc = image.getColorModel().getPixelSize(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(((width*bpc/8)+(width*bpc%8 != 0 ? 
1:0))*height); + MemoryCacheImageOutputStream mcios = new MemoryCacheImageOutputStream(baos); + for (int y = 0; y < height; ++y) { - bpc = 1; - MemoryCacheImageOutputStream mcios = new MemoryCacheImageOutputStream(bos); - int width = alphaRaster.getWidth(); - int p = 0; - for (int pixel : pixels) + for (int pixel : image.getRGB(0, y, width, 1, rgbLineBuffer, 0, width)) { - mcios.writeBit(pixel); - ++p; - if (p % width == 0) - { - while (mcios.getBitOffset() != 0) - { - mcios.writeBit(0); - } - } + mcios.writeBits(pixel & 0xFF, bpc); } - mcios.flush(); - mcios.close(); - } - else - { - bpc = 8; - for (int pixel : pixels) + + int bitOffset = mcios.getBitOffset(); + if (bitOffset != 0) { - bos.write(pixel); + mcios.writeBits(0, 8 - bitOffset); } } - - PDImageXObject pdImage = prepareImageXObject(document, bos.toByteArray(), + mcios.flush(); + mcios.close(); + return prepareImageXObject(document, baos.toByteArray(), image.getWidth(), image.getHeight(), bpc, PDDeviceGray.INSTANCE); - - return pdImage; } - // create alpha image the hard way: get the alpha through getRGB() - private static PDImageXObject createAlphaFromARGBImage2(PDDocument document, BufferedImage bi) - throws IOException + private static PDImageXObject createFromRGBImage(BufferedImage image, PDDocument document) throws IOException { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - int bpc; - if (bi.getTransparency() == Transparency.BITMASK) + int height = image.getHeight(); + int width = image.getWidth(); + int[] rgbLineBuffer = new int[width]; + int bpc = 8; + PDDeviceColorSpace deviceColorSpace = PDDeviceRGB.INSTANCE; + byte[] imageData = new byte[width * height * 3]; + int byteIdx = 0; + int alphaByteIdx = 0; + int alphaBitPos = 7; + int transparency = image.getTransparency(); + int apbc = transparency == Transparency.BITMASK ? 
1 : 8; + byte[] alphaImageData; + if (transparency != Transparency.OPAQUE) { - bpc = 1; - MemoryCacheImageOutputStream mcios = new MemoryCacheImageOutputStream(bos); - for (int y = 0, h = bi.getHeight(); y < h; ++y) - { - for (int x = 0, w = bi.getWidth(); x < w; ++x) - { - int alpha = bi.getRGB(x, y) >>> 24; - mcios.writeBit(alpha); - } - while (mcios.getBitOffset() != 0) - { - mcios.writeBit(0); - } - } - mcios.flush(); - mcios.close(); + alphaImageData = new byte[((width * apbc / 8) + (width * apbc % 8 != 0 ? 1 : 0)) * height]; } else { - bpc = 8; - for (int y = 0, h = bi.getHeight(); y < h; ++y) + alphaImageData = new byte[0]; + } + for (int y = 0; y < height; ++y) + { + for (int pixel : image.getRGB(0, y, width, 1, rgbLineBuffer, 0, width)) { - for (int x = 0, w = bi.getWidth(); x < w; ++x) + imageData[byteIdx++] = (byte) ((pixel >> 16) & 0xFF); + imageData[byteIdx++] = (byte) ((pixel >> 8) & 0xFF); + imageData[byteIdx++] = (byte) (pixel & 0xFF); + if (transparency != Transparency.OPAQUE) { - int alpha = bi.getRGB(x, y) >>> 24; - bos.write(alpha); + // we have the alpha right here, so no need to do it separately + // as done prior April 2018 + if (transparency == Transparency.BITMASK) + { + // write a bit + alphaImageData[alphaByteIdx] |= ((pixel >> 24) & 1) << alphaBitPos; + if (--alphaBitPos < 0) + { + alphaBitPos = 7; + ++alphaByteIdx; + } + } + else + { + // write a byte + alphaImageData[alphaByteIdx++] = (byte) ((pixel >> 24) & 0xFF); + } } } - } - - PDImageXObject pdImage = prepareImageXObject(document, bos.toByteArray(), - bi.getWidth(), bi.getHeight(), bpc, PDDeviceGray.INSTANCE); + // skip boundary if needed + if (transparency == Transparency.BITMASK && alphaBitPos != 7) + { + alphaBitPos = 7; + ++alphaByteIdx; + } + } + PDImageXObject pdImage = prepareImageXObject(document, imageData, + image.getWidth(), image.getHeight(), bpc, deviceColorSpace); + if (transparency != Transparency.OPAQUE) + { + PDImageXObject pdMask = prepareImageXObject(document, 
alphaImageData, + image.getWidth(), image.getHeight(), apbc, PDDeviceGray.INSTANCE); + pdImage.getCOSObject().setItem(COSName.SMASK, pdMask); + } return pdImage; - } + } /** - * Create a PDImageXObject while making a decision whether not to - * compress, use Flate filter only, or Flate and LZW filters. + * Create a PDImageXObject using the Flate filter. * * @param document The document. * @param byteArray array with data. @@ -244,11 +243,12 @@ private static PDImageXObject createAlphaFromARGBImage2(PDDocument document, Buf * @return the newly created PDImageXObject with the data compressed. * @throws IOException */ - private static PDImageXObject prepareImageXObject(PDDocument document, + static PDImageXObject prepareImageXObject(PDDocument document, byte [] byteArray, int width, int height, int bitsPerComponent, PDColorSpace initColorSpace) throws IOException { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); + //pre-size the output stream to half of the input + ByteArrayOutputStream baos = new ByteArrayOutputStream(byteArray.length/2); Filter filter = FilterFactory.INSTANCE.getFilter(COSName.FLATE_DECODE); filter.encode(new ByteArrayInputStream(byteArray), baos, new COSDictionary(), 0); @@ -258,4 +258,465 @@ private static PDImageXObject prepareImageXObject(PDDocument document, width, height, bitsPerComponent, initColorSpace); } + private static class PredictorEncoder + { + private final PDDocument document; + private final BufferedImage image; + private final int componentsPerPixel; + private final int transferType; + private final int bytesPerComponent; + private final int bytesPerPixel; + + private final int height; + private final int width; + + private final byte[] dataRawRowNone; + private final byte[] dataRawRowSub; + private final byte[] dataRawRowUp; + private final byte[] dataRawRowAverage; + private final byte[] dataRawRowPaeth; + + final int imageType; + final boolean hasAlpha; + final byte[] alphaImageData; + + final byte[] aValues; + 
final byte[] cValues; + final byte[] bValues; + final byte[] xValues; + final byte[] tmpResultValues; + + /** + * Initialize the encoder and set all final fields + */ + PredictorEncoder(PDDocument document, BufferedImage image) + { + this.document = document; + this.image = image; + + // The raw count of components per pixel including optional alpha + this.componentsPerPixel = image.getColorModel().getNumComponents(); + this.transferType = image.getRaster().getTransferType(); + this.bytesPerComponent = (transferType == DataBuffer.TYPE_SHORT + || transferType == DataBuffer.TYPE_USHORT) ? 2 : 1; + + // Only the bytes we need in the output (excluding alpha) + this.bytesPerPixel = image.getColorModel().getNumColorComponents() * bytesPerComponent; + + this.height = image.getHeight(); + this.width = image.getWidth(); + this.imageType = image.getType(); + this.hasAlpha = image.getColorModel().getNumComponents() != image.getColorModel() + .getNumColorComponents(); + this.alphaImageData = hasAlpha ? new byte[width * height * bytesPerComponent] : null; + + // The rows have 1-byte encoding marker and width * BYTES_PER_PIXEL pixel-bytes + int dataRowByteCount = width * bytesPerPixel + 1; + this.dataRawRowNone = new byte[dataRowByteCount]; + this.dataRawRowSub = new byte[dataRowByteCount]; + this.dataRawRowUp = new byte[dataRowByteCount]; + this.dataRawRowAverage = new byte[dataRowByteCount]; + this.dataRawRowPaeth = new byte[dataRowByteCount]; + + // Write the encoding markers + dataRawRowNone[0] = 0; + dataRawRowSub[0] = 1; + dataRawRowUp[0] = 2; + dataRawRowAverage[0] = 3; + dataRawRowPaeth[0] = 4; + + // c | b + // ----- + // a | x + // + // x => current pixel + this.aValues = new byte[bytesPerPixel]; + this.cValues = new byte[bytesPerPixel]; + this.bValues = new byte[bytesPerPixel]; + this.xValues = new byte[bytesPerPixel]; + this.tmpResultValues = new byte[bytesPerPixel]; + } + + /** + * Tries to compress the image using a predictor. 
+ * + * @return the image or null if it is not possible to encoded the image (e.g. not supported + * raster format etc.) + */ + PDImageXObject encode() throws IOException + { + Raster imageRaster = image.getRaster(); + final int elementsInRowPerPixel; + + // These variables store a row of the image each, the exact type depends + // on the image encoding. Can be a int[], short[] or byte[] + Object prevRow; + Object transferRow; + + switch (imageType) + { + case BufferedImage.TYPE_CUSTOM: + { + switch (imageRaster.getTransferType()) + { + case DataBuffer.TYPE_USHORT: + elementsInRowPerPixel = componentsPerPixel; + prevRow = new short[width * elementsInRowPerPixel]; + transferRow = new short[width * elementsInRowPerPixel]; + break; + case DataBuffer.TYPE_BYTE: + elementsInRowPerPixel = componentsPerPixel; + prevRow = new byte[width * elementsInRowPerPixel]; + transferRow = new byte[width * elementsInRowPerPixel]; + break; + default: + return null; + } + break; + } + + case BufferedImage.TYPE_3BYTE_BGR: + case BufferedImage.TYPE_4BYTE_ABGR: + { + elementsInRowPerPixel = componentsPerPixel; + prevRow = new byte[width * elementsInRowPerPixel]; + transferRow = new byte[width * elementsInRowPerPixel]; + break; + } + + case BufferedImage.TYPE_INT_BGR: + case BufferedImage.TYPE_INT_ARGB: + case BufferedImage.TYPE_INT_RGB: + { + elementsInRowPerPixel = 1; + prevRow = new int[width * elementsInRowPerPixel]; + transferRow = new int[width * elementsInRowPerPixel]; + break; + } + + default: + // We can not handle this unknown format + return null; + } + + final int elementsInTransferRow = width * elementsInRowPerPixel; + + // pre-size the output stream to half of the maximum size + ByteArrayOutputStream stream = new ByteArrayOutputStream( + height * width * bytesPerPixel / 2); + Deflater deflater = new Deflater(Filter.getCompressionLevel()); + DeflaterOutputStream zip = new DeflaterOutputStream(stream, deflater); + + int alphaPtr = 0; + + for (int rowNum = 0; rowNum < height; 
rowNum++) + { + imageRaster.getDataElements(0, rowNum, width, 1, transferRow); + + // We start to write at index one, as the predictor marker is in index zero + int writerPtr = 1; + Arrays.fill(aValues, (byte) 0); + Arrays.fill(cValues, (byte) 0); + + final byte[] transferRowByte; + final byte[] prevRowByte; + final int[] transferRowInt; + final int[] prevRowInt; + final short[] transferRowShort; + final short[] prevRowShort; + + if (transferRow instanceof byte[]) + { + transferRowByte = (byte[]) transferRow; + prevRowByte = (byte[]) prevRow; + transferRowInt = prevRowInt = null; + transferRowShort = prevRowShort = null; + } + else if (transferRow instanceof int[]) + { + transferRowInt = (int[]) transferRow; + prevRowInt = (int[]) prevRow; + transferRowShort = prevRowShort = null; + transferRowByte = prevRowByte = null; + } + else + { + // This must be short[] + transferRowShort = (short[]) transferRow; + prevRowShort = (short[]) prevRow; + transferRowInt = prevRowInt = null; + transferRowByte = prevRowByte = null; + } + + for (int indexInTransferRow = 0; indexInTransferRow < elementsInTransferRow; + indexInTransferRow += elementsInRowPerPixel, alphaPtr += bytesPerComponent) + { + // Copy the pixel values into the byte array + if (transferRowByte != null) + { + copyImageBytes(transferRowByte, indexInTransferRow, xValues, alphaImageData, + alphaPtr); + copyImageBytes(prevRowByte, indexInTransferRow, bValues, null, 0); + } + else if (transferRowInt != null) + { + copyIntToBytes(transferRowInt, indexInTransferRow, xValues, alphaImageData, + alphaPtr); + copyIntToBytes(prevRowInt, indexInTransferRow, bValues, null, 0); + } + else + { + // This must be short[] + copyShortsToBytes(transferRowShort, indexInTransferRow, xValues, alphaImageData, alphaPtr); + copyShortsToBytes(prevRowShort, indexInTransferRow, bValues, null, 0); + } + + // Encode the pixel values in the different encodings + int length = xValues.length; + for (int bytePtr = 0; bytePtr < length; bytePtr++) + 
{ + int x = xValues[bytePtr] & 0xFF; + int a = aValues[bytePtr] & 0xFF; + int b = bValues[bytePtr] & 0xFF; + int c = cValues[bytePtr] & 0xFF; + dataRawRowNone[writerPtr] = (byte) x; + dataRawRowSub[writerPtr] = pngFilterSub(x, a); + dataRawRowUp[writerPtr] = pngFilterUp(x, b); + dataRawRowAverage[writerPtr] = pngFilterAverage(x, a, b); + dataRawRowPaeth[writerPtr] = pngFilterPaeth(x, a, b, c); + writerPtr++; + } + + // We shift the values into the prev / upper left values for the next pixel + System.arraycopy(xValues, 0, aValues, 0, bytesPerPixel); + System.arraycopy(bValues, 0, cValues, 0, bytesPerPixel); + } + + byte[] rowToWrite = chooseDataRowToWrite(); + + // Write and compress the row as long it is hot (CPU cache wise) + zip.write(rowToWrite, 0, rowToWrite.length); + + // We swap prev and transfer row, so that we have the prev row for the next row. + Object temp = prevRow; + prevRow = transferRow; + transferRow = temp; + } + zip.close(); + deflater.end(); + + return preparePredictorPDImage(stream, bytesPerComponent * 8); + } + + private void copyIntToBytes(int[] transferRow, int indexInTranferRow, byte[] targetValues, + byte[] alphaImageData, int alphaPtr) + { + int val = transferRow[indexInTranferRow]; + byte b0 = (byte) (val & 0xFF); + byte b1 = (byte) ((val >> 8) & 0xFF); + byte b2 = (byte) ((val >> 16) & 0xFF); + + switch (imageType) + { + case BufferedImage.TYPE_INT_BGR: + targetValues[0] = b0; + targetValues[1] = b1; + targetValues[2] = b2; + break; + case BufferedImage.TYPE_INT_ARGB: + targetValues[0] = b2; + targetValues[1] = b1; + targetValues[2] = b0; + if (alphaImageData != null) + { + byte b3 = (byte) ((val >> 24) & 0xFF); + alphaImageData[alphaPtr] = b3; + } + break; + case BufferedImage.TYPE_INT_RGB: + targetValues[0] = b2; + targetValues[1] = b1; + targetValues[2] = b0; + break; + default: + break; + } + } + + private void copyImageBytes(byte[] transferRow, int indexInTranferRow, byte[] targetValues, + byte[] alphaImageData, int alphaPtr) + { + 
System.arraycopy(transferRow, indexInTranferRow, targetValues, 0, targetValues.length); + if (alphaImageData != null) + { + alphaImageData[alphaPtr] = transferRow[indexInTranferRow + targetValues.length]; + } + } + + private static void copyShortsToBytes(short[] transferRow, int indexInTranferRow, + byte[] targetValues, byte[] alphaImageData, int alphaPtr) + { + int itr = indexInTranferRow; + for (int i = 0; i < targetValues.length; i += 2) + { + short val = transferRow[itr++]; + targetValues[i] = (byte) ((val >> 8) & 0xFF); + targetValues[i + 1] = (byte) (val & 0xFF); + } + if (alphaImageData != null) + { + short alpha = transferRow[itr]; + alphaImageData[alphaPtr] = (byte) ((alpha >> 8) & 0xFF); + alphaImageData[alphaPtr + 1] = (byte) (alpha & 0xFF); + } + } + + private PDImageXObject preparePredictorPDImage(ByteArrayOutputStream stream, + int bitsPerComponent) throws IOException + { + int h = image.getHeight(); + int w = image.getWidth(); + + ColorSpace srcCspace = image.getColorModel().getColorSpace(); + int srcCspaceType = srcCspace.getType(); + PDColorSpace pdColorSpace = srcCspaceType == ColorSpace.TYPE_CMYK + ? PDDeviceCMYK.INSTANCE + : (srcCspaceType == ColorSpace.TYPE_GRAY + ? PDDeviceGray.INSTANCE : PDDeviceRGB.INSTANCE); + + // Encode the image profile if the image has one + if (srcCspace instanceof ICC_ColorSpace) + { + ICC_Profile profile = ((ICC_ColorSpace) srcCspace).getProfile(); + // We only encode a color profile if it is not sRGB + if (profile != ICC_Profile.getInstance(ColorSpace.CS_sRGB)) + { + PDICCBased pdProfile = new PDICCBased(document); + OutputStream outputStream = pdProfile.getPDStream() + .createOutputStream(COSName.FLATE_DECODE); + outputStream.write(profile.getData()); + outputStream.close(); + pdProfile.getPDStream().getCOSObject().setInt(COSName.N, + srcCspace.getNumComponents()); + pdProfile.getPDStream().getCOSObject().setItem(COSName.ALTERNATE, + srcCspaceType == ColorSpace.TYPE_GRAY ? 
COSName.DEVICEGRAY + : (srcCspaceType == ColorSpace.TYPE_CMYK ? COSName.DEVICECMYK + : COSName.DEVICERGB)); + pdColorSpace = pdProfile; + } + } + + PDImageXObject imageXObject = new PDImageXObject(document, + new ByteArrayInputStream(stream.toByteArray()), COSName.FLATE_DECODE, w, + h, bitsPerComponent, pdColorSpace); + + COSDictionary decodeParms = new COSDictionary(); + decodeParms.setItem(COSName.BITS_PER_COMPONENT, COSInteger.get(bitsPerComponent)); + decodeParms.setItem(COSName.PREDICTOR, COSInteger.get(15)); + decodeParms.setItem(COSName.COLUMNS, COSInteger.get(w)); + decodeParms.setItem(COSName.COLORS, COSInteger.get(srcCspace.getNumComponents())); + imageXObject.getCOSObject().setItem(COSName.DECODE_PARMS, decodeParms); + + if (image.getTransparency() != Transparency.OPAQUE) + { + PDImageXObject pdMask = prepareImageXObject(document, alphaImageData, + image.getWidth(), image.getHeight(), 8 * bytesPerComponent, PDDeviceGray.INSTANCE); + imageXObject.getCOSObject().setItem(COSName.SMASK, pdMask); + } + return imageXObject; + } + + /** + * We look which row encoding is the "best" one, ie. has the lowest sum. We don't implement + * anything fancier to choose the right row encoding. This is just the recommend algorithm + * in the spec. The get the perfect encoding you would need to do a brute force check how + * all the different encoded rows compress in the zip stream together. You have would have + * to check 5*image-height permutations... 
+ * + * @return the "best" row encoding of the row encodings + */ + private byte[] chooseDataRowToWrite() + { + byte[] rowToWrite = dataRawRowNone; + long estCompressSum = estCompressSum(dataRawRowNone); + long estCompressSumSub = estCompressSum(dataRawRowSub); + long estCompressSumUp = estCompressSum(dataRawRowUp); + long estCompressSumAvg = estCompressSum(dataRawRowAverage); + long estCompressSumPaeth = estCompressSum(dataRawRowPaeth); + if (estCompressSum > estCompressSumSub) + { + rowToWrite = dataRawRowSub; + estCompressSum = estCompressSumSub; + } + if (estCompressSum > estCompressSumUp) + { + rowToWrite = dataRawRowUp; + estCompressSum = estCompressSumUp; + } + if (estCompressSum > estCompressSumAvg) + { + rowToWrite = dataRawRowAverage; + estCompressSum = estCompressSumAvg; + } + if (estCompressSum > estCompressSumPaeth) + { + rowToWrite = dataRawRowPaeth; + } + return rowToWrite; + } + + /* + * PNG Filters, see https://www.w3.org/TR/PNG-Filters.html + */ + private static byte pngFilterSub(int x, int a) + { + return (byte) ((x & 0xFF) - (a & 0xFF)); + } + + private static byte pngFilterUp(int x, int b) + { + // Same as pngFilterSub, just called with the prior row + return pngFilterSub(x, b); + } + + private static byte pngFilterAverage(int x, int a, int b) + { + return (byte) (x - ((b + a) / 2)); + } + + private static byte pngFilterPaeth(int x, int a, int b, int c) + { + int p = a + b - c; + int pa = Math.abs(p - a); + int pb = Math.abs(p - b); + int pc = Math.abs(p - c); + final int pr; + if (pa <= pb && pa <= pc) + { + pr = a; + } + else if (pb <= pc) + { + pr = b; + } + else + { + pr = c; + } + + int r = x - pr; + return (byte) (r); + } + + private static long estCompressSum(byte[] dataRawRowSub) + { + long sum = 0; + for (byte aDataRawRowSub : dataRawRowSub) + { + // https://www.w3.org/TR/PNG-Encoders.html#E.Filter-selection + sum += Math.abs(aDataRawRowSub); + } + return sum; + } + } } diff --git 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImage.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImage.java index f407062b84b..66d9b8daaf1 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImage.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImage.java @@ -1,156 +1,221 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.pdmodel.graphics.image; - -import java.awt.Paint; -import java.awt.image.BufferedImage; -import java.io.IOException; -import java.io.InputStream; -import java.util.List; -import org.apache.pdfbox.cos.COSArray; -import org.apache.pdfbox.pdmodel.common.COSObjectable; -import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; - -/** - * An image in a PDF document. - * - * @author John Hewson - */ -public interface PDImage extends COSObjectable -{ - /** - * Returns the content of this image as an AWT buffered image with an (A)RGB color space. - * The size of the returned image is the larger of the size of the image itself or its mask. - * @return content of this image as a buffered image. 
- * @throws IOException - */ - BufferedImage getImage() throws IOException; - - /** - * Returns an ARGB image filled with the given paint and using this image as a mask. - * @param paint the paint to fill the visible portions of the image with - * @return a masked image filled with the given paint - * @throws IOException if the image cannot be read - * @throws IllegalStateException if the image is not a stencil. - */ - BufferedImage getStencilImage(Paint paint) throws IOException; - - /** - * Returns an InputStream containing the image data, irrespective of whether this is an - * inline image or an image XObject. - * @return Decoded stream - * @throws IOException if the data could not be read. - */ - InputStream createInputStream() throws IOException; - - /** - * Returns an InputStream containing the image data, irrespective of whether this is an - * inline image or an image XObject. The given filters will not be decoded. - * @return Decoded stream - * @throws IOException if the data could not be read. - */ - InputStream createInputStream(List stopFilters) throws IOException; - - /** - * Returns true if the image has no data. - */ - boolean isEmpty(); - - /** - * Returns true if the image is a stencil mask. - */ - boolean isStencil(); - - /** - * Sets whether or not the image is a stencil. - * This corresponds to the {@code ImageMask} entry in the image stream's dictionary. - * @param isStencil True to make the image a stencil. - */ - void setStencil(boolean isStencil); - - /** - * Returns bits per component of this image, or -1 if one has not been set. - */ - int getBitsPerComponent(); - - /** - * Set the number of bits per component. - * @param bitsPerComponent The number of bits per component. - */ - void setBitsPerComponent(int bitsPerComponent); - - /** - * Returns the image's color space. - * @throws IOException If there is an error getting the color space. 
- */ - PDColorSpace getColorSpace() throws IOException; - - /** - * Sets the color space for this image. - * @param colorSpace The color space for this image. - */ - void setColorSpace(PDColorSpace colorSpace); - - /** - * Returns height of this image, or -1 if one has not been set. - */ - int getHeight(); - - /** - * Sets the height of the image. - * @param height The height of the image. - */ - void setHeight(int height); - - /** - * Returns the width of this image, or -1 if one has not been set. - */ - int getWidth(); - - /** - * Sets the width of the image. - * @param width The width of the image. - */ - void setWidth(int width); - - /** - * Sets the decode array. - * @param decode the new decode array. - */ - void setDecode(COSArray decode); - - /** - * Returns the decode array. - */ - COSArray getDecode(); - - /** - * Returns true if the image should be interpolated when rendered. - */ - boolean getInterpolate(); - - - /** - * Sets the Interpolate flag, true for high-quality image scaling. - */ - void setInterpolate(boolean value); - - /** - * Returns the suffix for this image type, e.g. "jpg" - */ - String getSuffix(); -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.graphics.image; + +import java.awt.Paint; +import java.awt.Rectangle; +import java.awt.image.BufferedImage; +import java.awt.image.WritableRaster; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.filter.DecodeOptions; +import org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; + +/** + * An image in a PDF document. + * + * @author John Hewson + */ +public interface PDImage extends COSObjectable +{ + /** + * Returns the content of this image as an AWT buffered image with an (A)RGB color space. + * The size of the returned image is the larger of the size of the image itself or its mask. + * @return content of this image as a buffered image. + * @throws IOException + */ + BufferedImage getImage() throws IOException; + + /** + * Return the image data as WritableRaster. You should consult the PDColorSpace returned + * by {@link #getColorSpace()} to know how to interpret the data in this WritableRaster. + * + * Use this if you e.g. want access to the raw color information of a + * {@link org.apache.pdfbox.pdmodel.graphics.color.PDDeviceN} image. + * + * @return the raw writable raster for this image + * @throws IOException + */ + WritableRaster getRawRaster() throws IOException; + + /** + * Try to get the raw image as AWT buffered image with its original colorspace. No + * color conversion is performed. + * + * You could use the returned BufferedImage for draw operations. But this would be very + * slow as the color conversion would happen on demand. You rather should use + * {@link #getImage()} for that. + * + * This method returns null if it is not possible to map the underlying colorspace into a + * java.awt.ColorSpace. + * + * Use this method if you want to extract the image without losing any color information, as + * no color conversion will be performed. 
+ * + * You can always use {@link #getRawRaster()}, if you want to access the raw data even if + * no matching java.awt.ColorSpace exists. + * + * @return the raw image with a java.awt.ColorSpace or null + * @throws IOException + */ + BufferedImage getRawImage() throws IOException; + + /** + * Returns the content of this image as an AWT buffered image with an (A)RGB color space. Only + * the subregion specified is rendered, and is subsampled by advancing the specified amount of + * rows and columns in the source image for every resulting pixel. + * + * Note that unlike {@link PDImage#getImage() the unparameterized version}, this method does not + * cache the resulting image. + * + * @param region The region of the source image to get, or null if the entire image is needed. + * The actual region will be clipped to the dimensions of the source image. + * @param subsampling The amount of rows and columns to advance for every output pixel, a value + * of 1 meaning every pixel will be read + * @return subsampled content of the requested subregion as a buffered image. + * @throws IOException + */ + BufferedImage getImage(Rectangle region, int subsampling) throws IOException; + + /** + * Returns an ARGB image filled with the given paint and using this image as a mask. + * @param paint the paint to fill the visible portions of the image with + * @return a masked image filled with the given paint + * @throws IOException if the image cannot be read + * @throws IllegalStateException if the image is not a stencil. + */ + BufferedImage getStencilImage(Paint paint) throws IOException; + + /** + * Returns an InputStream containing the image data, irrespective of whether this is an + * inline image or an image XObject. + * @return Decoded stream + * @throws IOException if the data could not be read. 
+ */ + InputStream createInputStream() throws IOException; + + /** + * Returns an InputStream containing the image data, irrespective of whether this is an + * inline image or an image XObject. The given filters will not be decoded. + * @param stopFilters A list of filters to stop decoding at. + * @return Decoded stream + * @throws IOException if the data could not be read. + */ + InputStream createInputStream(List stopFilters) throws IOException; + + /** + * Returns an InputStream, passing additional options to each filter. As a side effect, the + * filterSubsampled flag is set in {@link DecodeOptions}. + * + * @param options Additional decoding options passed to the filters used + * @return Decoded stream + * @throws IOException if the data could not be read + */ + InputStream createInputStream(DecodeOptions options) throws IOException; + + /** + * Returns true if the image has no data. + */ + boolean isEmpty(); + + /** + * Returns true if the image is a stencil mask. + */ + boolean isStencil(); + + /** + * Sets whether or not the image is a stencil. + * This corresponds to the {@code ImageMask} entry in the image stream's dictionary. + * @param isStencil True to make the image a stencil. + */ + void setStencil(boolean isStencil); + + /** + * Returns bits per component of this image, or -1 if one has not been set. + */ + int getBitsPerComponent(); + + /** + * Set the number of bits per component. + * @param bitsPerComponent The number of bits per component. + */ + void setBitsPerComponent(int bitsPerComponent); + + /** + * Returns the image's color space. + * @throws IOException If there is an error getting the color space. + */ + PDColorSpace getColorSpace() throws IOException; + + /** + * Sets the color space for this image. + * @param colorSpace The color space for this image. + */ + void setColorSpace(PDColorSpace colorSpace); + + /** + * Returns height of this image, or -1 if one has not been set. 
+ */ + int getHeight(); + + /** + * Sets the height of the image. + * @param height The height of the image. + */ + void setHeight(int height); + + /** + * Returns the width of this image, or -1 if one has not been set. + */ + int getWidth(); + + /** + * Sets the width of the image. + * @param width The width of the image. + */ + void setWidth(int width); + + /** + * Sets the decode array. + * @param decode the new decode array. + */ + void setDecode(COSArray decode); + + /** + * Returns the decode array. + */ + COSArray getDecode(); + + /** + * Returns true if the image should be interpolated when rendered. + */ + boolean getInterpolate(); + + + /** + * Sets the Interpolate flag, true for high-quality image scaling. + */ + void setInterpolate(boolean value); + + /** + * Returns the suffix for this image type, e.g. "jpg" + */ + String getSuffix(); +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImageXObject.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImageXObject.java index caf4970e53b..65dae2281b7 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImageXObject.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImageXObject.java @@ -18,10 +18,12 @@ import java.awt.Graphics2D; import java.awt.Paint; +import java.awt.Rectangle; import java.awt.RenderingHints; import java.awt.image.BufferedImage; import java.awt.image.WritableRaster; import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -29,19 +31,26 @@ import java.io.OutputStream; import java.lang.ref.SoftReference; import java.util.List; + import javax.imageio.ImageIO; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; import 
org.apache.pdfbox.cos.COSInputStream; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.filter.DecodeOptions; +import org.apache.pdfbox.filter.DecodeResult; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDMetadata; import org.apache.pdfbox.pdmodel.common.PDStream; +import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList; import org.apache.pdfbox.pdmodel.graphics.PDXObject; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; @@ -63,23 +72,20 @@ public final class PDImageXObject extends PDXObject implements PDImage private SoftReference cachedImage; private PDColorSpace colorSpace; - private final PDResources resources; // current resource dictionary (has color spaces) + + // initialize to MAX_VALUE as we prefer lower subsampling when keeping/replacing cache. + private int cachedImageSubsampling = Integer.MAX_VALUE; /** - * Creates a thumbnail Image XObject from the given COSBase and name. - * @param cosStream the COS stream - * @return an XObject - * @throws IOException if there is an error creating the XObject. + * current resource dictionary (has color spaces) */ - public static PDImageXObject createThumbnail(COSStream cosStream) throws IOException - { - // thumbnails are special, any non-null subtype is treated as being "Image" - PDStream pdStream = new PDStream(cosStream); - return new PDImageXObject(pdStream, null); - } + private final PDResources resources; /** - * Creates an Image XObject in the given document. + * Creates an Image XObject in the given document. This constructor is for internal PDFBox use + * and is not for PDF generation. Users who want to create images should look at {@link #createFromFileByExtension(File, PDDocument) + * }. 
+ * * @param document the current document * @throws java.io.IOException if there is an error creating the XObject. */ @@ -89,7 +95,10 @@ public PDImageXObject(PDDocument document) throws IOException } /** - * Creates an Image XObject in the given document using the given filtered stream. + * Creates an Image XObject in the given document using the given filtered stream. This + * constructor is for internal PDFBox use and is not for PDF generation. Users who want to + * create images should look at {@link #createFromFileByExtension(File, PDDocument) }. + * * @param document the current document * @param encodedStream an encoded stream of image data * @param cosFilter the filter or a COSArray of filters @@ -113,6 +122,50 @@ public PDImageXObject(PDDocument document, InputStream encodedStream, setColorSpace(initColorSpace); } + /** + * Creates an Image XObject with the given stream as its contents and current color spaces. This + * constructor is for internal PDFBox use and is not for PDF generation. Users who want to + * create images should look at {@link #createFromFileByExtension(File, PDDocument) }. + * + * @param stream the XObject stream to read + * @param resources the current resources + * @throws java.io.IOException if there is an error creating the XObject. + */ + public PDImageXObject(PDStream stream, PDResources resources) throws IOException + { + super(stream, COSName.IMAGE); + this.resources = resources; + List filters = stream.getFilters(); + if (filters != null && !filters.isEmpty() && COSName.JPX_DECODE.equals(filters.get(filters.size()-1))) + { + COSInputStream is = null; + try + { + is = stream.createInputStream(); + DecodeResult decodeResult = is.getDecodeResult(); + stream.getCOSObject().addAll(decodeResult.getParameters()); + this.colorSpace = decodeResult.getJPXColorSpace(); + } + finally + { + IOUtils.closeQuietly(is); + } + } + } + + /** + * Creates a thumbnail Image XObject from the given COSBase and name. 
+ * @param cosStream the COS stream + * @return an XObject + * @throws IOException if there is an error creating the XObject. + */ + public static PDImageXObject createThumbnail(COSStream cosStream) throws IOException + { + // thumbnails are special, any non-null subtype is treated as being "Image" + PDStream pdStream = new PDStream(cosStream); + return new PDImageXObject(pdStream, null); + } + /** * Creates a COS stream from raw (encoded) data. */ @@ -136,17 +189,6 @@ private static COSStream createRawStream(PDDocument document, InputStream rawInp return stream; } - /** - * Creates an Image XObject with the given stream as its contents and current color spaces. - * @param stream the XObject stream to read - * @param resources the current resources - * @throws java.io.IOException if there is an error creating the XObject. - */ - public PDImageXObject(PDStream stream, PDResources resources) throws IOException - { - this(stream, resources, stream.createInputStream()); - } - /** * Create a PDImageXObject from an image file, see {@link #createFromFileByExtension(File, PDDocument)} for * more details. @@ -164,11 +206,17 @@ public static PDImageXObject createFromFile(String imagePath, PDDocument doc) th /** * Create a PDImageXObject from an image file. The file format is determined by the file name - * suffix. The following suffixes are supported: jpg, jpeg, tif, tiff, gif, bmp and png. This is + * suffix. The following suffixes are supported: JPG, JPEG, TIF, TIFF, GIF, BMP and PNG. This is * a convenience method that calls {@link JPEGFactory#createFromStream}, * {@link CCITTFactory#createFromFile} or {@link ImageIO#read} combined with * {@link LosslessFactory#createFromImage}. (The later can also be used to create a - * PDImageXObject from a BufferedImage). + * PDImageXObject from a BufferedImage). Starting with 2.0.18, this call will create an image + * directly from a PNG file without decoding it (when possible), which is faster. 
However the + * result size depends on the compression skill of the software that created the PNG file. If + * file size or bandwidth are important to you or to your clients, then create your PNG files + * with a tool that has implemented the + * Zopfli + * algorithm, or use the two-step process mentioned above. * * @param file the image file. * @param doc the document that shall use this PDImageXObject. @@ -188,10 +236,16 @@ public static PDImageXObject createFromFileByExtension(File file, PDDocument doc String ext = name.substring(dot + 1).toLowerCase(); if ("jpg".equals(ext) || "jpeg".equals(ext)) { - FileInputStream fis = new FileInputStream(file); - PDImageXObject imageXObject = JPEGFactory.createFromStream(doc, fis); - fis.close(); - return imageXObject; + FileInputStream fis = null; + try + { + fis = new FileInputStream(file); + return JPEGFactory.createFromStream(doc, fis); + } + finally + { + IOUtils.closeQuietly(fis); + } } if ("tif".equals(ext) || "tiff".equals(ext)) { @@ -207,11 +261,17 @@ public static PDImageXObject createFromFileByExtension(File file, PDDocument doc /** * Create a PDImageXObject from an image file. The file format is determined by the file - * content. The following file types are supported: jpg, jpeg, tif, tiff, gif, bmp and png. This + * content. The following file types are supported: JPG, JPEG, TIF, TIFF, GIF, BMP and PNG. This * is a convenience method that calls {@link JPEGFactory#createFromStream}, * {@link CCITTFactory#createFromFile} or {@link ImageIO#read} combined with * {@link LosslessFactory#createFromImage}. (The later can also be used to create a - * PDImageXObject from a BufferedImage). + * PDImageXObject from a BufferedImage). Starting with 2.0.18, this call will create an image + * directly from a png file without decoding it (when possible), which is faster. However the + * result size depends on the compression skill of the software that created the PNG file. 
If + * file size or bandwidth are important to you or to your clients, then create your PNG files + * with a tool that has implemented the + * Zopfli + * algorithm, or use the two-step process mentioned above. * * @param file the image file. * @param doc the document that shall use this PDImageXObject. @@ -254,29 +314,100 @@ public static PDImageXObject createFromFileByContent(File file, PDDocument doc) } if (fileType.equals(FileType.TIFF)) { - return CCITTFactory.createFromFile(doc, file); + try + { + return CCITTFactory.createFromFile(doc, file); + } + catch (IOException ex) + { + LOG.debug("Reading as TIFF failed, setting fileType to PNG", ex); + // Plan B: try reading with ImageIO + // common exception: + // First image in tiff is not CCITT T4 or T6 compressed + fileType = FileType.PNG; + } } if (fileType.equals(FileType.BMP) || fileType.equals(FileType.GIF) || fileType.equals(FileType.PNG)) { BufferedImage bim = ImageIO.read(file); return LosslessFactory.createFromImage(doc, bim); } - throw new IllegalArgumentException("Image type not supported: " + file.getName()); + throw new IllegalArgumentException("Image type " + fileType + " not supported: " + file.getName()); } - // repairs parameters using decode result - private PDImageXObject(PDStream stream, PDResources resources, COSInputStream input) + /** + * Create a PDImageXObject from bytes of an image file. The file format is determined by the + * file content. The following file types are supported: JPG, JPEG, TIF, TIFF, GIF, BMP and PNG. + * This is a convenience method that calls {@link JPEGFactory#createFromByteArray}, + * {@link CCITTFactory#createFromFile} or {@link ImageIO#read} combined with + * {@link LosslessFactory#createFromImage}. (The later can also be used to create a + * PDImageXObject from a BufferedImage). Starting with 2.0.18, this call will create an image + * directly from a PNG file without decoding it (when possible), which is faster. 
However the + * result size depends on the compression skill of the software that created the PNG file. If + * file size or bandwidth are important to you or to your clients, then create your PNG files + * with a tool that has implemented the + * Zopfli + * algorithm, or use the two-step process mentioned above. + * + * @param byteArray bytes from an image file. + * @param document the document that shall use this PDImageXObject. + * @param name name of image file for exception messages, can be null. + * @return a PDImageXObject. + * @throws IOException if there is an error when reading the file or creating the + * PDImageXObject. + * @throws IllegalArgumentException if the image type is not supported. + */ + public static PDImageXObject createFromByteArray(PDDocument document, byte[] byteArray, String name) throws IOException { - super(repair(stream, input), COSName.IMAGE); - this.resources = resources; - this.colorSpace = input.getDecodeResult().getJPXColorSpace(); - } + FileType fileType; + try + { + fileType = FileTypeDetector.detectFileType(byteArray); + } + catch (IOException e) + { + throw new IOException("Could not determine file type: " + name, e); + } + if (fileType == null) + { + throw new IllegalArgumentException("Image type not supported: " + name); + } - // repairs parameters using decode result - private static PDStream repair(PDStream stream, COSInputStream input) - { - stream.getCOSObject().addAll(input.getDecodeResult().getParameters()); - return stream; + if (fileType.equals(FileType.JPEG)) + { + return JPEGFactory.createFromByteArray(document, byteArray); + } + if (fileType.equals(FileType.PNG)) + { + // Try to directly convert the image without recoding it. 
+ PDImageXObject image = PNGConverter.convertPNGImage(document, byteArray); + if (image != null) + { + return image; + } + } + if (fileType.equals(FileType.TIFF)) + { + try + { + return CCITTFactory.createFromByteArray(document, byteArray); + } + catch (IOException ex) + { + LOG.debug("Reading as TIFF failed, setting fileType to PNG", ex); + // Plan B: try reading with ImageIO + // common exception: + // First image in tiff is not CCITT T4 or T6 compressed + fileType = FileType.PNG; + } + } + if (fileType.equals(FileType.BMP) || fileType.equals(FileType.GIF) || fileType.equals(FileType.PNG)) + { + ByteArrayInputStream bais = new ByteArrayInputStream(byteArray); + BufferedImage bim = ImageIO.read(bais); + return LosslessFactory.createFromImage(document, bim); + } + throw new IllegalArgumentException("Image type " + fileType + " not supported: " + name); } /** @@ -285,7 +416,7 @@ private static PDStream repair(PDStream stream, COSInputStream input) */ public PDMetadata getMetadata() { - COSStream cosStream = (COSStream) getCOSObject().getDictionaryObject(COSName.METADATA); + COSStream cosStream = getCOSObject().getCOSStream(COSName.METADATA); if (cosStream != null) { return new PDMetadata(cosStream); @@ -304,11 +435,12 @@ public void setMetadata(PDMetadata meta) /** * Returns the key of this XObject in the structural parent tree. - * @return this object's key the structural parent tree + * + * @return this object's key the structural parent tree or -1 if there isn't any. 
*/ public int getStructParent() { - return getCOSObject().getInt(COSName.STRUCT_PARENT, 0); + return getCOSObject().getInt(COSName.STRUCT_PARENT); } /** @@ -327,7 +459,16 @@ public void setStructParent(int key) @Override public BufferedImage getImage() throws IOException { - if (cachedImage != null) + return getImage(null, 1); + } + + /** + * {@inheritDoc} + */ + @Override + public BufferedImage getImage(Rectangle region, int subsampling) throws IOException + { + if (region == null && subsampling == cachedImageSubsampling && cachedImage != null) { BufferedImage cached = cachedImage.get(); if (cached != null) @@ -337,13 +478,14 @@ public BufferedImage getImage() throws IOException } // get image as RGB - BufferedImage image = SampledImageReader.getRGBImage(this, getColorKeyMask()); + BufferedImage image = SampledImageReader.getRGBImage(this, region, subsampling, getColorKeyMask()); // soft mask (overrides explicit mask) PDImageXObject softMask = getSoftMask(); if (softMask != null) { - image = applyMask(image, softMask.getOpaqueImage(), true); + float[] matte = extractMatte(softMask); + image = applyMask(image, softMask.getOpaqueImage(), true, matte); } else { @@ -351,14 +493,60 @@ public BufferedImage getImage() throws IOException PDImageXObject mask = getMask(); if (mask != null && mask.isStencil()) { - image = applyMask(image, mask.getOpaqueImage(), false); + image = applyMask(image, mask.getOpaqueImage(), false, null); } } - cachedImage = new SoftReference(image); + if (region == null && subsampling <= cachedImageSubsampling) + { + // only cache full-image renders, and prefer lower subsampling frequency, as lower + // subsampling means higher quality and longer render times. 
+ cachedImageSubsampling = subsampling; + cachedImage = new SoftReference(image); + } + return image; } + @Override + public BufferedImage getRawImage() throws IOException + { + return getColorSpace().toRawImage(getRawRaster()); + } + + @Override + public WritableRaster getRawRaster() throws IOException + { + return SampledImageReader.getRawRaster(this); + } + + /** + * Extract the matte color from a softmask. + * + * @param softMask + * @return the matte color. + * @throws IOException if the color conversion fails. + */ + private float[] extractMatte(PDImageXObject softMask) throws IOException + { + COSBase base = softMask.getCOSObject().getItem(COSName.MATTE); + float[] matte = null; + if (base instanceof COSArray) + { + // PDFBOX-4267: process /Matte + // see PDF specification 1.7, 11.6.5.3 Soft-Mask Images + matte = ((COSArray) base).toFloatArray(); + // convert to RGB + if (matte.length < getColorSpace().getNumberOfComponents()) + { + LOG.error("Image /Matte entry not long enough for colorspace, skipped"); + return null; + } + matte = getColorSpace().toRGB(matte); + } + return matte; + } + /** * {@inheritDoc} * The returned images are not cached. 
@@ -386,8 +574,8 @@ public BufferedImage getOpaqueImage() throws IOException // explicit mask: RGB + Binary -> ARGB // soft mask: RGB + Gray -> ARGB - private BufferedImage applyMask(BufferedImage image, BufferedImage mask, boolean isSoft) - throws IOException + private BufferedImage applyMask(BufferedImage image, BufferedImage mask, + boolean isSoft, float[] matte) { if (mask == null) { @@ -400,13 +588,20 @@ private BufferedImage applyMask(BufferedImage image, BufferedImage mask, boolean // scale mask to fit image, or image to fit mask, whichever is larger if (mask.getWidth() < width || mask.getHeight() < height) { - mask = scaleImage(mask, width, height); + mask = scaleImage(mask, width, height, BufferedImage.TYPE_BYTE_GRAY); } - else if (mask.getWidth() > width || mask.getHeight() > height) + + if (mask.getWidth() > width || mask.getHeight() > height) { width = mask.getWidth(); height = mask.getHeight(); - image = scaleImage(image, width, height); + image = scaleImage(image, width, height, BufferedImage.TYPE_INT_ARGB); + } + else if (image.getType() != BufferedImage.TYPE_INT_ARGB) + { + // always convert to ARGB to allow bulk read / write + // PDFBOX-4470 bitonal image has only one element => copy into RGB + image = scaleImage(image, width, height, BufferedImage.TYPE_INT_ARGB); } // compose to ARGB @@ -415,47 +610,55 @@ else if (mask.getWidth() > width || mask.getHeight() > height) WritableRaster dest = masked.getRaster(); WritableRaster alpha = mask.getRaster(); - float[] rgb = new float[4]; - float[] rgba = new float[4]; - float[] alphaPixel = null; + int[] alphaRow = new int[width]; + int[] rgbaRow = new int[4 * width]; for (int y = 0; y < height; y++) { + src.getPixels(0, y, width, 1, rgbaRow); + alpha.getSamples(0, y, width, 1, 0, alphaRow); for (int x = 0; x < width; x++) { - src.getPixel(x, y, rgb); - - rgba[0] = rgb[0]; - rgba[1] = rgb[1]; - rgba[2] = rgb[2]; - - alphaPixel = alpha.getPixel(x, y, alphaPixel); + int offset = x * 4; if (isSoft) { - rgba[3] 
= alphaPixel[0]; + rgbaRow[offset + 3] = alphaRow[x]; + if (matte != null && alphaRow[x] != 0) + { + float k = alphaRow[x] / 255f; + rgbaRow[offset + 0] = clampColor(((rgbaRow[offset + 0] / 255f - matte[0]) / k + matte[0]) * 255f); + rgbaRow[offset + 1] = clampColor(((rgbaRow[offset + 1] / 255f - matte[1]) / k + matte[1]) * 255f); + rgbaRow[offset + 2] = clampColor(((rgbaRow[offset + 2] / 255f - matte[2]) / k + matte[2]) * 255f); + } } else { - rgba[3] = 255 - alphaPixel[0]; + rgbaRow[offset + 3] = 255 - alphaRow[x]; } - - dest.setPixel(x, y, rgba); } + dest.setPixels(0, y, width, 1, rgbaRow); } - return masked; } + private int clampColor(float color) + { + return color < 0 ? 0 : (color > 255 ? 255 : Math.round(color)); + } + /** * High-quality image scaling. */ - private BufferedImage scaleImage(BufferedImage image, int width, int height) + private BufferedImage scaleImage(BufferedImage image, int width, int height, int type) { - BufferedImage image2 = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); + BufferedImage image2 = new BufferedImage(width, height, type); Graphics2D g = image2.createGraphics(); - g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, - RenderingHints.VALUE_INTERPOLATION_BICUBIC); - g.setRenderingHint(RenderingHints.KEY_RENDERING, - RenderingHints.VALUE_RENDER_QUALITY); + if (getInterpolate()) + { + g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, + RenderingHints.VALUE_INTERPOLATION_BICUBIC); + g.setRenderingHint(RenderingHints.KEY_RENDERING, + RenderingHints.VALUE_RENDER_QUALITY); + } g.drawImage(image, 0, 0, width, height, 0, 0, image.getWidth(), image.getHeight(), null); g.dispose(); return image2; @@ -476,7 +679,7 @@ public PDImageXObject getMask() throws IOException } else { - COSStream cosStream = (COSStream) getCOSObject().getDictionaryObject(COSName.MASK); + COSStream cosStream = getCOSObject().getCOSStream(COSName.MASK); if (cosStream != null) { // always DeviceGray @@ -507,7 +710,7 @@ public COSArray 
getColorKeyMask() */ public PDImageXObject getSoftMask() throws IOException { - COSStream cosStream = (COSStream) getCOSObject().getDictionaryObject(COSName.SMASK); + COSStream cosStream = getCOSObject().getCOSStream(COSName.SMASK); if (cosStream != null) { // always DeviceGray @@ -540,10 +743,27 @@ public PDColorSpace getColorSpace() throws IOException { if (colorSpace == null) { - COSBase cosBase = getCOSObject().getDictionaryObject(COSName.COLORSPACE, COSName.CS); + COSBase cosBase = getCOSObject().getItem(COSName.COLORSPACE, COSName.CS); if (cosBase != null) { + COSObject indirect = null; + if (cosBase instanceof COSObject && + resources != null && resources.getResourceCache() != null) + { + // PDFBOX-4022: use the resource cache because several images + // might have the same colorspace indirect object. + indirect = (COSObject) cosBase; + colorSpace = resources.getResourceCache().getColorSpace(indirect); + if (colorSpace != null) + { + return colorSpace; + } + } colorSpace = PDColorSpace.create(cosBase, resources); + if (indirect != null) + { + resources.getResourceCache().put(indirect, colorSpace); + } } else if (isStencil()) { @@ -564,6 +784,12 @@ public InputStream createInputStream() throws IOException { return getStream().createInputStream(); } + + @Override + public InputStream createInputStream(DecodeOptions options) throws IOException + { + return getStream().createInputStream(options); + } @Override public InputStream createInputStream(List stopFilters) throws IOException @@ -581,6 +807,8 @@ public boolean isEmpty() public void setColorSpace(PDColorSpace cs) { getCOSObject().setItem(COSName.COLORSPACE, cs != null ? cs.getCOSObject() : null); + colorSpace = null; + cachedImage = null; } @Override @@ -679,11 +907,40 @@ else if (filters.contains(COSName.FLATE_DECODE) { return "png"; } + else if (filters.contains(COSName.JBIG2_DECODE)) + { + return "jb2"; + } else { LOG.warn("getSuffix() returns null, filters: " + filters); - // TODO more... 
return null; } } + + /** + * This will get the optional content group or optional content membership dictionary. + * + * @return The optional content group or optional content membership dictionary or null if there + * is none. + */ + public PDPropertyList getOptionalContent() + { + COSBase base = getCOSObject().getDictionaryObject(COSName.OC); + if (base instanceof COSDictionary) + { + return PDPropertyList.create((COSDictionary) base); + } + return null; + } + + /** + * Sets the optional content group or optional content membership dictionary. + * + * @param oc The optional content group or optional content membership dictionary. + */ + public void setOptionalContent(PDPropertyList oc) + { + getCOSObject().setItem(COSName.OC, oc); + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDInlineImage.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDInlineImage.java index 7f6c184fd63..e2c3199e35d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDInlineImage.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDInlineImage.java @@ -1,380 +1,426 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.pdmodel.graphics.image; - -import java.awt.Paint; -import java.awt.image.BufferedImage; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.List; -import org.apache.pdfbox.cos.COSArray; -import org.apache.pdfbox.cos.COSBase; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.filter.DecodeResult; -import org.apache.pdfbox.filter.Filter; -import org.apache.pdfbox.filter.FilterFactory; -import org.apache.pdfbox.pdmodel.PDResources; -import org.apache.pdfbox.pdmodel.common.COSArrayList; -import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; -import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; - -/** - * An inline image object which uses a special syntax to express the data for a - * small image directly within the content stream. - * - * @author Ben Litchfield - * @author John Hewson - */ -public final class PDInlineImage implements PDImage -{ - // image parameters - private final COSDictionary parameters; - - // the current resources, contains named color spaces - private final PDResources resources; - - // image data - private final byte[] rawData; - private final byte[] decodedData; - - /** - * Creates an inline image from the given parameters and data. 
- * - * @param parameters the image parameters - * @param data the image data - * @param resources the current resources - * @throws IOException if the stream cannot be decoded - */ - public PDInlineImage(COSDictionary parameters, byte[] data, PDResources resources) - throws IOException - { - this.parameters = parameters; - this.resources = resources; - this.rawData = data; - - DecodeResult decodeResult = null; - List filters = getFilters(); - if (filters == null || filters.isEmpty()) - { - this.decodedData = data; - } - else - { - ByteArrayInputStream in = new ByteArrayInputStream(data); - ByteArrayOutputStream out = new ByteArrayOutputStream(data.length); - for (int i = 0; i < filters.size(); i++) - { - // TODO handling of abbreviated names belongs here, rather than in other classes - out.reset(); - Filter filter = FilterFactory.INSTANCE.getFilter(filters.get(i)); - decodeResult = filter.decode(in, out, parameters, i); - in = new ByteArrayInputStream(out.toByteArray()); - } - this.decodedData = out.toByteArray(); - } - - // repair parameters - if (decodeResult != null) - { - parameters.addAll(decodeResult.getParameters()); - } - } - - @Override - public COSBase getCOSObject() - { - return parameters; - } - - @Override - public int getBitsPerComponent() - { - if (isStencil()) - { - return 1; - } - else - { - return parameters.getInt(COSName.BPC, COSName.BITS_PER_COMPONENT, -1); - } - } - - @Override - public void setBitsPerComponent(int bitsPerComponent) - { - parameters.setInt(COSName.BPC, bitsPerComponent); - } - - @Override - public PDColorSpace getColorSpace() throws IOException - { - COSBase cs = parameters.getDictionaryObject(COSName.CS, COSName.COLORSPACE); - if (cs != null) - { - return createColorSpace(cs); - } - else if (isStencil()) - { - // stencil mask color space must be gray, it is often missing - return PDDeviceGray.INSTANCE; - } - else - { - // an image without a color space is always broken - throw new IOException("could not determine inline 
image color space"); - } - } - - // deliver the long name of a device colorspace, or the parameter - private COSBase toLongName(COSBase cs) - { - if (COSName.RGB.equals(cs)) - { - return COSName.DEVICERGB; - } - if (COSName.CMYK.equals(cs)) - { - return COSName.DEVICECMYK; - } - if (COSName.G.equals(cs)) - { - return COSName.DEVICEGRAY; - } - return cs; - } - - private PDColorSpace createColorSpace(COSBase cs) throws IOException - { - if (cs instanceof COSName) - { - return PDColorSpace.create(toLongName(cs), resources); - } - - if (cs instanceof COSArray && ((COSArray) cs).size() > 1) - { - COSArray srcArray = (COSArray) cs; - COSBase csType = srcArray.get(0); - if (COSName.I.equals(csType) || COSName.INDEXED.equals(csType)) - { - COSArray dstArray = new COSArray(); - dstArray.addAll(srcArray); - dstArray.set(0, COSName.INDEXED); - dstArray.set(1, toLongName(srcArray.get(1))); - return PDColorSpace.create(dstArray, resources); - } - - throw new IOException("Illegal type of inline image color space: " + csType); - } - - throw new IOException("Illegal type of object for inline image color space: " + cs); - } - - @Override - public void setColorSpace(PDColorSpace colorSpace) - { - COSBase base = null; - if (colorSpace != null) - { - base = colorSpace.getCOSObject(); - } - parameters.setItem(COSName.CS, base); - } - - @Override - public int getHeight() - { - return parameters.getInt(COSName.H, COSName.HEIGHT, -1); - } - - @Override - public void setHeight(int height) - { - parameters.setInt(COSName.H, height); - } - - @Override - public int getWidth() - { - return parameters.getInt(COSName.W, COSName.WIDTH, -1); - } - - @Override - public void setWidth(int width) - { - parameters.setInt(COSName.W, width); - } - - @Override - public boolean getInterpolate() - { - return parameters.getBoolean(COSName.I, COSName.INTERPOLATE, false); - } - - @Override - public void setInterpolate(boolean value) - { - parameters.setBoolean(COSName.I, value); - } - - /** - * Returns a list 
of filters applied to this stream, or null if there are none. - * - * @return a list of filters applied to this stream - */ - // TODO return an empty list if there are none? - public List getFilters() - { - List names = null; - COSBase filters = parameters.getDictionaryObject(COSName.F, COSName.FILTER); - if (filters instanceof COSName) - { - COSName name = (COSName) filters; - names = new COSArrayList(name.getName(), name, parameters, COSName.FILTER); - } - else if (filters instanceof COSArray) - { - names = COSArrayList.convertCOSNameCOSArrayToList((COSArray) filters); - } - return names; - } - - /** - * Sets which filters are applied to this stream. - * - * @param filters the filters to apply to this stream. - */ - public void setFilters(List filters) - { - COSBase obj = COSArrayList.convertStringListToCOSNameCOSArray(filters); - parameters.setItem(COSName.F, obj); - } - - @Override - public void setDecode(COSArray decode) - { - parameters.setItem(COSName.D, decode); - } - - @Override - public COSArray getDecode() - { - return (COSArray) parameters.getDictionaryObject(COSName.D, COSName.DECODE); - } - - @Override - public boolean isStencil() - { - return parameters.getBoolean(COSName.IM, COSName.IMAGE_MASK, false); - } - - @Override - public void setStencil(boolean isStencil) - { - parameters.setBoolean(COSName.IM, isStencil); - } - - @Override - public InputStream createInputStream() throws IOException - { - return new ByteArrayInputStream(decodedData); - } - - @Override - public InputStream createInputStream(List stopFilters) throws IOException - { - List filters = getFilters(); - ByteArrayInputStream in = new ByteArrayInputStream(rawData); - ByteArrayOutputStream out = new ByteArrayOutputStream(rawData.length); - for (int i = 0; i < filters.size(); i++) - { - // TODO handling of abbreviated names belongs here, rather than in other classes - out.reset(); - if (stopFilters.contains(filters.get(i))) - { - break; - } - else - { - Filter filter = 
FilterFactory.INSTANCE.getFilter(filters.get(i)); - filter.decode(in, out, parameters, i); - in = new ByteArrayInputStream(out.toByteArray()); - } - } - return new ByteArrayInputStream(out.toByteArray()); - } - - @Override - public boolean isEmpty() - { - return decodedData.length == 0; - } - - /** - * Returns the inline image data. - */ - public byte[] getData() - { - return decodedData; - } - - @Override - public BufferedImage getImage() throws IOException - { - return SampledImageReader.getRGBImage(this, getColorKeyMask()); - } - - @Override - public BufferedImage getStencilImage(Paint paint) throws IOException - { - if (!isStencil()) - { - throw new IllegalStateException("Image is not a stencil"); - } - return SampledImageReader.getStencilImage(this, paint); - } - - /** - * Returns the color key mask array associated with this image, or null if - * there is none. - * - * @return Mask Image XObject - */ - public COSArray getColorKeyMask() - { - COSBase mask = parameters.getDictionaryObject(COSName.IM, COSName.MASK); - if (mask instanceof COSArray) - { - return (COSArray) mask; - } - return null; - } - - /** - * Returns the suffix for this image type, e.g. jpg/png. - * - * @return The image suffix. - */ - @Override - public String getSuffix() - { - // TODO implement me - return null; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.graphics.image; + +import java.awt.Paint; +import java.awt.Rectangle; +import java.awt.image.BufferedImage; +import java.awt.image.WritableRaster; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.filter.DecodeOptions; +import org.apache.pdfbox.filter.DecodeResult; +import org.apache.pdfbox.filter.Filter; +import org.apache.pdfbox.filter.FilterFactory; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.COSArrayList; +import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; + +/** + * An inline image object which uses a special syntax to express the data for a + * small image directly within the content stream. + * + * @author Ben Litchfield + * @author John Hewson + */ +public final class PDInlineImage implements PDImage +{ + // image parameters + private final COSDictionary parameters; + + // the current resources, contains named color spaces + private final PDResources resources; + + // image data + private final byte[] rawData; + private final byte[] decodedData; + + /** + * Creates an inline image from the given parameters and data. 
+ * + * @param parameters the image parameters + * @param data the image data + * @param resources the current resources + * @throws IOException if the stream cannot be decoded + */ + public PDInlineImage(COSDictionary parameters, byte[] data, PDResources resources) + throws IOException + { + this.parameters = parameters; + this.resources = resources; + this.rawData = data; + + DecodeResult decodeResult = null; + List filters = getFilters(); + if (filters == null || filters.isEmpty()) + { + this.decodedData = data; + } + else + { + ByteArrayInputStream in = new ByteArrayInputStream(data); + ByteArrayOutputStream out = new ByteArrayOutputStream(data.length); + for (int i = 0; i < filters.size(); i++) + { + // TODO handling of abbreviated names belongs here, rather than in other classes + out.reset(); + Filter filter = FilterFactory.INSTANCE.getFilter(filters.get(i)); + decodeResult = filter.decode(in, out, parameters, i); + in = new ByteArrayInputStream(out.toByteArray()); + } + this.decodedData = out.toByteArray(); + } + + // repair parameters + if (decodeResult != null) + { + parameters.addAll(decodeResult.getParameters()); + } + } + + @Override + public COSBase getCOSObject() + { + return parameters; + } + + @Override + public int getBitsPerComponent() + { + if (isStencil()) + { + return 1; + } + else + { + return parameters.getInt(COSName.BPC, COSName.BITS_PER_COMPONENT, -1); + } + } + + @Override + public void setBitsPerComponent(int bitsPerComponent) + { + parameters.setInt(COSName.BPC, bitsPerComponent); + } + + @Override + public PDColorSpace getColorSpace() throws IOException + { + COSBase cs = parameters.getDictionaryObject(COSName.CS, COSName.COLORSPACE); + if (cs != null) + { + return createColorSpace(cs); + } + else if (isStencil()) + { + // stencil mask color space must be gray, it is often missing + return PDDeviceGray.INSTANCE; + } + else + { + // an image without a color space is always broken + throw new IOException("could not determine inline 
image color space"); + } + } + + // deliver the long name of a device colorspace, or the parameter + private COSBase toLongName(COSBase cs) + { + if (COSName.RGB.equals(cs)) + { + return COSName.DEVICERGB; + } + if (COSName.CMYK.equals(cs)) + { + return COSName.DEVICECMYK; + } + if (COSName.G.equals(cs)) + { + return COSName.DEVICEGRAY; + } + return cs; + } + + private PDColorSpace createColorSpace(COSBase cs) throws IOException + { + if (cs instanceof COSName) + { + return PDColorSpace.create(toLongName(cs), resources); + } + + if (cs instanceof COSArray && ((COSArray) cs).size() > 1) + { + COSArray srcArray = (COSArray) cs; + COSBase csType = srcArray.get(0); + if (COSName.I.equals(csType) || COSName.INDEXED.equals(csType)) + { + COSArray dstArray = new COSArray(); + dstArray.addAll(srcArray); + dstArray.set(0, COSName.INDEXED); + dstArray.set(1, toLongName(srcArray.get(1))); + return PDColorSpace.create(dstArray, resources); + } + + throw new IOException("Illegal type of inline image color space: " + csType); + } + + throw new IOException("Illegal type of object for inline image color space: " + cs); + } + + @Override + public void setColorSpace(PDColorSpace colorSpace) + { + COSBase base = null; + if (colorSpace != null) + { + base = colorSpace.getCOSObject(); + } + parameters.setItem(COSName.CS, base); + } + + @Override + public int getHeight() + { + return parameters.getInt(COSName.H, COSName.HEIGHT, -1); + } + + @Override + public void setHeight(int height) + { + parameters.setInt(COSName.H, height); + } + + @Override + public int getWidth() + { + return parameters.getInt(COSName.W, COSName.WIDTH, -1); + } + + @Override + public void setWidth(int width) + { + parameters.setInt(COSName.W, width); + } + + @Override + public boolean getInterpolate() + { + return parameters.getBoolean(COSName.I, COSName.INTERPOLATE, false); + } + + @Override + public void setInterpolate(boolean value) + { + parameters.setBoolean(COSName.I, value); + } + + /** + * Returns a list 
of filters applied to this stream, or null if there are none. + * + * @return a list of filters applied to this stream + */ + // TODO return an empty list if there are none? + public List getFilters() + { + List names = null; + COSBase filters = parameters.getDictionaryObject(COSName.F, COSName.FILTER); + if (filters instanceof COSName) + { + COSName name = (COSName) filters; + names = new COSArrayList(name.getName(), name, parameters, COSName.FILTER); + } + else if (filters instanceof COSArray) + { + names = COSArrayList.convertCOSNameCOSArrayToList((COSArray) filters); + } + return names; + } + + /** + * Sets which filters are applied to this stream. + * + * @param filters the filters to apply to this stream. + */ + public void setFilters(List filters) + { + COSBase obj = COSArrayList.convertStringListToCOSNameCOSArray(filters); + parameters.setItem(COSName.F, obj); + } + + @Override + public void setDecode(COSArray decode) + { + parameters.setItem(COSName.D, decode); + } + + @Override + public COSArray getDecode() + { + return (COSArray) parameters.getDictionaryObject(COSName.D, COSName.DECODE); + } + + @Override + public boolean isStencil() + { + return parameters.getBoolean(COSName.IM, COSName.IMAGE_MASK, false); + } + + @Override + public void setStencil(boolean isStencil) + { + parameters.setBoolean(COSName.IM, isStencil); + } + + @Override + public InputStream createInputStream() throws IOException + { + return new ByteArrayInputStream(decodedData); + } + + @Override + public InputStream createInputStream(DecodeOptions options) throws IOException + { + // Decode options are irrelevant for inline image, as the data is always buffered. 
+ return createInputStream(); + } + + @Override + public InputStream createInputStream(List stopFilters) throws IOException + { + List filters = getFilters(); + ByteArrayInputStream in = new ByteArrayInputStream(rawData); + ByteArrayOutputStream out = new ByteArrayOutputStream(rawData.length); + for (int i = 0; filters != null && i < filters.size(); i++) + { + // TODO handling of abbreviated names belongs here, rather than in other classes + out.reset(); + if (stopFilters.contains(filters.get(i))) + { + break; + } + else + { + Filter filter = FilterFactory.INSTANCE.getFilter(filters.get(i)); + filter.decode(in, out, parameters, i); + in = new ByteArrayInputStream(out.toByteArray()); + } + } + return in; + } + + @Override + public boolean isEmpty() + { + return decodedData.length == 0; + } + + /** + * Returns the inline image data. + */ + public byte[] getData() + { + return decodedData; + } + + @Override + public BufferedImage getImage() throws IOException + { + return SampledImageReader.getRGBImage(this, null); + } + + @Override + public BufferedImage getImage(Rectangle region, int subsampling) throws IOException + { + return SampledImageReader.getRGBImage(this, region, subsampling, null); + } + + @Override + public WritableRaster getRawRaster() throws IOException + { + return SampledImageReader.getRawRaster(this); + } + + @Override + public BufferedImage getRawImage() throws IOException + { + return getColorSpace().toRawImage(getRawRaster()); + } + + @Override + public BufferedImage getStencilImage(Paint paint) throws IOException + { + if (!isStencil()) + { + throw new IllegalStateException("Image is not a stencil"); + } + return SampledImageReader.getStencilImage(this, paint); + } + + /** + * Returns the color key mask array associated with this image, or null if + * there is none. + * + * @return Mask Image XObject + * @deprecated inline images don't have a color key mask. 
+ */ + @Deprecated + public COSArray getColorKeyMask() + { + COSBase mask = parameters.getDictionaryObject(COSName.IM, COSName.MASK); + if (mask instanceof COSArray) + { + return (COSArray) mask; + } + return null; + } + + /** + * Returns the suffix for this image type, e.g. jpg/png. + * + * @return The image suffix. + */ + @Override + public String getSuffix() + { + List filters = getFilters(); + + if (filters == null || filters.isEmpty()) + { + return "png"; + } + if (filters.contains(COSName.DCT_DECODE.getName()) || + filters.contains(COSName.DCT_DECODE_ABBREVIATION.getName())) + { + return "jpg"; + } + if (filters.contains(COSName.CCITTFAX_DECODE.getName()) || + filters.contains(COSName.CCITTFAX_DECODE_ABBREVIATION.getName())) + { + return "tiff"; + } + // JPX and JBIG2 don't exist for inline images + return "png"; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PNGConverter.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PNGConverter.java new file mode 100644 index 00000000000..1d400e885ce --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PNGConverter.java @@ -0,0 +1,955 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.graphics.image; + +import java.awt.color.ColorSpace; +import java.awt.color.ICC_Profile; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import javax.imageio.stream.ImageInputStream; +import javax.imageio.stream.MemoryCacheImageInputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSInteger; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.filter.Filter; +import org.apache.pdfbox.filter.FilterFactory; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.common.PDStream; +import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.graphics.color.PDICCBased; +import org.apache.pdfbox.pdmodel.graphics.color.PDIndexed; + +/** + * This factory tries to encode a PNG given as byte array into a PDImageXObject + * by directly copying the image data into the PDF streams without + * decoding/encoding and re-compressing the PNG data. + *

+ * If this is for any reason not possible, the factory will return null. You + * must then encode the image by loading it and using the LosslessFactory. + *

+ * The W3C PNG spec was used to implement this class: + * https://www.w3.org/TR/2003/REC-PNG-20031110 + * + * @author Emmeran Seehuber + */ +final class PNGConverter +{ + private static final Log LOG = LogFactory.getLog(PNGConverter.class); + + // Chunk Type definitions. The bytes in the comments are the bytes in the spec. + private static final int CHUNK_IHDR = 0x49484452; // IHDR: 73 72 68 82 + private static final int CHUNK_IDAT = 0x49444154; // IDAT: 73 68 65 84 + private static final int CHUNK_PLTE = 0x504C5445; // PLTE: 80 76 84 69 + private static final int CHUNK_IEND = 0x49454E44; // IEND: 73 69 78 68 + private static final int CHUNK_TRNS = 0x74524E53; // tRNS: 116 82 78 83 + private static final int CHUNK_CHRM = 0x6348524D; // cHRM: 99 72 82 77 + private static final int CHUNK_GAMA = 0x67414D41; // gAMA: 103 65 77 65 + private static final int CHUNK_ICCP = 0x69434350; // iCCP: 105 67 67 80 + private static final int CHUNK_SBIT = 0x73424954; // sBIT: 115 66 73 84 + private static final int CHUNK_SRGB = 0x73524742; // sRGB: 115 82 71 66 + private static final int CHUNK_TEXT = 0x74455874; // tEXt: 116 69 88 116 + private static final int CHUNK_ZTXT = 0x7A545874; // zTXt: 122 84 88 116 + private static final int CHUNK_ITXT = 0x69545874; // iTXt: 105 84 88 116 + private static final int CHUNK_KBKG = 0x6B424B47; // kBKG: 107 66 75 71 + private static final int CHUNK_HIST = 0x68495354; // hIST: 104 73 83 84 + private static final int CHUNK_PHYS = 0x70485973; // pHYs: 112 72 89 115 + private static final int CHUNK_SPLT = 0x73504C54; // sPLT: 115 80 76 84 + private static final int CHUNK_TIME = 0x74494D45; // tIME: 116 73 77 69 + + // CRC Reference Implementation, see + // https://www.w3.org/TR/2003/REC-PNG-20031110/#D-CRCAppendix + // for details + + /* Table of CRCs of all 8-bit messages. 
*/ + private static final int[] CRC_TABLE = new int[256]; + + static + { + makeCrcTable(); + } + + private PNGConverter() + { + } + + /** + * Try to convert a PNG into a PDImageXObject. If for any reason the PNG can not + * be converted, null is returned. + *

+ * This usually means the PNG structure is damaged (CRC error, etc.) or it uses + * some features which can not be mapped to PDF. + * + * @param doc the document to put the image in + * @param imageData the byte data of the PNG + * @return null or the PDImageXObject built from the png + */ + static PDImageXObject convertPNGImage(PDDocument doc, byte[] imageData) throws IOException + { + PNGConverterState state = parsePNGChunks(imageData); + if (!checkConverterState(state)) + { + // There is something wrong, we can't convert this PNG + return null; + } + + return convertPng(doc, state); + } + + /** + * Convert the image using the state. + * + * @param doc the document to put the image in + * @param state the parser state containing the PNG chunks. + * @return null or the converted image + */ + private static PDImageXObject convertPng(PDDocument doc, PNGConverterState state) + throws IOException + { + Chunk ihdr = state.IHDR; + int ihdrStart = ihdr.start; + int width = readInt(ihdr.bytes, ihdrStart); + int height = readInt(ihdr.bytes, ihdrStart + 4); + int bitDepth = ihdr.bytes[ihdrStart + 8] & 0xFF; + int colorType = ihdr.bytes[ihdrStart + 9] & 0xFF; + int compressionMethod = ihdr.bytes[ihdrStart + 10] & 0xFF; + int filterMethod = ihdr.bytes[ihdrStart + 11] & 0xFF; + int interlaceMethod = ihdr.bytes[ihdrStart + 12] & 0xFF; + + if (bitDepth != 1 && bitDepth != 2 && bitDepth != 4 && bitDepth != 8 && bitDepth != 16) + { + LOG.error(String.format("Invalid bit depth %d.", bitDepth)); + return null; + } + if (width <= 0 || height <= 0) + { + LOG.error(String.format("Invalid image size %d x %d", width, height)); + return null; + } + if (compressionMethod != 0) + { + LOG.error(String.format("Unknown PNG compression method %d.", compressionMethod)); + return null; + } + if (filterMethod != 0) + { + LOG.error(String.format("Unknown PNG filtering method %d.", compressionMethod)); + return null; + } + if (interlaceMethod != 0) + { + LOG.debug(String.format("Can't handle 
interlace method %d.", interlaceMethod)); + return null; + } + + state.width = width; + state.height = height; + state.bitsPerComponent = bitDepth; + + switch (colorType) + { + case 0: + // Grayscale + LOG.debug("Can't handle grayscale yet."); + return null; + case 2: + // Truecolor + if (state.tRNS != null) + { + LOG.debug("Can't handle images with transparent colors."); + return null; + } + return buildImageObject(doc, state); + case 3: + // Indexed image + return buildIndexImage(doc, state); + case 4: + // Grayscale with alpha. + LOG.debug( + "Can't handle grayscale with alpha, would need to separate alpha from image data"); + return null; + case 6: + // Truecolor with alpha. + LOG.debug( + "Can't handle truecolor with alpha, would need to separate alpha from image data"); + return null; + default: + LOG.error("Unknown PNG color type " + colorType); + return null; + } + } + + /** + * Build a indexed image + */ + private static PDImageXObject buildIndexImage(PDDocument doc, PNGConverterState state) + throws IOException + { + Chunk plte = state.PLTE; + if (plte == null) + { + LOG.error("Indexed image without PLTE chunk."); + return null; + } + if (plte.length % 3 != 0) + { + LOG.error("PLTE table corrupted, last (r,g,b) tuple is not complete."); + return null; + } + if (state.bitsPerComponent > 8) + { + LOG.debug(String.format("Can only convert indexed images with bit depth <= 8, not %d.", + state.bitsPerComponent)); + return null; + } + + PDImageXObject image = buildImageObject(doc, state); + if (image == null) + { + return null; + } + + int highVal = (plte.length / 3) - 1; + if (highVal > 255) + { + LOG.error(String.format("Too much colors in PLTE, only 256 allowed, found %d colors.", + highVal + 1)); + return null; + } + + setupIndexedColorSpace(doc, plte, image, highVal); + + if (state.tRNS != null) + { + image.getCOSObject().setItem(COSName.SMASK, + buildTransparencyMaskFromIndexedData(doc, image, state)); + } + + return image; + } + + private static 
PDImageXObject buildTransparencyMaskFromIndexedData(PDDocument doc, + PDImageXObject image, PNGConverterState state) throws IOException + { + Filter flateDecode = FilterFactory.INSTANCE.getFilter(COSName.FLATE_DECODE); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + COSDictionary decodeParams = buildDecodeParams(state, PDDeviceGray.INSTANCE); + COSDictionary imageDict = new COSDictionary(); + imageDict.setItem(COSName.FILTER, COSName.FLATE_DECODE); + imageDict.setItem(COSName.DECODE_PARMS, decodeParams); + flateDecode.decode(getIDATInputStream(state), outputStream, imageDict, 0); + int length = image.getWidth() * image.getHeight(); + byte[] bytes = new byte[length]; + byte[] transparencyTable = state.tRNS.getData(); + byte[] decodedIDAT = outputStream.toByteArray(); + ImageInputStream iis = new MemoryCacheImageInputStream( + new ByteArrayInputStream(decodedIDAT)); + try + { + int bitsPerComponent = state.bitsPerComponent; + int w = 0; + int neededBits = bitsPerComponent * state.width; + int bitPadding = neededBits % 8; + for (int i = 0; i < bytes.length; i++) + { + int idx = (int) iis.readBits(bitsPerComponent); + if (idx < transparencyTable.length) + { + // Inside the table, use the transparency value + bytes[i] = transparencyTable[idx]; + } + else + { + // Outside the table -> transparent value is 0xFF here. 
+ bytes[i] = (byte) 0xFF; + } + w++; + if (w == state.width) + { + w = 0; + iis.readBits(bitPadding); + } + } + } + finally + { + iis.close(); + } + return LosslessFactory + .prepareImageXObject(doc, bytes, image.getWidth(), image.getHeight(), 8, + PDDeviceGray.INSTANCE); + } + + private static void setupIndexedColorSpace(PDDocument doc, Chunk lookupTable, + PDImageXObject image, int highVal) throws IOException + { + COSArray indexedArray = new COSArray(); + indexedArray.add(COSName.INDEXED); + indexedArray.add(image.getColorSpace()); + ((COSDictionary) image.getCOSObject().getItem(COSName.DECODE_PARMS)) + .setItem(COSName.COLORS, COSInteger.ONE); + + indexedArray.add(COSInteger.get(highVal)); + + PDStream colorTable = new PDStream(doc); + OutputStream colorTableStream = colorTable.createOutputStream(COSName.FLATE_DECODE); + try + { + colorTableStream.write(lookupTable.bytes, lookupTable.start, lookupTable.length); + } + finally + { + colorTableStream.close(); + } + indexedArray.add(colorTable); + + PDIndexed indexed = new PDIndexed(indexedArray); + image.setColorSpace(indexed); + } + + /** + * Build the base image object from the IDATs and profile information + */ + private static PDImageXObject buildImageObject(PDDocument document, PNGConverterState state) + throws IOException + { + InputStream encodedByteStream = getIDATInputStream(state); + + PDColorSpace colorSpace = PDDeviceRGB.INSTANCE; + + PDImageXObject imageXObject = new PDImageXObject(document, encodedByteStream, + COSName.FLATE_DECODE, state.width, state.height, state.bitsPerComponent, + colorSpace); + + COSDictionary decodeParams = buildDecodeParams(state, colorSpace); + imageXObject.getCOSObject().setItem(COSName.DECODE_PARMS, decodeParams); + + // We ignore gAMA and cHRM chunks if we have a ICC profile, as the ICC profile + // takes preference + boolean hasICCColorProfile = state.sRGB != null || state.iCCP != null; + + if (state.gAMA != null && !hasICCColorProfile) + { + if (state.gAMA.length != 4) + 
{ + LOG.error("Invalid gAMA chunk length " + state.gAMA.length); + return null; + } + float gamma = readPNGFloat(state.gAMA.bytes, state.gAMA.start); + // If the gamma is 2.2 for sRGB everything is fine. Otherwise bail out. + // The gamma is stored as 1 / gamma. + if (Math.abs(gamma - (1 / 2.2f)) > 0.00001) + { + LOG.debug(String.format("We can't handle gamma of %f yet.", gamma)); + return null; + } + } + + if (state.sRGB != null) + { + if (state.sRGB.length != 1) + { + LOG.error( + String.format("sRGB chunk has an invalid length of %d", state.sRGB.length)); + return null; + } + + // Store the specified rendering intent + int renderIntent = state.sRGB.bytes[state.sRGB.start]; + COSName value = mapPNGRenderIntent(renderIntent); + imageXObject.getCOSObject().setItem(COSName.INTENT, value); + } + + if (state.cHRM != null && !hasICCColorProfile) + { + if (state.cHRM.length != 32) + { + LOG.error("Invalid cHRM chunk length " + state.cHRM.length); + return null; + } + LOG.debug("We can not handle cHRM chunks yet."); + return null; + } + + // If possible we prefer a ICCBased color profile, just because its way faster + // to decode ... + if (state.iCCP != null || state.sRGB != null) + { + // We have got a color profile, which we must attach + COSStream cosStream = createCOSStreamwithIccProfile(document, colorSpace, state); + if (cosStream == null) + { + return null; + } + COSArray array = new COSArray(); + array.add(COSName.ICCBASED); + array.add(cosStream); + PDICCBased profile = PDICCBased.create(array, null); + imageXObject.setColorSpace(profile); + } + return imageXObject; + } + + private static COSStream createCOSStreamwithIccProfile + (PDDocument document, PDColorSpace colorSpace, PNGConverterState state) throws IOException + { + COSStream cosStream = document.getDocument().createCOSStream(); + cosStream.setInt(COSName.N, colorSpace.getNumberOfComponents()); + cosStream.setItem(COSName.ALTERNATE, colorSpace.getNumberOfComponents() + == 1 ? 
COSName.DEVICEGRAY : COSName.DEVICERGB); + cosStream.setItem(COSName.FILTER, COSName.FLATE_DECODE); + if (state.iCCP != null) + { + // We need to skip over the name + int iccProfileDataStart = 0; + while (iccProfileDataStart < 80 && iccProfileDataStart < state.iCCP.length) + { + if (state.iCCP.bytes[state.iCCP.start + iccProfileDataStart] == 0) + { + break; + } + iccProfileDataStart++; + } + iccProfileDataStart++; + if (iccProfileDataStart >= state.iCCP.length) + { + LOG.error("Invalid iCCP chunk, to few bytes"); + return null; + } + byte compressionMethod = state.iCCP.bytes[state.iCCP.start + iccProfileDataStart]; + if (compressionMethod != 0) + { + LOG.error(String.format("iCCP chunk: invalid compression method %d", + compressionMethod)); + return null; + } + // Skip over the compression method + iccProfileDataStart++; + OutputStream rawOutputStream = cosStream.createRawOutputStream(); + try + { + rawOutputStream.write(state.iCCP.bytes, state.iCCP.start + iccProfileDataStart, + state.iCCP.length - iccProfileDataStart); + } + finally + { + rawOutputStream.close(); + } + } + else + { + // We tag the image with the sRGB profile + ICC_Profile rgbProfile = ICC_Profile.getInstance(ColorSpace.CS_sRGB); + OutputStream outputStream = cosStream.createOutputStream(); + try + { + outputStream.write(rgbProfile.getData()); + } + finally + { + outputStream.close(); + } + } + return cosStream; + } + + private static COSDictionary buildDecodeParams(PNGConverterState state, PDColorSpace colorSpace) + { + COSDictionary decodeParms = new COSDictionary(); + decodeParms.setItem(COSName.BITS_PER_COMPONENT, COSInteger.get(state.bitsPerComponent)); + decodeParms.setItem(COSName.PREDICTOR, COSInteger.get(15)); + decodeParms.setItem(COSName.COLUMNS, COSInteger.get(state.width)); + decodeParms.setItem(COSName.COLORS, COSInteger.get(colorSpace.getNumberOfComponents())); + return decodeParms; + } + + /** + * Build an input stream for the IDAT data. May need to concat multiple IDAT + * chunks. 
+ * + * @param state the converter state. + * @return a input stream with the IDAT data. + */ + private static InputStream getIDATInputStream(PNGConverterState state) + { + MultipleInputStream inputStream = new MultipleInputStream(); + for (Chunk idat : state.IDATs) + { + inputStream.inputStreams + .add(new ByteArrayInputStream(idat.bytes, idat.start, idat.length)); + } + return inputStream; + } + + private static class MultipleInputStream extends InputStream + { + + List inputStreams = new ArrayList(); + int currentStreamIdx; + InputStream currentStream; + + private boolean ensureStream() + { + if (currentStream == null) + { + if (currentStreamIdx >= inputStreams.size()) + { + return false; + } + currentStream = inputStreams.get(currentStreamIdx++); + } + return true; + } + + @Override + public int read() throws IOException + { + if (!ensureStream()) + { + return -1; + } + int ret = currentStream.read(); + if (ret == -1) + { + currentStream = null; + return read(); + } + return ret; + } + + @Override + public int available() throws IOException + { + if (!ensureStream()) + { + return 0; + } + return 1; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException + { + if (!ensureStream()) + { + return -1; + } + int ret = currentStream.read(b, off, len); + if (ret == -1) + { + currentStream = null; + return read(b, off, len); + } + return ret; + } + } + + /** + * Map the renderIntent int to a PDF render intent. 
See also + * https://www.w3.org/TR/2003/REC-PNG-20031110/#11sRGB + * + * @param renderIntent the PNG render intent + * @return the matching PDF Render Intent or null + */ + static COSName mapPNGRenderIntent(int renderIntent) + { + COSName value; + switch (renderIntent) + { + case 0: + value = COSName.PERCEPTUAL; + break; + case 1: + value = COSName.RELATIVE_COLORIMETRIC; + break; + case 2: + value = COSName.SATURATION; + break; + case 3: + value = COSName.ABSOLUTE_COLORIMETRIC; + break; + default: + value = null; + break; + } + return value; + } + + /** + * Check if the converter state is sane. + * + * @param state the parsed converter state + * @return true if the state seems plausible + */ + static boolean checkConverterState(PNGConverterState state) + { + if (state == null) + { + return false; + } + if (state.IHDR == null || !checkChunkSane(state.IHDR)) + { + LOG.error("Invalid IHDR chunk."); + return false; + } + if (!checkChunkSane(state.PLTE)) + { + LOG.error("Invalid PLTE chunk."); + return false; + } + if (!checkChunkSane(state.iCCP)) + { + LOG.error("Invalid iCCP chunk."); + return false; + } + if (!checkChunkSane(state.tRNS)) + { + LOG.error("Invalid tRNS chunk."); + return false; + } + if (!checkChunkSane(state.sRGB)) + { + LOG.error("Invalid sRGB chunk."); + return false; + } + if (!checkChunkSane(state.cHRM)) + { + LOG.error("Invalid cHRM chunk."); + return false; + } + if (!checkChunkSane(state.gAMA)) + { + LOG.error("Invalid gAMA chunk."); + return false; + } + + // Check the IDATs + if (state.IDATs.isEmpty()) + { + LOG.error("No IDAT chunks."); + return false; + } + for (Chunk idat : state.IDATs) + { + if (!checkChunkSane(idat)) + { + LOG.error("Invalid IDAT chunk."); + return false; + } + } + return true; + } + + /** + * Check if the chunk is sane, i.e. CRC matches and offsets and lengths in the + * byte array + */ + static boolean checkChunkSane(Chunk chunk) + { + if (chunk == null) + { + // If the chunk does not exist, it can not be wrong... 
+ return true; + } + + if (chunk.start + chunk.length > chunk.bytes.length) + { + return false; + } + + if (chunk.start < 4) + { + return false; + } + + // We must include the chunk type in the CRC calculation + int ourCRC = crc(chunk.bytes, chunk.start - 4, chunk.length + 4); + if (ourCRC != chunk.crc) + { + LOG.error(String.format("Invalid CRC %08X on chunk %08X, expected %08X.", ourCRC, + chunk.chunkType, chunk.crc)); + return false; + } + return true; + } + + /** + * Holds the information about a chunks + */ + static final class Chunk + { + /** + * This field holds the whole byte array; In that it's redundant, as all chunks + * will have the same byte array. But have this byte array per chunk makes it + * easier to validate and pass around. And we won't have that many chunks, so + * those 8 bytes for the pointer (on 64-bit systems) don't matter. + */ + byte[] bytes; + /** + * The chunk type, see the CHUNK_??? constants. + */ + int chunkType; + /** + * The crc of the chunk data, as stored in the PNG stream. + */ + int crc; + /** + * The start index of the chunk data within bytes. + */ + int start; + /** + * The length of the data within the byte array. 
+ */ + int length; + + /** + * Get the data of this chunk as a byte array + * + * @return a byte-array with only the data of the chunk + */ + byte[] getData() + { + return Arrays.copyOfRange(bytes, start, start + length); + } + } + + /** + * Holds all relevant chunks of the PNG + */ + static final class PNGConverterState + { + List IDATs = new ArrayList(); + @SuppressWarnings("SpellCheckingInspection") Chunk IHDR; + @SuppressWarnings("SpellCheckingInspection") Chunk PLTE; + Chunk iCCP; + Chunk tRNS; + Chunk sRGB; + Chunk gAMA; + Chunk cHRM; + + // Parsed header fields + int width; + int height; + int bitsPerComponent; + } + + private static int readInt(byte[] data, int offset) + { + int b1 = (data[offset] & 0xFF) << 24; + int b2 = (data[offset + 1] & 0xFF) << 16; + int b3 = (data[offset + 2] & 0xFF) << 8; + int b4 = (data[offset + 3] & 0xFF); + return b1 | b2 | b3 | b4; + } + + private static float readPNGFloat(byte[] bytes, int offset) + { + int v = readInt(bytes, offset); + return v / 100000f; + } + + /** + * Parse the PNG structure into the PNGConverterState. If we can't handle + * something, this method will return null. + * + * @param imageData the byte array with the PNG data + * @return null or the converter state with all relevant chunks + */ + private static PNGConverterState parsePNGChunks(byte[] imageData) + { + if (imageData.length < 20) + { + LOG.error("ByteArray way to small: " + imageData.length); + return null; + } + + PNGConverterState state = new PNGConverterState(); + int ptr = 8; + int firstChunkType = readInt(imageData, ptr + 4); + + if (firstChunkType != CHUNK_IHDR) + { + LOG.error(String.format("First Chunktype was %08X, not IHDR", firstChunkType)); + return null; + } + + while (ptr + 12 <= imageData.length) + { + int chunkLength = readInt(imageData, ptr); + int chunkType = readInt(imageData, ptr + 4); + ptr += 8; + + if (ptr + chunkLength + 4 > imageData.length) + { + LOG.error("Not enough bytes. 
At offset " + ptr + " are " + chunkLength + + " bytes expected. Overall length is " + imageData.length); + return null; + } + + Chunk chunk = new Chunk(); + chunk.chunkType = chunkType; + chunk.bytes = imageData; + chunk.start = ptr; + chunk.length = chunkLength; + + switch (chunkType) + { + case CHUNK_IHDR: + if (state.IHDR != null) + { + LOG.error("Two IHDR chunks? There is something wrong."); + return null; + } + state.IHDR = chunk; + break; + case CHUNK_IDAT: + // The image data itself + state.IDATs.add(chunk); + break; + case CHUNK_PLTE: + // For indexed images the palette table + if (state.PLTE != null) + { + LOG.error("Two PLTE chunks? There is something wrong."); + return null; + } + state.PLTE = chunk; + break; + case CHUNK_IEND: + // We are done, return the state + return state; + case CHUNK_TRNS: + // For indexed images the alpha transparency table + if (state.tRNS != null) + { + LOG.error("Two tRNS chunks? There is something wrong."); + return null; + } + state.tRNS = chunk; + break; + case CHUNK_GAMA: + // Gama + state.gAMA = chunk; + break; + case CHUNK_CHRM: + // Chroma + state.cHRM = chunk; + break; + case CHUNK_ICCP: + // ICC Profile + state.iCCP = chunk; + break; + case CHUNK_SBIT: + LOG.debug("Can't convert PNGs with sBIT chunk."); + break; + case CHUNK_SRGB: + // We use the rendering intent from the chunk + state.sRGB = chunk; + break; + case CHUNK_TEXT: + case CHUNK_ZTXT: + case CHUNK_ITXT: + // We don't care about this text infos / metadata + break; + case CHUNK_KBKG: + // As we can handle transparency we don't need the background color information. + break; + case CHUNK_HIST: + // We don't need the color histogram + break; + case CHUNK_PHYS: + // The PDImageXObject will be placed by the user however he wants, + // so we can not enforce the physical dpi information stored here. + // We just ignore it. + break; + case CHUNK_SPLT: + // This palette stuff seems editor related, we don't need it. 
+ break; + case CHUNK_TIME: + // We don't need the last image change time either + break; + default: + LOG.debug(String.format("Unknown chunk type %08X, skipping.", chunkType)); + break; + } + ptr += chunkLength; + + // Read the CRC + chunk.crc = readInt(imageData, ptr); + ptr += 4; + } + LOG.error("No IEND chunk found."); + return null; + } + + /* Make the table for a fast CRC. */ + private static void makeCrcTable() + { + int c; + + for (int n = 0; n < 256; n++) + { + c = n; + for (int k = 0; k < 8; k++) + { + if ((c & 1) != 0) + { + c = 0xEDB88320 ^ (c >>> 1); + } + else + { + c = c >>> 1; + } + } + CRC_TABLE[n] = c; + } + } + + /* + * Update a running CRC with the bytes buf[0..len-1]--the CRC should be + * initialized to all 1's, and the transmitted value is the 1's complement of + * the final running CRC (see the crc() routine below). + */ + private static int updateCrc(byte[] buf, int offset, int len) + { + int c = -1; + int end = offset + len; + for (int n = offset; n < end; n++) + { + c = CRC_TABLE[(c ^ buf[n]) & 0xff] ^ (c >>> 8); + } + return c; + } + + /* Return the CRC of the bytes buf[offset..(offset+len-1)]. */ + static int crc(byte[] buf, int offset, int len) + { + return ~updateCrc(buf, offset, len); + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/SampledImageReader.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/SampledImageReader.java index 4e89e158e56..20114dc87c5 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/SampledImageReader.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/SampledImageReader.java @@ -1,454 +1,824 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.pdmodel.graphics.image; - -import java.awt.Graphics2D; -import java.awt.Paint; -import java.awt.Point; -import java.awt.image.BufferedImage; -import java.awt.image.DataBuffer; -import java.awt.image.DataBufferByte; -import java.awt.image.Raster; -import java.awt.image.WritableRaster; -import java.io.IOException; -import java.io.InputStream; -import java.util.Arrays; -import javax.imageio.stream.ImageInputStream; -import javax.imageio.stream.MemoryCacheImageInputStream; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.pdfbox.cos.COSArray; -import org.apache.pdfbox.cos.COSNumber; -import org.apache.pdfbox.io.IOUtils; -import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; -import org.apache.pdfbox.pdmodel.graphics.color.PDIndexed; - -/** - * Reads a sampled image from a PDF file. - * @author John Hewson - */ -final class SampledImageReader -{ - private static final Log LOG = LogFactory.getLog(SampledImageReader.class); - - private SampledImageReader() - { - } - - /** - * Returns an ARGB image filled with the given paint and using the given image as a mask. - * @param paint the paint to fill the visible portions of the image with - * @return a masked image filled with the given paint - * @throws IOException if the image cannot be read - * @throws IllegalStateException if the image is not a stencil. 
- */ - public static BufferedImage getStencilImage(PDImage pdImage, Paint paint) throws IOException - { - // get mask (this image) - BufferedImage mask = getRGBImage(pdImage, null); - - // compose to ARGB - BufferedImage masked = new BufferedImage(mask.getWidth(), mask.getHeight(), - BufferedImage.TYPE_INT_ARGB); - Graphics2D g = masked.createGraphics(); - - // draw the mask - //g.drawImage(mask, 0, 0, null); - - // fill with paint using src-in - //g.setComposite(AlphaComposite.SrcIn); - g.setPaint(paint); - g.fillRect(0, 0, mask.getWidth(), mask.getHeight()); - g.dispose(); - - // set the alpha - int width = masked.getWidth(); - int height = masked.getHeight(); - WritableRaster raster = masked.getRaster(); - WritableRaster alpha = mask.getRaster(); - - final float[] transparent = new float[4]; - float[] alphaPixel = null; - for (int y = 0; y < height; y++) - { - for (int x = 0; x < width; x++) - { - alphaPixel = alpha.getPixel(x, y, alphaPixel); - if (alphaPixel[0] == 255) - { - raster.setPixel(x, y, transparent); - } - } - } - - return masked; - } - - /** - * Returns the content of the given image as an AWT buffered image with an RGB color space. - * If a color key mask is provided then an ARGB image is returned instead. - * This method never returns null. 
- * @param pdImage the image to read - * @param colorKey an optional color key mask - * @return content of this image as an RGB buffered image - * @throws IOException if the image cannot be read - */ - public static BufferedImage getRGBImage(PDImage pdImage, COSArray colorKey) throws IOException - { - if (pdImage.isEmpty()) - { - throw new IOException("Image stream is empty"); - } - - // get parameters, they must be valid or have been repaired - final PDColorSpace colorSpace = pdImage.getColorSpace(); - final int numComponents = colorSpace.getNumberOfComponents(); - final int width = pdImage.getWidth(); - final int height = pdImage.getHeight(); - final int bitsPerComponent = pdImage.getBitsPerComponent(); - final float[] decode = getDecodeArray(pdImage); - - // - // An AWT raster must use 8/16/32 bits per component. Images with < 8bpc - // will be unpacked into a byte-backed raster. Images with 16bpc will be reduced - // in depth to 8bpc as they will be drawn to TYPE_INT_RGB images anyway. All code - // in PDColorSpace#toRGBImage expects and 8-bit range, i.e. 0-255. 
- // - WritableRaster raster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, width, height, - numComponents, new Point(0, 0)); - - // convert image, faster path for non-decoded, non-colormasked 8-bit images - final float[] defaultDecode = pdImage.getColorSpace().getDefaultDecode(8); - if (bitsPerComponent == 8 && Arrays.equals(decode, defaultDecode) && colorKey == null) - { - return from8bit(pdImage, raster); - } - else if (bitsPerComponent == 1 && colorKey == null) - { - return from1Bit(pdImage, raster); - } - else - { - return fromAny(pdImage, raster, colorKey); - } - } - - private static BufferedImage from1Bit(PDImage pdImage, WritableRaster raster) - throws IOException - { - final PDColorSpace colorSpace = pdImage.getColorSpace(); - final int width = pdImage.getWidth(); - final int height = pdImage.getHeight(); - final float[] decode = getDecodeArray(pdImage); - byte[] output = ((DataBufferByte) raster.getDataBuffer()).getData(); - - // read bit stream - InputStream iis = null; - try - { - // create stream - iis = pdImage.createInputStream(); - final boolean isIndexed = colorSpace instanceof PDIndexed; - - int rowLen = width / 8; - if (width % 8 > 0) - { - rowLen++; - } - - // read stream - byte value0; - byte value1; - if (isIndexed || decode[0] < decode[1]) - { - value0 = 0; - value1 = (byte) 255; - } - else - { - value0 = (byte) 255; - value1 = 0; - } - byte[] buff = new byte[rowLen]; - int idx = 0; - for (int y = 0; y < height; y++) - { - int x = 0; - int readLen = iis.read(buff); - for (int r = 0; r < rowLen && r < readLen; r++) - { - int value = buff[r]; - int mask = 128; - for (int i = 0; i < 8; i++) - { - int bit = value & mask; - mask >>= 1; - output[idx++] = bit == 0 ? 
value0 : value1; - x++; - if (x == width) - { - break; - } - } - } - if (readLen != rowLen) - { - LOG.warn("premature EOF, image will be incomplete"); - break; - } - } - - // use the color space to convert the image to RGB - BufferedImage rgbImage = colorSpace.toRGBImage(raster); - - return rgbImage; - } - finally - { - if (iis != null) - { - iis.close(); - } - } - } - - // faster, 8-bit non-decoded, non-colormasked image conversion - private static BufferedImage from8bit(PDImage pdImage, WritableRaster raster) - throws IOException - { - InputStream input = pdImage.createInputStream(); - try - { - // get the raster's underlying byte buffer - byte[][] banks = ((DataBufferByte) raster.getDataBuffer()).getBankData(); - final int width = pdImage.getWidth(); - final int height = pdImage.getHeight(); - final int numComponents = pdImage.getColorSpace().getNumberOfComponents(); - int max = width * height; - byte[] tempBytes = new byte[numComponents]; - for (int i = 0; i < max; i++) - { - input.read(tempBytes); - for (int c = 0; c < numComponents; c++) - { - banks[c][i] = tempBytes[0+c]; - } - } - // use the color space to convert the image to RGB - return pdImage.getColorSpace().toRGBImage(raster); - } - finally - { - IOUtils.closeQuietly(input); - } - } - - // slower, general-purpose image conversion from any image format - private static BufferedImage fromAny(PDImage pdImage, WritableRaster raster, COSArray colorKey) - throws IOException - { - final PDColorSpace colorSpace = pdImage.getColorSpace(); - final int numComponents = colorSpace.getNumberOfComponents(); - final int width = pdImage.getWidth(); - final int height = pdImage.getHeight(); - final int bitsPerComponent = pdImage.getBitsPerComponent(); - final float[] decode = getDecodeArray(pdImage); - - // read bit stream - ImageInputStream iis = null; - try - { - // create stream - iis = new MemoryCacheImageInputStream(pdImage.createInputStream()); - final float sampleMax = (float)Math.pow(2, bitsPerComponent) - 1f; 
- final boolean isIndexed = colorSpace instanceof PDIndexed; - - // init color key mask - float[] colorKeyRanges = null; - BufferedImage colorKeyMask = null; - if (colorKey != null) - { - colorKeyRanges = colorKey.toFloatArray(); - colorKeyMask = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY); - } - - // calculate row padding - int padding = 0; - if (width * numComponents * bitsPerComponent % 8 > 0) - { - padding = 8 - (width * numComponents * bitsPerComponent % 8); - } - - // read stream - byte[] srcColorValues = new byte[numComponents]; - byte[] alpha = new byte[1]; - for (int y = 0; y < height; y++) - { - for (int x = 0; x < width; x++) - { - boolean isMasked = true; - for (int c = 0; c < numComponents; c++) - { - int value = (int)iis.readBits(bitsPerComponent); - - // color key mask requires values before they are decoded - if (colorKeyRanges != null) - { - isMasked &= value >= colorKeyRanges[c * 2] && - value <= colorKeyRanges[c * 2 + 1]; - } - - // decode array - final float dMin = decode[c * 2]; - final float dMax = decode[(c * 2) + 1]; - - // interpolate to domain - float output = dMin + (value * ((dMax - dMin) / sampleMax)); - - if (isIndexed) - { - // indexed color spaces get the raw value, because the TYPE_BYTE - // below cannot be reversed by the color space without it having - // knowledge of the number of bits per component - srcColorValues[c] = (byte)Math.round(output); - } - else - { - // interpolate to TYPE_BYTE - int outputByte = Math.round(((output - Math.min(dMin, dMax)) / - Math.abs(dMax - dMin)) * 255f); - - srcColorValues[c] = (byte)outputByte; - } - } - raster.setDataElements(x, y, srcColorValues); - - // set alpha channel in color key mask, if any - if (colorKeyMask != null) - { - alpha[0] = (byte)(isMasked ? 
255 : 0); - colorKeyMask.getRaster().setDataElements(x, y, alpha); - } - } - - // rows are padded to the nearest byte - iis.readBits(padding); - } - - // use the color space to convert the image to RGB - BufferedImage rgbImage = colorSpace.toRGBImage(raster); - - // apply color mask, if any - if (colorKeyMask != null) - { - return applyColorKeyMask(rgbImage, colorKeyMask); - } - else - { - return rgbImage; - } - } - finally - { - if (iis != null) - { - iis.close(); - } - } - } - - // color key mask: RGB + Binary -> ARGB - private static BufferedImage applyColorKeyMask(BufferedImage image, BufferedImage mask) - throws IOException - { - int width = image.getWidth(); - int height = image.getHeight(); - - // compose to ARGB - BufferedImage masked = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); - - WritableRaster src = image.getRaster(); - WritableRaster dest = masked.getRaster(); - WritableRaster alpha = mask.getRaster(); - - float[] rgb = new float[3]; - float[] rgba = new float[4]; - float[] alphaPixel = null; - for (int y = 0; y < height; y++) - { - for (int x = 0; x < width; x++) - { - src.getPixel(x, y, rgb); - - rgba[0] = rgb[0]; - rgba[1] = rgb[1]; - rgba[2] = rgb[2]; - alphaPixel = alpha.getPixel(x, y, alphaPixel); - rgba[3] = 255 - alphaPixel[0]; - - dest.setPixel(x, y, rgba); - } - } - - return masked; - } - - // gets decode array from dictionary or returns default - private static float[] getDecodeArray(PDImage pdImage) throws IOException - { - final COSArray cosDecode = pdImage.getDecode(); - float[] decode = null; - - if (cosDecode != null) - { - int numberOfComponents = pdImage.getColorSpace().getNumberOfComponents(); - if (cosDecode.size() != numberOfComponents * 2) - { - if (pdImage.isStencil() && cosDecode.size() >= 2 - && cosDecode.get(0) instanceof COSNumber - && cosDecode.get(1) instanceof COSNumber) - { - float decode0 = ((COSNumber) cosDecode.get(0)).floatValue(); - float decode1 = ((COSNumber) cosDecode.get(1)).floatValue(); - if 
(decode0 >= 0 && decode0 <= 1 && decode1 >= 0 && decode1 <= 1) - { - LOG.warn("decode array " + cosDecode - + " not compatible with color space, using the first two entries"); - return new float[] - { - decode0, decode1 - }; - } - } - LOG.error("decode array " + cosDecode - + " not compatible with color space, using default"); - } - else - { - decode = cosDecode.toFloatArray(); - } - } - - // use color space default - if (decode == null) - { - return pdImage.getColorSpace().getDefaultDecode(pdImage.getBitsPerComponent()); - } - - return decode; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.graphics.image; + +import java.awt.Graphics2D; +import java.awt.Paint; +import java.awt.Point; +import java.awt.Rectangle; +import java.awt.image.BufferedImage; +import java.awt.image.DataBuffer; +import java.awt.image.DataBufferByte; +import java.awt.image.Raster; +import java.awt.image.WritableRaster; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import javax.imageio.stream.ImageInputStream; +import javax.imageio.stream.MemoryCacheImageInputStream; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.filter.DecodeOptions; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; +import org.apache.pdfbox.pdmodel.graphics.color.PDIndexed; + +/** + * Reads a sampled image from a PDF file. + * @author John Hewson + */ +final class SampledImageReader +{ + private static final Log LOG = LogFactory.getLog(SampledImageReader.class); + + private SampledImageReader() + { + } + + /** + * Returns an ARGB image filled with the given paint and using the given image as a mask. + * @param paint the paint to fill the visible portions of the image with + * @return a masked image filled with the given paint + * @throws IOException if the image cannot be read + * @throws IllegalStateException if the image is not a stencil. 
+ */ + public static BufferedImage getStencilImage(PDImage pdImage, Paint paint) throws IOException + { + int width = pdImage.getWidth(); + int height = pdImage.getHeight(); + + // compose to ARGB + BufferedImage masked = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); + Graphics2D g = masked.createGraphics(); + + // draw the mask + //g.drawImage(mask, 0, 0, null); + + // fill with paint using src-in + //g.setComposite(AlphaComposite.SrcIn); + g.setPaint(paint); + g.fillRect(0, 0, width, height); + g.dispose(); + + // set the alpha + WritableRaster raster = masked.getRaster(); + + final int[] transparent = new int[4]; + + // avoid getting a BufferedImage for the mask to lessen memory footprint. + // Such masks are always bpc=1 and have no colorspace, but have a decode. + // (see 8.9.6.2 Stencil Masking) + ImageInputStream iis = null; + try + { + iis = new MemoryCacheImageInputStream(pdImage.createInputStream()); + final float[] decode = getDecodeArray(pdImage); + int value = decode[0] < decode[1] ? 1 : 0; + int rowLen = width / 8; + if (width % 8 > 0) + { + rowLen++; + } + byte[] buff = new byte[rowLen]; + for (int y = 0; y < height; y++) + { + int x = 0; + int readLen = iis.read(buff); + for (int r = 0; r < rowLen && r < readLen; r++) + { + int byteValue = buff[r]; + int mask = 128; + int shift = 7; + for (int i = 0; i < 8; i++) + { + int bit = (byteValue & mask) >> shift; + mask >>= 1; + --shift; + if (bit == value) + { + raster.setPixel(x, y, transparent); + } + x++; + if (x == width) + { + break; + } + } + } + if (readLen != rowLen) + { + LOG.warn("premature EOF, image will be incomplete"); + break; + } + } + } + finally + { + if (iis != null) + { + iis.close(); + } + } + + return masked; + } + + /** + * Returns the content of the given image as an AWT buffered image with an RGB color space. + * If a color key mask is provided then an ARGB image is returned instead. + * This method never returns null. 
+ * @param pdImage the image to read + * @param colorKey an optional color key mask + * @return content of this image as an RGB buffered image + * @throws IOException if the image cannot be read + */ + public static BufferedImage getRGBImage(PDImage pdImage, COSArray colorKey) throws IOException + { + return getRGBImage(pdImage, null, 1, colorKey); + } + + private static Rectangle clipRegion(PDImage pdImage, Rectangle region) + { + if (region == null) + { + return new Rectangle(0, 0, pdImage.getWidth(), pdImage.getHeight()); + } + else + { + int x = Math.max(0, region.x); + int y = Math.max(0, region.y); + int width = Math.min(region.width, pdImage.getWidth() - x); + int height = Math.min(region.height, pdImage.getHeight() - y); + return new Rectangle(x, y, width, height); + } + } + + /** + * Returns the content of the given image as an AWT buffered image with an RGB color space. + * If a color key mask is provided then an ARGB image is returned instead. + * This method never returns null. + * @param pdImage the image to read + * @param region The region of the source image to get, or null if the entire image is needed. + * The actual region will be clipped to the dimensions of the source image. + * @param subsampling The amount of rows and columns to advance for every output pixel, a value + * of 1 meaning every pixel will be read. It must not be larger than the image width or height. 
+ * @param colorKey an optional color key mask + * @return content of this image as an (A)RGB buffered image + * @throws IOException if the image cannot be read + */ + public static BufferedImage getRGBImage(PDImage pdImage, Rectangle region, int subsampling, + COSArray colorKey) throws IOException + { + if (pdImage.isEmpty()) + { + throw new IOException("Image stream is empty"); + } + Rectangle clipped = clipRegion(pdImage, region); + + // get parameters, they must be valid or have been repaired + final PDColorSpace colorSpace = pdImage.getColorSpace(); + final int numComponents = colorSpace.getNumberOfComponents(); + final int width = (int) Math.ceil(clipped.getWidth() / subsampling); + final int height = (int) Math.ceil(clipped.getHeight() / subsampling); + final int bitsPerComponent = pdImage.getBitsPerComponent(); + + if (width <= 0 || height <= 0 || pdImage.getWidth() <= 0 || pdImage.getHeight() <= 0) + { + throw new IOException("image width and height must be positive"); + } + + try + { + if (bitsPerComponent == 1 && colorKey == null && numComponents == 1) + { + return from1Bit(pdImage, clipped, subsampling, width, height); + } + + // An AWT raster must use 8/16/32 bits per component. Images with < 8bpc + // will be unpacked into a byte-backed raster. Images with 16bpc will be reduced + // in depth to 8bpc as they will be drawn to TYPE_INT_RGB images anyway. All code + // in PDColorSpace#toRGBImage expects an 8-bit range, i.e. 0-255. + // Interleaved raster allows chunk-copying for 8-bit images. 
+ WritableRaster raster = Raster.createInterleavedRaster(DataBuffer.TYPE_BYTE, width, height, + numComponents, new Point(0, 0)); + final float[] defaultDecode = pdImage.getColorSpace().getDefaultDecode(8); + final float[] decode = getDecodeArray(pdImage); + if (bitsPerComponent == 8 && colorKey == null && Arrays.equals(decode, defaultDecode)) + { + // convert image, faster path for non-decoded, non-colormasked 8-bit images + return from8bit(pdImage, raster, clipped, subsampling, width, height); + } + return fromAny(pdImage, raster, colorKey, clipped, subsampling, width, height); + } + catch (NegativeArraySizeException ex) + { + throw new IOException(ex); + } + } + + /** + * Extract the raw unconverted raster of the given image + * @param pdImage The image to get the raw raster data from + * @return the raw raster of this image + * @throws IOException + */ + public static WritableRaster getRawRaster(PDImage pdImage) throws IOException + { + if (pdImage.isEmpty()) + { + throw new IOException("Image stream is empty"); + } + + // get parameters, they must be valid or have been repaired + final PDColorSpace colorSpace = pdImage.getColorSpace(); + final int numComponents = colorSpace.getNumberOfComponents(); + final int width = pdImage.getWidth(); + final int height = pdImage.getHeight(); + final int bitsPerComponent = pdImage.getBitsPerComponent(); + + if (width <= 0 || height <= 0) + { + throw new IOException("image width and height must be positive"); + } + + try + { + int dataBufferType = DataBuffer.TYPE_BYTE; + if (bitsPerComponent > 8) + { + dataBufferType = DataBuffer.TYPE_USHORT; + } + WritableRaster raster = Raster.createInterleavedRaster(dataBufferType, width, height, numComponents, + new Point(0, 0)); + readRasterFromAny(pdImage, raster); + return raster; + } + catch (NegativeArraySizeException ex) + { + throw new IOException(ex); + } + } + + private static void readRasterFromAny(PDImage pdImage, WritableRaster raster) + throws IOException + { + final 
PDColorSpace colorSpace = pdImage.getColorSpace(); + final int numComponents = colorSpace.getNumberOfComponents(); + final int bitsPerComponent = pdImage.getBitsPerComponent(); + final float[] decode = getDecodeArray(pdImage); + DecodeOptions options = new DecodeOptions(); + + // read bit stream + ImageInputStream iis = null; + try + { + iis = new MemoryCacheImageInputStream(pdImage.createInputStream(options)); + + final int inputWidth = pdImage.getWidth(); + final int scanWidth = pdImage.getWidth(); + final int scanHeight = pdImage.getHeight(); + + // create stream + final float sampleMax = (float) Math.pow(2, bitsPerComponent) - 1f; + final boolean isIndexed = colorSpace instanceof PDIndexed; + + // calculate row padding + int padding = 0; + if (inputWidth * numComponents * bitsPerComponent % 8 > 0) + { + padding = 8 - (inputWidth * numComponents * bitsPerComponent % 8); + } + + // read stream + final boolean isShort = raster.getDataBuffer().getDataType() == DataBuffer.TYPE_USHORT; + assert !isIndexed || !isShort; + final byte[] srcColorValuesBytes = isShort ? null : new byte[numComponents]; + final short[] srcColorValuesShort = isShort ? 
new short[numComponents] : null; + for (int y = 0; y < scanHeight; y++) + { + for (int x = 0; x < scanWidth; x++) + { + for (int c = 0; c < numComponents; c++) + { + int value = (int) iis.readBits(bitsPerComponent); + + // decode array + final float dMin = decode[c * 2]; + final float dMax = decode[(c * 2) + 1]; + + // interpolate to domain + float output = dMin + (value * ((dMax - dMin) / sampleMax)); + + if (isIndexed) + { + // indexed color spaces get the raw value, because the TYPE_BYTE + // below cannot be reversed by the color space without it having + // knowledge of the number of bits per component + srcColorValuesBytes[c] = (byte) Math.round(output); + } + else + { + if (isShort) + { + // interpolate to TYPE_SHORT + int outputShort = Math + .round(((output - Math.min(dMin, dMax)) / Math.abs(dMax - dMin)) * 65535f); + + srcColorValuesShort[c] = (short) outputShort; + } + else + { + // interpolate to TYPE_BYTE + int outputByte = Math + .round(((output - Math.min(dMin, dMax)) / Math.abs(dMax - dMin)) * 255f); + + srcColorValuesBytes[c] = (byte) outputByte; + } + } + } + + if (isShort) + { + raster.setDataElements(x, y, srcColorValuesShort); + } + else + { + raster.setDataElements(x, y, srcColorValuesBytes); + } + } + + // rows are padded to the nearest byte + iis.readBits(padding); + } + } + finally + { + if (iis != null) + { + iis.close(); + } + } + } + + private static BufferedImage from1Bit(PDImage pdImage, Rectangle clipped, final int subsampling, + final int width, final int height) throws IOException + { + int currentSubsampling = subsampling; + final PDColorSpace colorSpace = pdImage.getColorSpace(); + final float[] decode = getDecodeArray(pdImage); + BufferedImage bim = null; + WritableRaster raster; + byte[] output; + + DecodeOptions options = new DecodeOptions(currentSubsampling); + options.setSourceRegion(clipped); + // read bit stream + InputStream iis = null; + try + { + // create stream + iis = pdImage.createInputStream(options); + + final int 
inputWidth; + final int startx; + final int starty; + final int scanWidth; + final int scanHeight; + if (options.isFilterSubsampled()) + { + // Decode options were honored, and so there is no need for additional clipping or subsampling + inputWidth = width; + startx = 0; + starty = 0; + scanWidth = width; + scanHeight = height; + currentSubsampling = 1; + } + else + { + // Decode options not honored, so we need to clip and subsample ourselves. + inputWidth = pdImage.getWidth(); + startx = clipped.x; + starty = clipped.y; + scanWidth = clipped.width; + scanHeight = clipped.height; + } + if (colorSpace instanceof PDDeviceGray) + { + // TYPE_BYTE_GRAY and not TYPE_BYTE_BINARY because this one is handled + // without conversion to RGB by Graphics.drawImage + // this reduces the memory footprint, only one byte per pixel instead of three. + bim = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY); + raster = bim.getRaster(); + } + else + { + raster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, width, height, 1, new Point(0, 0)); + } + output = ((DataBufferByte) raster.getDataBuffer()).getData(); + final boolean isIndexed = colorSpace instanceof PDIndexed; + + int rowLen = inputWidth / 8; + if (inputWidth % 8 > 0) + { + rowLen++; + } + + // read stream + byte value0; + byte value1; + if (isIndexed || decode[0] < decode[1]) + { + value0 = 0; + value1 = (byte) 255; + } + else + { + value0 = (byte) 255; + value1 = 0; + } + byte[] buff = new byte[rowLen]; + int idx = 0; + for (int y = 0; y < starty + scanHeight; y++) + { + int x = 0; + int readLen = (int) IOUtils.populateBuffer(iis, buff); + if (y < starty || y % currentSubsampling > 0) + { + continue; + } + for (int r = 0; r < rowLen && r < readLen; r++) + { + int value = buff[r]; + int mask = 128; + for (int i = 0; i < 8; i++) + { + if (x >= startx + scanWidth) + { + break; + } + int bit = value & mask; + mask >>= 1; + if (x >= startx && x % currentSubsampling == 0) + { + output[idx++] = bit == 0 ? 
value0 : value1; + } + x++; + } + } + if (readLen != rowLen) + { + LOG.warn("premature EOF, image will be incomplete"); + break; + } + } + + if (bim != null) + { + return bim; + } + + // use the color space to convert the image to RGB + return colorSpace.toRGBImage(raster); + } + finally + { + if (iis != null) + { + iis.close(); + } + } + } + + // faster, 8-bit non-decoded, non-colormasked image conversion + private static BufferedImage from8bit(PDImage pdImage, WritableRaster raster, Rectangle clipped, final int subsampling, + final int width, final int height) throws IOException + { + int currentSubsampling = subsampling; + DecodeOptions options = new DecodeOptions(currentSubsampling); + options.setSourceRegion(clipped); + InputStream input = pdImage.createInputStream(options); + try + { + final int inputWidth; + final int startx; + final int starty; + final int scanWidth; + final int scanHeight; + if (options.isFilterSubsampled()) + { + // Decode options were honored, and so there is no need for additional clipping or subsampling + inputWidth = width; + startx = 0; + starty = 0; + scanWidth = width; + scanHeight = height; + currentSubsampling = 1; + } + else + { + // Decode options not honored, so we need to clip and subsample ourselves. + inputWidth = pdImage.getWidth(); + startx = clipped.x; + starty = clipped.y; + scanWidth = clipped.width; + scanHeight = clipped.height; + } + final int numComponents = pdImage.getColorSpace().getNumberOfComponents(); + // get the raster's underlying byte buffer + byte[] bank = ((DataBufferByte) raster.getDataBuffer()).getData(); + if (startx == 0 && starty == 0 && scanWidth == width && scanHeight == height && currentSubsampling == 1) + { + // we just need to copy all sample data, then convert to RGB image. 
+ long inputResult = IOUtils.populateBuffer(input, bank); + if (inputResult != width * height * (long) numComponents) + { + LOG.debug("Tried reading " + width * height * (long) numComponents + " bytes but only " + inputResult + " bytes read"); + } + return pdImage.getColorSpace().toRGBImage(raster); + } + + // either subsampling is required, or reading only part of the image, so its + // not possible to blindly copy all data. + byte[] tempBytes = new byte[numComponents * inputWidth]; + // compromise between memory and time usage: + // reading the whole image consumes too much memory + // reading one pixel at a time makes it slow in our buffering infrastructure + int i = 0; + for (int y = 0; y < starty + scanHeight; ++y) + { + IOUtils.populateBuffer(input, tempBytes); + if (y < starty || y % currentSubsampling > 0) + { + continue; + } + + if (currentSubsampling == 1) + { + // Not the entire region was requested, but if no subsampling should + // be performed, we can still copy the entire part of this row + System.arraycopy(tempBytes, startx * numComponents, bank, y * inputWidth * numComponents, scanWidth * numComponents); + } + else + { + for (int x = startx; x < startx + scanWidth; x += currentSubsampling) + { + for (int c = 0; c < numComponents; c++) + { + bank[i] = tempBytes[x * numComponents + c]; + ++i; + } + } + } + } + // use the color space to convert the image to RGB + return pdImage.getColorSpace().toRGBImage(raster); + } + finally + { + IOUtils.closeQuietly(input); + } + } + + // slower, general-purpose image conversion from any image format + private static BufferedImage fromAny(PDImage pdImage, WritableRaster raster, COSArray colorKey, Rectangle clipped, + final int subsampling, final int width, final int height) + throws IOException + { + int currentSubsampling = subsampling; + final PDColorSpace colorSpace = pdImage.getColorSpace(); + final int numComponents = colorSpace.getNumberOfComponents(); + final int bitsPerComponent = 
pdImage.getBitsPerComponent(); + final float[] decode = getDecodeArray(pdImage); + + DecodeOptions options = new DecodeOptions(currentSubsampling); + options.setSourceRegion(clipped); + // read bit stream + ImageInputStream iis = null; + try + { + iis = new MemoryCacheImageInputStream(pdImage.createInputStream(options)); + + final int inputWidth; + final int startx; + final int starty; + final int scanWidth; + final int scanHeight; + if (options.isFilterSubsampled()) + { + // Decode options were honored, and so there is no need for additional clipping or subsampling + inputWidth = width; + startx = 0; + starty = 0; + scanWidth = width; + scanHeight = height; + currentSubsampling = 1; + } + else + { + // Decode options not honored, so we need to clip and subsample ourselves. + inputWidth = pdImage.getWidth(); + startx = clipped.x; + starty = clipped.y; + scanWidth = clipped.width; + scanHeight = clipped.height; + } + // create stream + final float sampleMax = (float) Math.pow(2, bitsPerComponent) - 1f; + final boolean isIndexed = colorSpace instanceof PDIndexed; + + // init color key mask + float[] colorKeyRanges = null; + BufferedImage colorKeyMask = null; + if (colorKey != null) + { + colorKeyRanges = colorKey.toFloatArray(); + colorKeyMask = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY); + } + + // calculate row padding + int padding = 0; + if (inputWidth * numComponents * bitsPerComponent % 8 > 0) + { + padding = 8 - (inputWidth * numComponents * bitsPerComponent % 8); + } + + // read stream + byte[] srcColorValues = new byte[numComponents]; + byte[] alpha = new byte[1]; + for (int y = 0; y < starty + scanHeight; y++) + { + for (int x = 0; x < startx + scanWidth; x++) + { + boolean isMasked = true; + for (int c = 0; c < numComponents; c++) + { + int value = (int)iis.readBits(bitsPerComponent); + + // color key mask requires values before they are decoded + if (colorKeyRanges != null) + { + isMasked &= value >= colorKeyRanges[c * 2] && + value <= 
colorKeyRanges[c * 2 + 1]; + } + + // decode array + final float dMin = decode[c * 2]; + final float dMax = decode[(c * 2) + 1]; + + // interpolate to domain + float output = dMin + (value * ((dMax - dMin) / sampleMax)); + + if (isIndexed) + { + // indexed color spaces get the raw value, because the TYPE_BYTE + // below cannot be reversed by the color space without it having + // knowledge of the number of bits per component + srcColorValues[c] = (byte)Math.round(output); + } + else + { + // interpolate to TYPE_BYTE + int outputByte = Math.round(((output - Math.min(dMin, dMax)) / + Math.abs(dMax - dMin)) * 255f); + + srcColorValues[c] = (byte)outputByte; + } + } + // only write to output if within requested region and subsample. + if (x >= startx && y >= starty && x % currentSubsampling == 0 && y % currentSubsampling == 0) + { + raster.setDataElements((x - startx) / currentSubsampling, (y - starty) / currentSubsampling, srcColorValues); + + // set alpha channel in color key mask, if any + if (colorKeyMask != null) + { + alpha[0] = (byte)(isMasked ? 
255 : 0); + colorKeyMask.getRaster().setDataElements((x - startx) / currentSubsampling, (y - starty) / currentSubsampling, alpha); + } + } + } + + // rows are padded to the nearest byte + iis.readBits(padding); + } + + // use the color space to convert the image to RGB + BufferedImage rgbImage = colorSpace.toRGBImage(raster); + + // apply color mask, if any + if (colorKeyMask != null) + { + return applyColorKeyMask(rgbImage, colorKeyMask); + } + else + { + return rgbImage; + } + } + finally + { + if (iis != null) + { + iis.close(); + } + } + } + + // color key mask: RGB + Binary -> ARGB + private static BufferedImage applyColorKeyMask(BufferedImage image, BufferedImage mask) + { + int width = image.getWidth(); + int height = image.getHeight(); + + // compose to ARGB + BufferedImage masked = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); + + WritableRaster src = image.getRaster(); + WritableRaster dest = masked.getRaster(); + WritableRaster alpha = mask.getRaster(); + + float[] rgb = new float[3]; + float[] rgba = new float[4]; + float[] alphaPixel = null; + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + src.getPixel(x, y, rgb); + + rgba[0] = rgb[0]; + rgba[1] = rgb[1]; + rgba[2] = rgb[2]; + alphaPixel = alpha.getPixel(x, y, alphaPixel); + rgba[3] = 255 - alphaPixel[0]; + + dest.setPixel(x, y, rgba); + } + } + + return masked; + } + + // gets decode array from dictionary or returns default + private static float[] getDecodeArray(PDImage pdImage) throws IOException + { + final COSArray cosDecode = pdImage.getDecode(); + float[] decode = null; + + if (cosDecode != null) + { + int numberOfComponents = pdImage.getColorSpace().getNumberOfComponents(); + if (cosDecode.size() != numberOfComponents * 2) + { + if (pdImage.isStencil() && cosDecode.size() >= 2 + && cosDecode.get(0) instanceof COSNumber + && cosDecode.get(1) instanceof COSNumber) + { + float decode0 = ((COSNumber) cosDecode.get(0)).floatValue(); + float decode1 = 
((COSNumber) cosDecode.get(1)).floatValue(); + if (decode0 >= 0 && decode0 <= 1 && decode1 >= 0 && decode1 <= 1) + { + LOG.warn("decode array " + cosDecode + + " not compatible with color space, using the first two entries"); + return new float[] + { + decode0, decode1 + }; + } + } + LOG.error("decode array " + cosDecode + + " not compatible with color space, using default"); + } + else + { + decode = cosDecode.toFloatArray(); + } + } + + // use color space default + if (decode == null) + { + return pdImage.getColorSpace().getDefaultDecode(pdImage.getBitsPerComponent()); + } + + return decode; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/package.html index 0f7b1419015..5828a20a72a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentGroup.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentGroup.java index 2d288b0c7c2..7be9459dea6 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentGroup.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentGroup.java @@ -19,6 +19,7 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList; +import org.apache.pdfbox.rendering.RenderDestination; /** * An optional content group (OCG). 
@@ -48,6 +49,51 @@ public PDOptionalContentGroup(COSDictionary dict) "Provided dictionary is not of type '" + COSName.OCG + "'"); } } + + /** + * Enumeration for the renderState dictionary entry on the "Export", "View" + * and "Print" dictionary. + */ + public enum RenderState + { + /** The "ON" value. */ + ON(COSName.ON), + /** The "OFF" value. */ + OFF(COSName.OFF); + + private final COSName name; + + private RenderState(COSName value) + { + this.name = value; + } + + /** + * Returns the base state represented by the given {@link COSName}. + * + * @param state the state name + * @return the state enum value + */ + public static RenderState valueOf(COSName state) + { + if (state == null) + { + return null; + } + + return RenderState.valueOf(state.getName().toUpperCase()); + } + + /** + * Returns the PDF name for the state. + * + * @return the name of the state + */ + public COSName getName() + { + return this.name; + } + } /** * Returns the name of the optional content group. @@ -67,7 +113,36 @@ public void setName(String name) dict.setString(COSName.NAME, name); } - //TODO Add support for "Intent" and "Usage" + //TODO Add support for "Intent" + /** + * @param destination to be rendered + * @return state or null if undefined + */ + public RenderState getRenderState(RenderDestination destination) + { + COSName state = null; + COSDictionary usage = (COSDictionary) dict.getDictionaryObject("Usage"); + if (usage != null) + { + if (RenderDestination.PRINT.equals(destination)) + { + COSDictionary print = (COSDictionary) usage.getDictionaryObject("Print"); + state = print == null ? null : (COSName) print.getDictionaryObject("PrintState"); + } + else if (RenderDestination.VIEW.equals(destination)) + { + COSDictionary view = (COSDictionary) usage.getDictionaryObject("View"); + state = view == null ? 
null : (COSName) view.getDictionaryObject("ViewState"); + } + // Fallback to export + if (state == null) + { + COSDictionary export = (COSDictionary) usage.getDictionaryObject("Export"); + state = export == null ? null : (COSName) export.getDictionaryObject("ExportState"); + } + } + return state == null ? null : RenderState.valueOf(state); + } @Override public String toString() diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentMembershipDictionary.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentMembershipDictionary.java new file mode 100644 index 00000000000..db65025f147 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentMembershipDictionary.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.graphics.optionalcontent; + +import java.util.ArrayList; +import java.util.List; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList; + +/** + * An optional content membership dictionary (OCMD). + * + * @author Tilman Hausherr + */ +public class PDOptionalContentMembershipDictionary extends PDPropertyList +{ + /** + * Creates a new optional content membership dictionary (OCMD). + */ + public PDOptionalContentMembershipDictionary() + { + this.dict.setItem(COSName.TYPE, COSName.OCMD); + } + + /** + * Creates a new instance based on a given {@link COSDictionary}. + * @param dict the dictionary + */ + public PDOptionalContentMembershipDictionary(COSDictionary dict) + { + super(dict); + if (!dict.getItem(COSName.TYPE).equals(COSName.OCMD)) + { + throw new IllegalArgumentException( + "Provided dictionary is not of type '" + COSName.OCMD + "'"); + } + } + + /** + * Get a list of optional content groups. + * + * @return List of optional content groups, never null. + */ + public List getOCGs() + { + List list = new ArrayList(); + COSBase base = dict.getDictionaryObject(COSName.OCGS); + if (base instanceof COSDictionary) + { + list.add(PDPropertyList.create((COSDictionary) base)); + } + else if (base instanceof COSArray) + { + COSArray ar = (COSArray) base; + for (int i = 0; i < ar.size(); ++i) + { + COSBase elem = ar.getObject(i); + if (elem instanceof COSDictionary) + { + list.add(PDPropertyList.create((COSDictionary) elem)); + } + } + } + return list; + } + + /** + * Set optional content groups as a list. + * + * @param ocgs list of optional content groups to set. 
+ */ + public void setOCGs(List ocgs) + { + COSArray ar = new COSArray(); + for (PDPropertyList prop : ocgs) + { + ar.add(prop); + } + dict.setItem(COSName.OCGS, ar); + } + + /** + * Get the visibility policy name. Valid names are AllOff, AllOn, AnyOff, AnyOn (default). + * + * @return the visibility policy, never null. + */ + public COSName getVisibilityPolicy() + { + return dict.getCOSName(COSName.P, COSName.ANY_ON); + } + + /** + * Sets the visibility policy name. Valid names are AllOff, AllOn, AnyOff, AnyOn (default). + * @param visibilityPolicy + */ + public void setVisibilityPolicy(COSName visibilityPolicy) + { + dict.setItem(COSName.P, visibilityPolicy); + } + + //TODO support /VE some day +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentProperties.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentProperties.java index 30b2374c1d4..2fa3720e238 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentProperties.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentProperties.java @@ -16,6 +16,7 @@ */ package org.apache.pdfbox.pdmodel.graphics.optionalcontent; +import java.util.ArrayList; import java.util.Collection; import org.apache.pdfbox.cos.COSArray; @@ -36,7 +37,7 @@ public class PDOptionalContentProperties implements COSObjectable /** * Enumeration for the BaseState dictionary entry on the "D" dictionary. */ - public static enum BaseState + public enum BaseState { /** The "ON" value. 
*/ @@ -87,7 +88,12 @@ public PDOptionalContentProperties() { this.dict = new COSDictionary(); this.dict.setItem(COSName.OCGS, new COSArray()); - this.dict.setItem(COSName.D, new COSDictionary()); + COSDictionary d = new COSDictionary(); + + // Name optional but required for PDF/A-3 + d.setString(COSName.NAME, "Top"); + + this.dict.setItem(COSName.D, d); } /** @@ -101,14 +107,14 @@ public PDOptionalContentProperties(COSDictionary props) /** {@inheritDoc} */ @Override - public COSBase getCOSObject() + public COSDictionary getCOSObject() { return this.dict; } private COSArray getOCGs() { - COSArray ocgs = (COSArray)this.dict.getItem(COSName.OCGS); + COSArray ocgs = this.dict.getCOSArray(COSName.OCGS); if (ocgs == null) { ocgs = new COSArray(); @@ -119,17 +125,26 @@ private COSArray getOCGs() private COSDictionary getD() { - COSDictionary d = (COSDictionary)this.dict.getDictionaryObject(COSName.D); - if (d == null) + COSBase base = this.dict.getDictionaryObject(COSName.D); + if (base instanceof COSDictionary) { - d = new COSDictionary(); - this.dict.setItem(COSName.D, d); //D is required + return (COSDictionary) base; } + + COSDictionary d = new COSDictionary(); + + // Name optional but required for PDF/A-3 + d.setString(COSName.NAME, "Top"); + + // D is required + this.dict.setItem(COSName.D, d); + return d; } /** - * Returns the optional content group of the given name. + * Returns the first optional content group of the given name. 
+ * * @param name the group name * @return the optional content group or null, if there is no such group */ @@ -173,12 +188,11 @@ public void addGroup(PDOptionalContentGroup ocg) */ public Collection getOptionalContentGroups() { - Collection coll = new java.util.ArrayList(); + Collection coll = new ArrayList(); COSArray ocgs = getOCGs(); for (COSBase base : ocgs) { - COSObject obj = (COSObject)base; //Children must be indirect references - coll.add(new PDOptionalContentGroup((COSDictionary)obj.getObject())); + coll.add(new PDOptionalContentGroup(toDictionary(base))); } return coll; } @@ -210,7 +224,11 @@ public void setBaseState(BaseState state) */ public String[] getGroupNames() { - COSArray ocgs = (COSArray)dict.getDictionaryObject(COSName.OCGS); + COSArray ocgs = dict.getCOSArray(COSName.OCGS); + if (ocgs == null) + { + return new String[0]; + } int size = ocgs.size(); String[] groups = new String[size]; for (int i = 0; i < size; i++) @@ -241,47 +259,75 @@ public boolean hasGroup(String groupName) } /** - * Indicates whether an optional content group is enabled. + * Indicates whether at least one optional content group with this name is enabled. + * There may be disabled optional content groups with this name even if this function returns + * true. + * * @param groupName the group name - * @return true if the group is enabled + * @return true if at least one group is enabled */ public boolean isGroupEnabled(String groupName) + { + boolean result = false; + COSArray ocgs = getOCGs(); + for (COSBase o : ocgs) + { + COSDictionary ocg = toDictionary(o); + String name = ocg.getString(COSName.NAME); + if (groupName.equals(name) && isGroupEnabled(new PDOptionalContentGroup(ocg))) + { + result = true; + } + } + return result; + } + + /** + * Indicates whether an optional content group is enabled. 
+ * @param group the group object + * @return true if the group is enabled + */ + public boolean isGroupEnabled(PDOptionalContentGroup group) { //TODO handle Optional Content Configuration Dictionaries, //i.e. OCProperties/Configs + PDOptionalContentProperties.BaseState baseState = getBaseState(); + boolean enabled = !baseState.equals(BaseState.OFF); + //TODO What to do with BaseState.Unchanged? + + if (group == null) + { + return enabled; + } + COSDictionary d = getD(); - COSArray on = (COSArray)d.getDictionaryObject(COSName.ON); - if (on != null) + COSBase base = d.getDictionaryObject(COSName.ON); + if (base instanceof COSArray) { - for (COSBase o : on) + for (COSBase o : (COSArray) base) { - COSDictionary group = toDictionary(o); - String name = group.getString(COSName.NAME); - if (name.equals(groupName)) + COSDictionary dictionary = toDictionary(o); + if (dictionary == group.getCOSObject()) { return true; } } } - COSArray off = (COSArray)d.getDictionaryObject(COSName.OFF); - if (off != null) + base = d.getDictionaryObject(COSName.OFF); + if (base instanceof COSArray) { - for (COSBase o : off) + for (COSBase o : (COSArray) base) { - COSDictionary group = toDictionary(o); - String name = group.getString(COSName.NAME); - if (name.equals(groupName)) + COSDictionary dictionary = toDictionary(o); + if (dictionary == group.getCOSObject()) { return false; } } } - BaseState baseState = getBaseState(); - boolean enabled = !baseState.equals(BaseState.OFF); - //TODO What to do with BaseState.Unchanged? return enabled; } @@ -298,35 +344,69 @@ private COSDictionary toDictionary(COSBase o) } /** - * Enables or disables an optional content group. + * Enables or disables all optional content groups with the given name. 
+ * * @param groupName the group name * @param enable true to enable, false to disable - * @return true if the group already had an on or off setting, false otherwise + * @return true if at least one group with this name already had an on or off setting, false + * otherwise */ public boolean setGroupEnabled(String groupName, boolean enable) { + boolean result = false; + COSArray ocgs = getOCGs(); + for (COSBase o : ocgs) + { + COSDictionary ocg = toDictionary(o); + String name = ocg.getString(COSName.NAME); + if (groupName.equals(name) && setGroupEnabled(new PDOptionalContentGroup(ocg), enable)) + { + result = true; + } + } + return result; + } + + /** + * Enables or disables an optional content group. + * @param group the group object + * @param enable true to enable, false to disable + * @return true if the group already had an on or off setting, false otherwise + */ + public boolean setGroupEnabled(PDOptionalContentGroup group, boolean enable) + { + COSArray on; + COSArray off; + COSDictionary d = getD(); - COSArray on = (COSArray)d.getDictionaryObject(COSName.ON); - if (on == null) + COSBase base = d.getDictionaryObject(COSName.ON); + if (!(base instanceof COSArray)) { on = new COSArray(); d.setItem(COSName.ON, on); } - COSArray off = (COSArray)d.getDictionaryObject(COSName.OFF); - if (off == null) + else + { + on = (COSArray) base; + } + base = d.getDictionaryObject(COSName.OFF); + if (!(base instanceof COSArray)) { off = new COSArray(); d.setItem(COSName.OFF, off); } + else + { + off = (COSArray) base; + } boolean found = false; if (enable) { for (COSBase o : off) { - COSDictionary group = toDictionary(o); - String name = group.getString(COSName.NAME); - if (name.equals(groupName)) + COSDictionary groupDictionary = toDictionary(o); + if (groupDictionary == group.getCOSObject()) { //enable group off.remove(o); @@ -340,9 +420,8 @@ public boolean setGroupEnabled(String groupName, boolean enable) { for (COSBase o : on) { - COSDictionary group = toDictionary(o); - 
String name = group.getString(COSName.NAME); - if (name.equals(groupName)) + COSDictionary groupDictionary = toDictionary(o); + if (groupDictionary == group.getCOSObject()) { //disable group on.remove(o); @@ -354,18 +433,15 @@ public boolean setGroupEnabled(String groupName, boolean enable) } if (!found) { - PDOptionalContentGroup ocg = getGroup(groupName); if (enable) { - on.add(ocg.getCOSObject()); + on.add(group.getCOSObject()); } else { - off.add(ocg.getCOSObject()); + off.add(group.getCOSObject()); } } return found; } - - } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/package.html index 00daf367d4c..9893e2e9275 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/PDAbstractPattern.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/PDAbstractPattern.java index e93f72ad31b..0f1637e0dc5 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/PDAbstractPattern.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/PDAbstractPattern.java @@ -23,11 +23,12 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.ResourceCache; import org.apache.pdfbox.pdmodel.common.COSObjectable; import org.apache.pdfbox.util.Matrix; /** - * A Pattern dictionary from a page's resources. + * This class wraps a pattern dictionary. Patterns can be found in resource dictionaries. 
*/ public abstract class PDAbstractPattern implements COSObjectable { @@ -39,21 +40,33 @@ public abstract class PDAbstractPattern implements COSObjectable /** * Create the correct PD Model pattern based on the COS base pattern. - * @param resourceDictionary the COS pattern dictionary - * @return the newly created pattern resources object + * @param dictionary the COS pattern dictionary + * @return the newly created pattern object * @throws IOException If we are unable to create the PDPattern object. */ - public static PDAbstractPattern create(COSDictionary resourceDictionary) throws IOException + public static PDAbstractPattern create(COSDictionary dictionary) throws IOException + { + return create(dictionary, null); + } + + /** + * Create the correct PD Model pattern based on the COS base pattern. + * @param dictionary the COS pattern dictionary + * @param resourceCache the resource cache, may be null, useful for tiling patterns. + * @return the newly created pattern object + * @throws IOException If we are unable to create the PDPattern object. + */ + public static PDAbstractPattern create(COSDictionary dictionary, ResourceCache resourceCache) throws IOException { PDAbstractPattern pattern; - int patternType = resourceDictionary.getInt(COSName.PATTERN_TYPE, 0); + int patternType = dictionary.getInt(COSName.PATTERN_TYPE, 0); switch (patternType) { case TYPE_TILING_PATTERN: - pattern = new PDTilingPattern(resourceDictionary); + pattern = new PDTilingPattern(dictionary, resourceCache); break; case TYPE_SHADING_PATTERN: - pattern = new PDShadingPattern(resourceDictionary); + pattern = new PDShadingPattern(dictionary); break; default: throw new IOException("Error: Unknown pattern type " + patternType); @@ -74,16 +87,16 @@ public PDAbstractPattern() /** * Creates a new Pattern dictionary from the given COS dictionary. - * @param resourceDictionary The COSDictionary for this pattern resource. + * @param dictionary The COSDictionary for this pattern. 
*/ - public PDAbstractPattern(COSDictionary resourceDictionary) + public PDAbstractPattern(COSDictionary dictionary) { - patternDictionary = resourceDictionary; + patternDictionary = dictionary; } /** * This will get the underlying dictionary. - * @return The dictionary for these pattern resources. + * @return The dictionary for this pattern. */ @Override public COSDictionary getCOSObject() @@ -129,16 +142,7 @@ public void setPatternType(int patternType) */ public Matrix getMatrix() { - COSArray array = (COSArray)getCOSObject().getDictionaryObject(COSName.MATRIX); - if (array != null) - { - return new Matrix(array); - } - else - { - // default value is the identity matrix - return new Matrix(); - } + return Matrix.createMatrix(getCOSObject().getDictionaryObject(COSName.MATRIX)); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/PDShadingPattern.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/PDShadingPattern.java index 99c20138a2c..9f29a016477 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/PDShadingPattern.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/PDShadingPattern.java @@ -17,6 +17,7 @@ package org.apache.pdfbox.pdmodel.graphics.pattern; import java.io.IOException; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; @@ -37,7 +38,6 @@ public class PDShadingPattern extends PDAbstractPattern */ public PDShadingPattern() { - super(); getCOSObject().setInt(COSName.PATTERN_TYPE, PDAbstractPattern.TYPE_SHADING_PATTERN); } @@ -64,12 +64,10 @@ public PDExtendedGraphicsState getExtendedGraphicsState() { if (extendedGraphicsState == null) { - COSDictionary dictionary = (COSDictionary)getCOSObject() - .getDictionaryObject(COSName.EXT_G_STATE); - - if( dictionary != null ) + COSBase base = getCOSObject().getDictionaryObject(COSName.EXT_G_STATE); + if (base instanceof COSDictionary) { - 
extendedGraphicsState = new PDExtendedGraphicsState( dictionary ); + extendedGraphicsState = new PDExtendedGraphicsState((COSDictionary) base); } } return extendedGraphicsState; @@ -92,12 +90,12 @@ public void setExtendedGraphicsState(PDExtendedGraphicsState extendedGraphicsSta */ public PDShading getShading() throws IOException { - if (shading == null) + if (shading == null) { - COSDictionary dictionary = (COSDictionary) getCOSObject().getDictionaryObject(COSName.SHADING); - if( dictionary != null ) + COSBase base = getCOSObject().getDictionaryObject(COSName.SHADING); + if (base instanceof COSDictionary) { - shading = PDShading.create(dictionary); + shading = PDShading.create((COSDictionary) base); } } return shading; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/PDTilingPattern.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/PDTilingPattern.java index ce866f8c8fc..6ea1b3359a8 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/PDTilingPattern.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/PDTilingPattern.java @@ -20,10 +20,12 @@ import java.io.InputStream; import org.apache.pdfbox.contentstream.PDContentStream; import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.ResourceCache; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDStream; @@ -42,28 +44,46 @@ public class PDTilingPattern extends PDAbstractPattern implements PDContentStrea /** tiling type 1 = constant spacing.*/ public static final int TILING_CONSTANT_SPACING = 1; - /** tiling type 2 = no distortion. */ + /** tiling type 2 = no distortion. 
*/ public static final int TILING_NO_DISTORTION = 2; /** tiling type 3 = constant spacing and faster tiling. */ public static final int TILING_CONSTANT_SPACING_FASTER_TILING = 3; + private final ResourceCache resourceCache; + /** * Creates a new tiling pattern. */ public PDTilingPattern() { - super(); + super(new COSStream()); + getCOSObject().setName(COSName.TYPE, COSName.PATTERN.getName()); getCOSObject().setInt(COSName.PATTERN_TYPE, PDAbstractPattern.TYPE_TILING_PATTERN); + + // Resources required per PDF specification; when missing, pattern is not displayed in Adobe Reader + setResources(new PDResources()); + resourceCache = null; + } + + /** + * Creates a new tiling pattern from the given COS dictionary. + * @param dictionary The COSDictionary for this pattern. + */ + public PDTilingPattern(COSDictionary dictionary) + { + this(dictionary, null); } /** * Creates a new tiling pattern from the given COS dictionary. - * @param resourceDictionary The COSDictionary for this pattern resource. + * @param dictionary The COSDictionary for this pattern. + * @param resourceCache The resource cache, may be null */ - public PDTilingPattern(COSDictionary resourceDictionary) + public PDTilingPattern(COSDictionary dictionary, ResourceCache resourceCache) { - super(resourceDictionary); + super(dictionary); + this.resourceCache = resourceCache; } @Override @@ -124,9 +144,7 @@ public void setXStep(float xStep) */ public float getXStep() { - // ignores invalid values, see PDFBOX-1094-065514-XStep32767.pdf - float xStep = getCOSObject().getFloat( COSName.X_STEP, 0 ); - return xStep == Short.MAX_VALUE ? 0 : xStep; + return getCOSObject().getFloat(COSName.X_STEP, 0); } /** @@ -144,9 +162,7 @@ public void setYStep(float yStep) */ public float getYStep() { - // ignores invalid values, see PDFBOX-1094-065514-XStep32767.pdf - float yStep = getCOSObject().getFloat( COSName.Y_STEP, 0 ); - return yStep == Short.MAX_VALUE ? 
0 : yStep; + return getCOSObject().getFloat(COSName.Y_STEP, 0); } public PDStream getContentStream() @@ -174,10 +190,10 @@ public InputStream getContents() throws IOException public PDResources getResources() { PDResources retval = null; - COSDictionary resources = (COSDictionary) getCOSObject().getDictionaryObject(COSName.RESOURCES); - if( resources != null ) + COSBase base = getCOSObject().getDictionaryObject(COSName.RESOURCES); + if (base instanceof COSDictionary) { - retval = new PDResources( resources ); + retval = new PDResources((COSDictionary) base); } return retval; } @@ -186,7 +202,7 @@ public PDResources getResources() * This will set the resources for this pattern. * @param resources The new resources for this pattern. */ - public void setResources( PDResources resources ) + public final void setResources( PDResources resources ) { getCOSObject().setItem(COSName.RESOURCES, resources); } @@ -202,10 +218,10 @@ public void setResources( PDResources resources ) public PDRectangle getBBox() { PDRectangle retval = null; - COSArray array = (COSArray)getCOSObject().getDictionaryObject( COSName.BBOX ); - if( array != null ) + COSBase base = getCOSObject().getDictionaryObject(COSName.BBOX); + if (base instanceof COSArray) { - retval = new PDRectangle( array ); + retval = new PDRectangle((COSArray) base); } return retval; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/package.html index 7e6dbb1cb69..e0176f9d7f2 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/pattern/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/AxialShadingContext.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/AxialShadingContext.java index 18b21a26e2e..fbdcc8de810 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/AxialShadingContext.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/AxialShadingContext.java @@ -112,7 +112,8 @@ public AxialShadingContext(PDShadingType2 shading, ColorModel colorModel, Affine } catch (NoninvertibleTransformException ex) { - LOG.error(ex, ex); + LOG.error(ex.getMessage() + ", matrix: " + matrix, ex); + rat = new AffineTransform(); } // shading space -> device space @@ -175,26 +176,16 @@ public Raster getRaster(int x, int y, int w, int h) WritableRaster raster = getColorModel().createCompatibleWritableRaster(w, h); boolean useBackground; int[] data = new int[w * h * 4]; + float[] values = new float[2]; for (int j = 0; j < h; j++) { - double currentY = y + j; - if (bboxRect != null && (currentY < minBBoxY || currentY > maxBBoxY)) - { - continue; - } for (int i = 0; i < w; i++) { - double currentX = x + i; - if (bboxRect != null && (currentX < minBBoxX || currentX > maxBBoxX)) - { - continue; - } useBackground = false; - float[] values = new float[] { x + i, y + j }; + values[0] = x + i; + values[1] = y + j; rat.transform(values, 0, values, 0, 1); - currentX = values[0]; - currentY = values[1]; - double inputValue = x1x0 * (currentX - coords[0]) + y1y0 * (currentY - coords[1]); + double inputValue = x1x0 * (values[0] - coords[0]) + y1y0 * (values[1] - coords[1]); // TODO this happens if start == end, see PDFBOX-1442 if (denom == 0) { @@ -214,7 +205,7 @@ public Raster getRaster(int x, int y, int w, int h) // the shading has to be extended if extend[0] == true if (extend[0]) { - inputValue = 0; + inputValue = domain[0]; } else { @@ -231,7 +222,7 @@ else if (inputValue > 1) // the shading has to be extended if 
extend[1] == true if (extend[1]) { - inputValue = 1; + inputValue = domain[1]; } else { @@ -245,7 +236,7 @@ else if (inputValue > 1) int value; if (useBackground) { - // use the given backgound color values + // use the given background color values value = getRgbBackground(); } else diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/AxialShadingPaint.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/AxialShadingPaint.java index 56ae6d9ce4e..49b69a86ca1 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/AxialShadingPaint.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/AxialShadingPaint.java @@ -17,7 +17,6 @@ package org.apache.pdfbox.pdmodel.graphics.shading; import java.awt.Color; -import java.awt.Paint; import java.awt.PaintContext; import java.awt.Rectangle; import java.awt.RenderingHints; @@ -33,13 +32,10 @@ * AWT Paint for axial shading. * */ -public class AxialShadingPaint implements Paint +public class AxialShadingPaint extends ShadingPaint { private static final Log LOG = LogFactory.getLog(AxialShadingPaint.class); - private final PDShadingType2 shading; - private final Matrix matrix; - /** * Constructor. 
* @@ -48,8 +44,7 @@ public class AxialShadingPaint implements Paint */ AxialShadingPaint(PDShadingType2 shadingType2, Matrix matrix) { - shading = shadingType2; - this.matrix = matrix; + super(shadingType2, matrix); } @Override diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/CoonsPatch.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/CoonsPatch.java index 127710e9893..d41c3edfa33 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/CoonsPatch.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/CoonsPatch.java @@ -34,7 +34,7 @@ class CoonsPatch extends Patch */ protected CoonsPatch(Point2D[] points, float[][] color) { - super(points, color); + super(color); controlPoints = reshapeControlPoints(points); level = calcLevel(); listOfTriangles = getTriangles(); @@ -162,7 +162,7 @@ protected Point2D[] getFlag3Edge() /* dividing a patch into a grid, return a matrix of the coordinate and color at the crossing points of the grid, the rule to calculate the coordinate is defined in page 195 of PDF32000_2008.pdf, the rule to calculate the - cooresponding color is bilinear interpolation + corresponding color is bilinear interpolation */ private CoordinateColorPair[][] getPatchCoordinatesColor(CubicBezierCurve c1, CubicBezierCurve c2, CubicBezierCurve d1, CubicBezierCurve d2) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/GouraudShadingContext.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/GouraudShadingContext.java index 24628735631..e1f9d59e6f6 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/GouraudShadingContext.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/GouraudShadingContext.java @@ -19,17 +19,13 @@ import java.awt.Point; import java.awt.Rectangle; import java.awt.geom.AffineTransform; -import java.awt.geom.Point2D; import java.awt.image.ColorModel; import 
java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import javax.imageio.stream.ImageInputStream; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.pdfbox.pdmodel.common.PDRange; + import org.apache.pdfbox.util.Matrix; /** @@ -40,8 +36,6 @@ */ abstract class GouraudShadingContext extends TriangleBasedShadingContext { - private static final Log LOG = LogFactory.getLog(GouraudShadingContext.class); - /** * triangle list. */ @@ -62,44 +56,7 @@ protected GouraudShadingContext(PDShading shading, ColorModel colorModel, Affine super(shading, colorModel, xform, matrix); } - /** - * Read a vertex from the bit input stream performs interpolations. - * - * @param input bit input stream - * @param maxSrcCoord max value for source coordinate (2^bits-1) - * @param maxSrcColor max value for source color (2^bits-1) - * @param rangeX dest range for X - * @param rangeY dest range for Y - * @param colRangeTab dest range array for colors - * @param matrix the pattern matrix concatenated with that of the parent content stream - * @return a new vertex with the flag and the interpolated values - * @throws IOException if something went wrong - */ - protected Vertex readVertex(ImageInputStream input, long maxSrcCoord, long maxSrcColor, - PDRange rangeX, PDRange rangeY, PDRange[] colRangeTab, - Matrix matrix, AffineTransform xform) throws IOException - { - float[] colorComponentTab = new float[numberOfColorComponents]; - long x = input.readBits(bitsPerCoordinate); - long y = input.readBits(bitsPerCoordinate); - float dstX = interpolate(x, maxSrcCoord, rangeX.getMin(), rangeX.getMax()); - float dstY = interpolate(y, maxSrcCoord, rangeY.getMin(), rangeY.getMax()); - LOG.debug("coord: " + String.format("[%06X,%06X] -> [%f,%f]", x, y, dstX, dstY)); - Point2D p = matrix.transformPoint(dstX, dstY); - xform.transform(p, p); - - for (int n = 0; n < numberOfColorComponents; 
++n) - { - int color = (int) input.readBits(bitsPerColorComponent); - colorComponentTab[n] = interpolate(color, maxSrcColor, colRangeTab[n].getMin(), - colRangeTab[n].getMax()); - LOG.debug("color[" + n + "]: " + color + "/" + String.format("%02x", color) - + "-> color[" + n + "]: " + colorComponentTab[n]); - } - return new Vertex(p, colorComponentTab); - } - - void setTriangleList(List triangleList) + final void setTriangleList(List triangleList) { this.triangleList = triangleList; } @@ -119,20 +76,6 @@ public void dispose() super.dispose(); } - /** - * Calculate the interpolation, see p.345 pdf spec 1.7. - * - * @param src src value - * @param srcMax max src value (2^bits-1) - * @param dstMin min dst value - * @param dstMax max dst value - * @return interpolated value - */ - private float interpolate(float src, long srcMax, float dstMin, float dstMax) - { - return dstMin + (src * (dstMax - dstMin) / srcMax); - } - @Override protected boolean isDataEmpty() { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/IntPoint.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/IntPoint.java new file mode 100644 index 00000000000..0db9927d6b9 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/IntPoint.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.graphics.shading; + +import java.awt.Point; +import java.awt.geom.Point2D; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Point class with faster hashCode() to speed up the rendering of Gouraud shadings. Should only be + * used for maps or sets when all elements are of this type, because the hashCode() method violates + * its general contract "If two objects are equal according to the equals(Object) method, then + * calling the hashCode method on each of the two objects must produce the same integer result" when IntPoint is + * mixed with Point, because IntPoint(x,y) would have a different hashCode than Point(x,y). 
+ * + * @author Tilman Hausherr + */ +class IntPoint extends Point +{ + private static final Log LOG = LogFactory.getLog(IntPoint.class); + + IntPoint(int x, int y) + { + super(x, y); + } + + @Override + public int hashCode() + { + return 89 * (623 + this.x) + this.y; + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) + { + return true; + } + if (obj == null) + { + return false; + } + if (getClass() != obj.getClass()) + { + if (obj instanceof Point2D) + { + // hitting this branch means that the warning on top of the class wasn't read + LOG.error("IntPoint should not be used together with its base class"); + } + return false; + } + final IntPoint other = (IntPoint) obj; + return this.x == other.x && this.y == other.y; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Line.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Line.java index b4b1aa8ce6a..aa3c563c476 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Line.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Line.java @@ -73,7 +73,7 @@ private Set calcLine(int x0, int y0, int x1, int y1) int err = dx - dy; while (true) { - points.add(new Point(x0, y0)); + points.add(new IntPoint(x0, y0)); if (x0 == x1 && y0 == y1) { break; @@ -102,13 +102,13 @@ private Set calcLine(int x0, int y0, int x1, int y1) */ protected float[] calcColor(Point p) { - int numberOfColorComponents = color0.length; - float[] pc = new float[numberOfColorComponents]; if (point0.x == point1.x && point0.y == point1.y) { return color0; } - else if (point0.x == point1.x) + int numberOfColorComponents = color0.length; + float[] pc = new float[numberOfColorComponents]; + if (point0.x == point1.x) { float l = point1.y - point0.y; for (int i = 0; i < numberOfColorComponents; i++) diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDMeshBasedShadingType.java 
b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDMeshBasedShadingType.java new file mode 100644 index 00000000000..8eda73e9bc9 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDMeshBasedShadingType.java @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.graphics.shading; + +import java.awt.geom.AffineTransform; +import java.awt.geom.Point2D; +import java.awt.geom.Rectangle2D; +import java.io.EOFException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import javax.imageio.stream.ImageInputStream; +import javax.imageio.stream.MemoryCacheImageInputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.pdmodel.common.PDRange; +import org.apache.pdfbox.util.Matrix; + +/** + * Common resources for shading types 6 and 7 + */ +abstract class PDMeshBasedShadingType extends PDShadingType4 +{ + + private static final Log LOG = LogFactory.getLog(PDMeshBasedShadingType.class); + + PDMeshBasedShadingType(COSDictionary shadingDictionary) + { + super(shadingDictionary); + } + + /** + * Create a patch list from a data stream, the returned list contains all the patches contained in the data stream. 
+ * + * @param shadingType the shading type + * @param xform transformation for user to device space + * @param matrix the pattern matrix concatenated with that of the parent content stream + * @param controlPoints number of control points, 12 for type 6 shading and 16 for type 7 shading + * @return the obtained patch list + * @throws IOException when something went wrong + */ + @SuppressWarnings({ "squid:S2583", "squid:S1166" }) + final List collectPatches(AffineTransform xform, Matrix matrix, int controlPoints) + throws IOException + { + COSDictionary dict = getCOSObject(); + if (!(dict instanceof COSStream)) + { + return Collections.emptyList(); + } + PDRange rangeX = getDecodeForParameter(0); + PDRange rangeY = getDecodeForParameter(1); + if (Float.compare(rangeX.getMin(), rangeX.getMax()) == 0 + || Float.compare(rangeY.getMin(), rangeY.getMax()) == 0) + { + return Collections.emptyList(); + } + int bitsPerFlag = getBitsPerFlag(); + PDRange[] colRange = new PDRange[getNumberOfColorComponents()]; + for (int i = 0; i < colRange.length; ++i) + { + colRange[i] = getDecodeForParameter(2 + i); + if (colRange[i] == null) + { + throw new IOException("Range missing in shading /Decode entry"); + } + } + List list = new ArrayList(); + long maxSrcCoord = (long) Math.pow(2, getBitsPerCoordinate()) - 1; + long maxSrcColor = (long) Math.pow(2, getBitsPerComponent()) - 1; + COSStream cosStream = (COSStream) dict; + + ImageInputStream mciis = new MemoryCacheImageInputStream(cosStream.createInputStream()); + try + { + Point2D[] implicitEdge = new Point2D[4]; + float[][] implicitCornerColor = new float[2][colRange.length]; + byte flag = 0; + + try + { + flag = (byte) (mciis.readBits(bitsPerFlag) & 3); + } + catch (EOFException ex) + { + LOG.error(ex); + return list; + } + + boolean eof = false; + while (!eof) + { + try + { + boolean isFree = (flag == 0); + Patch current = readPatch(mciis, isFree, implicitEdge, implicitCornerColor, + maxSrcCoord, maxSrcColor, rangeX, rangeY, 
colRange, matrix, xform, + controlPoints); + if (current == null) + { + break; + } + list.add(current); + flag = (byte) (mciis.readBits(bitsPerFlag) & 3); + switch (flag) + { + case 0: + break; + case 1: + implicitEdge = current.getFlag1Edge(); + implicitCornerColor = current.getFlag1Color(); + break; + case 2: + implicitEdge = current.getFlag2Edge(); + implicitCornerColor = current.getFlag2Color(); + break; + case 3: + implicitEdge = current.getFlag3Edge(); + implicitCornerColor = current.getFlag3Color(); + break; + default: + LOG.warn("bad flag: " + flag); + break; + } + } + catch (EOFException ex) + { + eof = true; + } + } + } + finally + { + mciis.close(); + } + return list; + } + + /** + * Read a single patch from a data stream, a patch contains information of its coordinates and color parameters. + * + * @param input the image source data stream + * @param isFree whether this is a free patch + * @param implicitEdge implicit edge when a patch is not free, otherwise it's not used + * @param implicitCornerColor implicit colors when a patch is not free, otherwise it's not used + * @param maxSrcCoord the maximum coordinate value calculated from source data + * @param maxSrcColor the maximum color value calculated from source data + * @param rangeX range for coordinate x + * @param rangeY range for coordinate y + * @param colRange range for color + * @param matrix the pattern matrix concatenated with that of the parent content stream + * @param xform transformation for user to device space + * @param controlPoints number of control points, 12 for type 6 shading and 16 for type 7 shading + * @return a single patch + * @throws IOException when something went wrong + */ + protected Patch readPatch(ImageInputStream input, boolean isFree, Point2D[] implicitEdge, + float[][] implicitCornerColor, long maxSrcCoord, long maxSrcColor, PDRange rangeX, + PDRange rangeY, PDRange[] colRange, Matrix matrix, AffineTransform xform, + int controlPoints) throws IOException + { + int 
numberOfColorComponents = getNumberOfColorComponents(); + float[][] color = new float[4][numberOfColorComponents]; + Point2D[] points = new Point2D[controlPoints]; + int pStart = 4; + int cStart = 2; + if (isFree) + { + pStart = 0; + cStart = 0; + } + else + { + points[0] = implicitEdge[0]; + points[1] = implicitEdge[1]; + points[2] = implicitEdge[2]; + points[3] = implicitEdge[3]; + + for (int i = 0; i < numberOfColorComponents; i++) + { + color[0][i] = implicitCornerColor[0][i]; + color[1][i] = implicitCornerColor[1][i]; + } + } + + try + { + for (int i = pStart; i < controlPoints; i++) + { + long x = input.readBits(getBitsPerCoordinate()); + long y = input.readBits(getBitsPerCoordinate()); + float px = interpolate(x, maxSrcCoord, rangeX.getMin(), rangeX.getMax()); + float py = interpolate(y, maxSrcCoord, rangeY.getMin(), rangeY.getMax()); + Point2D p = matrix.transformPoint(px, py); + xform.transform(p, p); + points[i] = p; + } + for (int i = cStart; i < 4; i++) + { + for (int j = 0; j < numberOfColorComponents; j++) + { + long c = input.readBits(getBitsPerComponent()); + color[i][j] = interpolate(c, maxSrcColor, colRange[j].getMin(), + colRange[j].getMax()); + } + } + } + catch (EOFException ex) + { + LOG.debug("EOF", ex); + return null; + } + return generatePatch(points, color); + } + + /** + * Create a patch using control points and 4 corner color values, in Type6ShadingContext, a CoonsPatch is returned; + * in Type7ShadingContext, a TensorPatch is returned. 
+ * + * @param points 12 or 16 control points + * @param color 4 corner colors + * @return a patch instance + */ + abstract Patch generatePatch(Point2D[] points, float[][] color); + + @Override + public abstract Rectangle2D getBounds(AffineTransform xform, Matrix matrix) throws IOException; + + Rectangle2D getBounds(AffineTransform xform, Matrix matrix, int controlPoints) + throws IOException + { + Rectangle2D bounds = null; + for (Patch patch : collectPatches(xform, matrix, controlPoints)) + { + for (ShadedTriangle shadedTriangle : patch.listOfTriangles) + { + if (bounds == null) + { + bounds = new Rectangle2D.Double(shadedTriangle.corner[0].getX(), + shadedTriangle.corner[0].getY(), 0, 0); + } + bounds.add(shadedTriangle.corner[0]); + bounds.add(shadedTriangle.corner[1]); + bounds.add(shadedTriangle.corner[2]); + } + } + return bounds; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShading.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShading.java index 306839d0003..e346d20ce19 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShading.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShading.java @@ -17,6 +17,8 @@ package org.apache.pdfbox.pdmodel.graphics.shading; import java.awt.Paint; +import java.awt.geom.AffineTransform; +import java.awt.geom.Rectangle2D; import java.io.IOException; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; @@ -195,6 +197,19 @@ public void setBBox(PDRectangle newBBox) } } + /** + * Calculate a bounding rectangle around the areas of this shading context. + * + * @param xform + * @param matrix + * @return Bounding rectangle or null, if not supported by this shading type. + * @throws java.io.IOException + */ + public Rectangle2D getBounds(AffineTransform xform, Matrix matrix) throws IOException + { + return null; + } + /** * This will set the AntiAlias value. 
* @@ -252,36 +267,36 @@ public void setColorSpace(PDColorSpace colorSpace) /** * Create the correct PD Model shading based on the COS base shading. * - * @param resourceDictionary the COS shading dictionary + * @param shadingDictionary the COS shading dictionary * @return the newly created shading resources object * @throws IOException if we are unable to create the PDShading object */ - public static PDShading create(COSDictionary resourceDictionary) throws IOException + public static PDShading create(COSDictionary shadingDictionary) throws IOException { PDShading shading = null; - int shadingType = resourceDictionary.getInt(COSName.SHADING_TYPE, 0); + int shadingType = shadingDictionary.getInt(COSName.SHADING_TYPE, 0); switch (shadingType) { case SHADING_TYPE1: - shading = new PDShadingType1(resourceDictionary); + shading = new PDShadingType1(shadingDictionary); break; case SHADING_TYPE2: - shading = new PDShadingType2(resourceDictionary); + shading = new PDShadingType2(shadingDictionary); break; case SHADING_TYPE3: - shading = new PDShadingType3(resourceDictionary); + shading = new PDShadingType3(shadingDictionary); break; case SHADING_TYPE4: - shading = new PDShadingType4(resourceDictionary); + shading = new PDShadingType4(shadingDictionary); break; case SHADING_TYPE5: - shading = new PDShadingType5(resourceDictionary); + shading = new PDShadingType5(shadingDictionary); break; case SHADING_TYPE6: - shading = new PDShadingType6(resourceDictionary); + shading = new PDShadingType6(shadingDictionary); break; case SHADING_TYPE7: - shading = new PDShadingType7(resourceDictionary); + shading = new PDShadingType7(shadingDictionary); break; default: throw new IOException("Error: Unknown shading type " + shadingType); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType1.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType1.java index e1df9f336b9..e7164f57a4e 100644 --- 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType1.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType1.java @@ -55,16 +55,7 @@ public int getShadingType() */ public Matrix getMatrix() { - COSArray array = (COSArray) getCOSObject().getDictionaryObject(COSName.MATRIX); - if (array != null) - { - return new Matrix(array); - } - else - { - // identity matrix is the default - return new Matrix(); - } + return Matrix.createMatrix(getCOSObject().getDictionaryObject(COSName.MATRIX)); } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType4.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType4.java index f739c30a6b5..e8e9aaef201 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType4.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType4.java @@ -17,9 +17,23 @@ package org.apache.pdfbox.pdmodel.graphics.shading; import java.awt.Paint; +import java.awt.geom.AffineTransform; +import java.awt.geom.Point2D; +import java.io.EOFException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import javax.imageio.stream.ImageInputStream; +import javax.imageio.stream.MemoryCacheImageInputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.pdmodel.common.PDRange; import org.apache.pdfbox.util.Matrix; /** @@ -27,6 +41,8 @@ */ public class PDShadingType4 extends PDTriangleBasedShadingType { + private static final Log LOG = LogFactory.getLog(PDShadingType4.class); + /** * Constructor using the given shading dictionary. 
* @@ -69,4 +85,120 @@ public Paint toPaint(Matrix matrix) { return new Type4ShadingPaint(this, matrix); } + + @SuppressWarnings("squid:S1166") + @Override + List collectTriangles(AffineTransform xform, Matrix matrix) + throws IOException + { + int bitsPerFlag = getBitsPerFlag(); + COSDictionary dict = getCOSObject(); + if (!(dict instanceof COSStream)) + { + return Collections.emptyList(); + } + PDRange rangeX = getDecodeForParameter(0); + PDRange rangeY = getDecodeForParameter(1); + if (Float.compare(rangeX.getMin(), rangeX.getMax()) == 0 || + Float.compare(rangeY.getMin(), rangeY.getMax()) == 0) + { + return Collections.emptyList(); + } + PDRange[] colRange = new PDRange[getNumberOfColorComponents()]; + for (int i = 0; i < colRange.length; ++i) + { + colRange[i] = getDecodeForParameter(2 + i); + } + List list = new ArrayList(); + long maxSrcCoord = (long) Math.pow(2, getBitsPerCoordinate()) - 1; + long maxSrcColor = (long) Math.pow(2, getBitsPerComponent()) - 1; + COSStream stream = (COSStream) dict; + + ImageInputStream mciis = new MemoryCacheImageInputStream(stream.createInputStream()); + try + { + byte flag = (byte) 0; + try + { + flag = (byte) (mciis.readBits(bitsPerFlag) & 3); + } + catch (EOFException ex) + { + LOG.error(ex); + } + + boolean eof = false; + while (!eof) + { + Vertex p0; + Vertex p1; + Vertex p2; + Point2D[] ps; + float[][] cs; + int lastIndex; + try + { + switch (flag) + { + case 0: + p0 = readVertex(mciis, maxSrcCoord, maxSrcColor, rangeX, rangeY, colRange, + matrix, xform); + flag = (byte) (mciis.readBits(bitsPerFlag) & 3); + if (flag != 0) + { + LOG.error("bad triangle: " + flag); + } + p1 = readVertex(mciis, maxSrcCoord, maxSrcColor, rangeX, rangeY, colRange, + matrix, xform); + mciis.readBits(bitsPerFlag); + if (flag != 0) + { + LOG.error("bad triangle: " + flag); + } + p2 = readVertex(mciis, maxSrcCoord, maxSrcColor, rangeX, rangeY, colRange, + matrix, xform); + ps = new Point2D[] { p0.point, p1.point, p2.point }; + cs = new float[][] 
{ p0.color, p1.color, p2.color }; + list.add(new ShadedTriangle(ps, cs)); + flag = (byte) (mciis.readBits(bitsPerFlag) & 3); + break; + case 1: + case 2: + lastIndex = list.size() - 1; + if (lastIndex < 0) + { + LOG.error("broken data stream: " + list.size()); + } + else + { + ShadedTriangle preTri = list.get(lastIndex); + p2 = readVertex(mciis, maxSrcCoord, maxSrcColor, rangeX, rangeY, + colRange, matrix, xform); + ps = new Point2D[] { flag == 1 ? preTri.corner[1] : preTri.corner[0], + preTri.corner[2], + p2.point }; + cs = new float[][] { flag == 1 ? preTri.color[1] : preTri.color[0], + preTri.color[2], + p2.color }; + list.add(new ShadedTriangle(ps, cs)); + flag = (byte) (mciis.readBits(bitsPerFlag) & 3); + } + break; + default: + LOG.warn("bad flag: " + flag); + break; + } + } + catch (EOFException ex) + { + eof = true; + } + } + } + finally + { + mciis.close(); + } + return list; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType5.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType5.java index c4a5e46b9bc..99a4cacf9aa 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType5.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType5.java @@ -17,9 +17,21 @@ package org.apache.pdfbox.pdmodel.graphics.shading; import java.awt.Paint; +import java.awt.geom.AffineTransform; +import java.awt.geom.Point2D; +import java.io.EOFException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import javax.imageio.stream.ImageInputStream; +import javax.imageio.stream.MemoryCacheImageInputStream; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.pdmodel.common.PDRange; import org.apache.pdfbox.util.Matrix; /** @@ -69,4 +81,99 @@ public Paint toPaint(Matrix matrix) { return 
new Type5ShadingPaint(this, matrix); } + + @SuppressWarnings("squid:S1166") + @Override + List collectTriangles(AffineTransform xform, Matrix matrix) throws IOException + { + COSDictionary dict = getCOSObject(); + if (!(dict instanceof COSStream)) + { + return Collections.emptyList(); + } + PDRange rangeX = getDecodeForParameter(0); + PDRange rangeY = getDecodeForParameter(1); + if (Float.compare(rangeX.getMin(), rangeX.getMax()) == 0 || + Float.compare(rangeY.getMin(), rangeY.getMax()) == 0) + { + return Collections.emptyList(); + } + int numPerRow = getVerticesPerRow(); + PDRange[] colRange = new PDRange[getNumberOfColorComponents()]; + for (int i = 0; i < colRange.length; ++i) + { + colRange[i] = getDecodeForParameter(2 + i); + } + List vlist = new ArrayList(); + long maxSrcCoord = (long) Math.pow(2, getBitsPerCoordinate()) - 1; + long maxSrcColor = (long) Math.pow(2, getBitsPerComponent()) - 1; + COSStream cosStream = (COSStream) dict; + + ImageInputStream mciis = new MemoryCacheImageInputStream(cosStream.createInputStream()); + try + { + boolean eof = false; + while (!eof) + { + Vertex p; + try + { + p = readVertex(mciis, maxSrcCoord, maxSrcColor, rangeX, rangeY, colRange, matrix, xform); + vlist.add(p); + } + catch (EOFException ex) + { + eof = true; + } + } + } + finally + { + mciis.close(); + } + int rowNum = vlist.size() / numPerRow; + Vertex[][] latticeArray = new Vertex[rowNum][numPerRow]; + List list = new ArrayList(); + if (rowNum < 2) + { + // must have at least two rows; if not, return empty list + return list; + } + for (int i = 0; i < rowNum; i++) + { + for (int j = 0; j < numPerRow; j++) + { + latticeArray[i][j] = vlist.get(i * numPerRow + j); + } + } + + Point2D[] ps = new Point2D[3]; // array will be shallow-cloned in ShadedTriangle constructor + float[][] cs = new float[3][]; + for (int i = 0; i < rowNum - 1; i++) + { + for (int j = 0; j < numPerRow - 1; j++) + { + ps[0] = latticeArray[i][j].point; + ps[1] = latticeArray[i][j + 1].point; + 
ps[2] = latticeArray[i + 1][j].point; + + cs[0] = latticeArray[i][j].color; + cs[1] = latticeArray[i][j + 1].color; + cs[2] = latticeArray[i + 1][j].color; + + list.add(new ShadedTriangle(ps, cs)); + + ps[0] = latticeArray[i][j + 1].point; + ps[1] = latticeArray[i + 1][j].point; + ps[2] = latticeArray[i + 1][j + 1].point; + + cs[0] = latticeArray[i][j + 1].color; + cs[1] = latticeArray[i + 1][j].color; + cs[2] = latticeArray[i + 1][j + 1].color; + + list.add(new ShadedTriangle(ps, cs)); + } + } + return list; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType6.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType6.java index 65be20f0188..2b870ca2a7b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType6.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType6.java @@ -17,6 +17,10 @@ package org.apache.pdfbox.pdmodel.graphics.shading; import java.awt.Paint; +import java.awt.geom.AffineTransform; +import java.awt.geom.Point2D; +import java.awt.geom.Rectangle2D; +import java.io.IOException; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.util.Matrix; @@ -24,7 +28,7 @@ /** * Resources for a shading type 6 (Coons Patch Mesh). */ -public class PDShadingType6 extends PDShadingType4 +public class PDShadingType6 extends PDMeshBasedShadingType { /** * Constructor using the given shading dictionary. 
@@ -47,4 +51,16 @@ public Paint toPaint(Matrix matrix) { return new Type6ShadingPaint(this, matrix); } + + @Override + protected Patch generatePatch(Point2D[] points, float[][] color) + { + return new CoonsPatch(points, color); + } + + @Override + public Rectangle2D getBounds(AffineTransform xform, Matrix matrix) throws IOException + { + return getBounds(xform, matrix, 12); + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType7.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType7.java index 34070a25766..5948cc9901a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType7.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDShadingType7.java @@ -17,6 +17,10 @@ package org.apache.pdfbox.pdmodel.graphics.shading; import java.awt.Paint; +import java.awt.geom.AffineTransform; +import java.awt.geom.Point2D; +import java.awt.geom.Rectangle2D; +import java.io.IOException; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.util.Matrix; @@ -24,7 +28,7 @@ /** * Resources for a shading type 7 (Tensor-Product Patch Mesh). */ -public class PDShadingType7 extends PDShadingType6 +public class PDShadingType7 extends PDMeshBasedShadingType { /** * Constructor using the given shading dictionary. 
@@ -47,4 +51,16 @@ public Paint toPaint(Matrix matrix) { return new Type7ShadingPaint(this, matrix); } + + @Override + protected Patch generatePatch(Point2D[] points, float[][] color) + { + return new TensorPatch(points, color); + } + + @Override + public Rectangle2D getBounds(AffineTransform xform, Matrix matrix) throws IOException + { + return getBounds(xform, matrix, 16); + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDTriangleBasedShadingType.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDTriangleBasedShadingType.java index 73d6eb9d39f..3a4f29efc70 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDTriangleBasedShadingType.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PDTriangleBasedShadingType.java @@ -15,10 +15,21 @@ */ package org.apache.pdfbox.pdmodel.graphics.shading; +import java.awt.geom.AffineTransform; +import java.awt.geom.Point2D; +import java.awt.geom.Rectangle2D; +import java.io.IOException; +import java.util.List; + +import javax.imageio.stream.ImageInputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.PDRange; +import org.apache.pdfbox.util.Matrix; /** * Common resources for shading types 4,5,6 and 7 @@ -30,20 +41,30 @@ abstract class PDTriangleBasedShadingType extends PDShading // value: same as the value of Range private COSArray decode = null; + private static final Log LOG = LogFactory.getLog(PDTriangleBasedShadingType.class); + + private int bitsPerCoordinate = -1; + private int bitsPerColorComponent = -1; + private int numberOfColorComponents = -1; + PDTriangleBasedShadingType(COSDictionary shadingDictionary) { super(shadingDictionary); } /** - * The bits per component of this shading. 
This will return -1 if one has - * not been set. + * The bits per component of this shading. This will return -1 if one has not been set. * * @return the number of bits per component */ public int getBitsPerComponent() { - return getCOSObject().getInt(COSName.BITS_PER_COMPONENT, -1); + if (bitsPerColorComponent == -1) + { + bitsPerColorComponent = getCOSObject().getInt(COSName.BITS_PER_COMPONENT, -1); + LOG.debug("bitsPerColorComponent: " + bitsPerColorComponent); + } + return bitsPerColorComponent; } /** @@ -54,6 +75,7 @@ public int getBitsPerComponent() public void setBitsPerComponent(int bitsPerComponent) { getCOSObject().setInt(COSName.BITS_PER_COMPONENT, bitsPerComponent); + bitsPerColorComponent = bitsPerComponent; } /** @@ -64,17 +86,39 @@ public void setBitsPerComponent(int bitsPerComponent) */ public int getBitsPerCoordinate() { - return getCOSObject().getInt(COSName.BITS_PER_COORDINATE, -1); + if (bitsPerCoordinate == -1) + { + bitsPerCoordinate = getCOSObject().getInt(COSName.BITS_PER_COORDINATE, -1); + LOG.debug("bitsPerCoordinate: " + (Math.pow(2, bitsPerCoordinate) - 1)); + } + return bitsPerCoordinate; } /** * Set the number of bits per coordinate. * - * @param bitsPerComponent the number of bits per coordinate + * @param bitsPerCoordinate the number of bits per coordinate + */ + public void setBitsPerCoordinate(int bitsPerCoordinate) + { + getCOSObject().setInt(COSName.BITS_PER_COORDINATE, bitsPerCoordinate); + this.bitsPerCoordinate = bitsPerCoordinate; + } + + /** + * The number of color components of this shading. + * + * @return number of color components of this shading */ - public void setBitsPerCoordinate(int bitsPerComponent) + public int getNumberOfColorComponents() throws IOException { - getCOSObject().setInt(COSName.BITS_PER_COORDINATE, bitsPerComponent); + if (numberOfColorComponents == -1) + { + numberOfColorComponents = getFunction() != null ? 
1 + : getColorSpace().getNumberOfComponents(); + LOG.debug("numberOfColorComponents: " + numberOfColorComponents); + } + return numberOfColorComponents; } /** @@ -118,5 +162,90 @@ public PDRange getDecodeForParameter(int paramNum) } return retval; } + + /** + * Calculate the interpolation, see p.345 pdf spec 1.7. + * + * @param src src value + * @param srcMax max src value (2^bits-1) + * @param dstMin min dst value + * @param dstMax max dst value + * @return interpolated value + */ + protected float interpolate(float src, long srcMax, float dstMin, float dstMax) + { + return dstMin + (src * (dstMax - dstMin) / srcMax); + } + + /** + * Read a vertex from the bit input stream and perform the interpolations. + * + * @param input bit input stream + * @param maxSrcCoord max value for source coordinate (2^bits-1) + * @param maxSrcColor max value for source color (2^bits-1) + * @param rangeX dest range for X + * @param rangeY dest range for Y + * @param colRangeTab dest range array for colors + * @param matrix the pattern matrix concatenated with that of the parent content stream + * @return a new vertex with the transformed point and the interpolated color values + * @throws IOException if something went wrong + */ + protected Vertex readVertex(ImageInputStream input, long maxSrcCoord, long maxSrcColor, + PDRange rangeX, PDRange rangeY, PDRange[] colRangeTab, + Matrix matrix, AffineTransform xform) throws IOException + { + float[] colorComponentTab = new float[numberOfColorComponents]; + long x = input.readBits(bitsPerCoordinate); + long y = input.readBits(bitsPerCoordinate); + float dstX = interpolate(x, maxSrcCoord, rangeX.getMin(), rangeX.getMax()); + float dstY = interpolate(y, maxSrcCoord, rangeY.getMin(), rangeY.getMax()); + LOG.debug("coord: " + String.format("[%06X,%06X] -> [%f,%f]", x, y, dstX, dstY)); + Point2D p = matrix.transformPoint(dstX, dstY); + xform.transform(p, p); + for (int n = 0; n < numberOfColorComponents; ++n) + { + int color = (int) input.readBits(bitsPerColorComponent); 
+ colorComponentTab[n] = interpolate(color, maxSrcColor, colRangeTab[n].getMin(), + colRangeTab[n].getMax()); + LOG.debug("color[" + n + "]: " + color + "/" + String.format("%02x", color) + + "-> color[" + n + "]: " + colorComponentTab[n]); + } + + // "Each set of vertex data shall occupy a whole number of bytes. + // If the total number of bits required is not divisible by 8, the last data byte + // for each vertex is padded at the end with extra bits, which shall be ignored." + int bitOffset = input.getBitOffset(); + if (bitOffset != 0) + { + input.readBits(8 - bitOffset); + } + + return new Vertex(p, colorComponentTab); + } + + abstract List collectTriangles(AffineTransform xform, Matrix matrix) throws IOException; + + @Override + public Rectangle2D getBounds(AffineTransform xform, Matrix matrix) throws IOException + { + Rectangle2D bounds = null; + for (ShadedTriangle shadedTriangle : collectTriangles(xform, matrix)) + { + if (bounds == null) + { + bounds = new Rectangle2D.Double(shadedTriangle.corner[0].getX(), + shadedTriangle.corner[0].getY(), 0, 0); + } + bounds.add(shadedTriangle.corner[0]); + bounds.add(shadedTriangle.corner[1]); + bounds.add(shadedTriangle.corner[2]); + } + if (bounds == null) + { + // Speeds up files where triangles are empty, e.g. ghostscript file 690425 + return new Rectangle2D.Float(); + } + return bounds; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Patch.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Patch.java index 10e0033b7a0..39bf4a454f4 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Patch.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Patch.java @@ -41,11 +41,9 @@ abstract class Patch /** * Constructor of Patch. 
* - * @param ctl control points, size is 12 (for type 6 shading) or 16 (for - * type 7 shading) * @param color 4 corner's colors */ - Patch(Point2D[] ctl, float[][] color) + Patch(float[][] color) { cornerColor = color.clone(); } @@ -181,8 +179,10 @@ protected List getShadedTriangles(CoordinateColorPair[][] patchC { for (int j = 1; j < szU; j++) { - Point2D p0 = patchCC[i - 1][j - 1].coordinate, p1 = patchCC[i - 1][j].coordinate, p2 = patchCC[i][j].coordinate, - p3 = patchCC[i][j - 1].coordinate; + Point2D p0 = patchCC[i - 1][j - 1].coordinate; + Point2D p1 = patchCC[i - 1][j].coordinate; + Point2D p2 = patchCC[i][j].coordinate; + Point2D p3 = patchCC[i][j - 1].coordinate; boolean ll = true; if (overlaps(p0, p1) || overlaps(p0, p3)) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PatchMeshesShadingContext.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PatchMeshesShadingContext.java index 46eafda831f..4608a3a7baa 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PatchMeshesShadingContext.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/PatchMeshesShadingContext.java @@ -18,21 +18,12 @@ import java.awt.Point; import java.awt.Rectangle; import java.awt.geom.AffineTransform; -import java.awt.geom.Point2D; import java.awt.image.ColorModel; -import java.io.EOFException; import java.io.IOException; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import javax.imageio.stream.ImageInputStream; -import javax.imageio.stream.MemoryCacheImageInputStream; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSStream; -import org.apache.pdfbox.pdmodel.common.PDRange; + import org.apache.pdfbox.util.Matrix; /** @@ -43,12 +34,11 @@ */ abstract class PatchMeshesShadingContext extends TriangleBasedShadingContext { - 
private static final Log LOG = LogFactory.getLog(PatchMeshesShadingContext.class); /** * patch list */ - private List patchList = new ArrayList(); + private List patchList; /** * Constructor creates an instance to be used for fill operations. @@ -61,201 +51,15 @@ abstract class PatchMeshesShadingContext extends TriangleBasedShadingContext * @param controlPoints number of control points, 12 for type 6 shading and 16 for type 7 shading * @throws IOException if something went wrong */ - protected PatchMeshesShadingContext(PDShadingType6 shading, ColorModel colorModel, + protected PatchMeshesShadingContext(PDMeshBasedShadingType shading, ColorModel colorModel, AffineTransform xform, Matrix matrix, Rectangle deviceBounds, int controlPoints) throws IOException { super(shading, colorModel, xform, matrix); - patchList = collectPatches(shading, xform, matrix, controlPoints); + patchList = shading.collectPatches(xform, matrix, controlPoints); createPixelTable(deviceBounds); } - /** - * Create a patch list from a data stream, the returned list contains all the patches contained - * in the data stream. 
- * - * @param shadingType the shading type - * @param xform transformation for user to device space - * @param matrix the pattern matrix concatenated with that of the parent content stream - * @param controlPoints number of control points, 12 for type 6 shading and 16 for type 7 shading - * @return the obtained patch list - * @throws IOException when something went wrong - */ - final List collectPatches(PDShadingType6 shadingType, AffineTransform xform, - Matrix matrix, int controlPoints) throws IOException - { - COSDictionary dict = shadingType.getCOSObject(); - int bitsPerFlag = shadingType.getBitsPerFlag(); - PDRange rangeX = shadingType.getDecodeForParameter(0); - PDRange rangeY = shadingType.getDecodeForParameter(1); - PDRange[] colRange = new PDRange[numberOfColorComponents]; - for (int i = 0; i < numberOfColorComponents; ++i) - { - colRange[i] = shadingType.getDecodeForParameter(2 + i); - } - List list = new ArrayList(); - long maxSrcCoord = (long) Math.pow(2, bitsPerCoordinate) - 1; - long maxSrcColor = (long) Math.pow(2, bitsPerColorComponent) - 1; - COSStream cosStream = (COSStream) dict; - - ImageInputStream mciis = new MemoryCacheImageInputStream(cosStream.createInputStream()); - try - { - Point2D[] implicitEdge = new Point2D[4]; - float[][] implicitCornerColor = new float[2][numberOfColorComponents]; - byte flag = 0; - - try - { - flag = (byte) (mciis.readBits(bitsPerFlag) & 3); - } - catch (EOFException ex) - { - LOG.error(ex); - } - - while (true) - { - try - { - boolean isFree = (flag == 0); - Patch current = readPatch(mciis, isFree, implicitEdge, implicitCornerColor, - maxSrcCoord, maxSrcColor, rangeX, rangeY, colRange, matrix, xform, controlPoints); - if (current == null) - { - break; - } - list.add(current); - flag = (byte) (mciis.readBits(bitsPerFlag) & 3); - switch (flag) - { - case 0: - break; - case 1: - implicitEdge = current.getFlag1Edge(); - implicitCornerColor = current.getFlag1Color(); - break; - case 2: - implicitEdge = 
current.getFlag2Edge(); - implicitCornerColor = current.getFlag2Color(); - break; - case 3: - implicitEdge = current.getFlag3Edge(); - implicitCornerColor = current.getFlag3Color(); - break; - default: - LOG.warn("bad flag: " + flag); - break; - } - } - catch (EOFException ex) - { - break; - } - } - } - finally - { - mciis.close(); - } - return list; - } - - /** - * Read a single patch from a data stream, a patch contains information of its coordinates and - * color parameters. - * - * @param input the image source data stream - * @param isFree whether this is a free patch - * @param implicitEdge implicit edge when a patch is not free, otherwise it's not used - * @param implicitCornerColor implicit colors when a patch is not free, otherwise it's not used - * @param maxSrcCoord the maximum coordinate value calculated from source data - * @param maxSrcColor the maximum color value calculated from source data - * @param rangeX range for coordinate x - * @param rangeY range for coordinate y - * @param colRange range for color - * @param matrix the pattern matrix concatenated with that of the parent content stream - * @param xform transformation for user to device space - * @param controlPoints number of control points, 12 for type 6 shading and 16 for type 7 shading - * @return a single patch - * @throws IOException when something went wrong - */ - protected Patch readPatch(ImageInputStream input, boolean isFree, Point2D[] implicitEdge, - float[][] implicitCornerColor, long maxSrcCoord, long maxSrcColor, - PDRange rangeX, PDRange rangeY, PDRange[] colRange, Matrix matrix, - AffineTransform xform, int controlPoints) throws IOException - { - float[][] color = new float[4][numberOfColorComponents]; - Point2D[] points = new Point2D[controlPoints]; - int pStart = 4, cStart = 2; - if (isFree) - { - pStart = 0; - cStart = 0; - } - else - { - points[0] = implicitEdge[0]; - points[1] = implicitEdge[1]; - points[2] = implicitEdge[2]; - points[3] = implicitEdge[3]; - - for (int i 
= 0; i < numberOfColorComponents; i++) - { - color[0][i] = implicitCornerColor[0][i]; - color[1][i] = implicitCornerColor[1][i]; - } - } - - try - { - for (int i = pStart; i < controlPoints; i++) - { - long x = input.readBits(bitsPerCoordinate); - long y = input.readBits(bitsPerCoordinate); - float px = interpolate(x, maxSrcCoord, rangeX.getMin(), rangeX.getMax()); - float py = interpolate(y, maxSrcCoord, rangeY.getMin(), rangeY.getMax()); - Point2D p = matrix.transformPoint(px, py); - xform.transform(p, p); - points[i] = p; - } - for (int i = cStart; i < 4; i++) - { - for (int j = 0; j < numberOfColorComponents; j++) - { - long c = input.readBits(bitsPerColorComponent); - color[i][j] = interpolate(c, maxSrcColor, colRange[j].getMin(), - colRange[j].getMax()); - } - } - } - catch (EOFException ex) - { - LOG.debug("EOF"); - return null; - } - return generatePatch(points, color); - } - - /** - * Create a patch using control points and 4 corner color values, in - * Type6ShadingContext, a CoonsPatch is returned; in Type6ShadingContext, a - * TensorPatch is returned. - * - * @param points 12 or 16 control points - * @param color 4 corner colors - * @return a patch instance - */ - abstract Patch generatePatch(Point2D[] points, float[][] color); - - /** - * Get a point coordinate on a line by linear interpolation. 
- */ - private float interpolate(float x, long maxValue, float rangeMin, float rangeMax) - { - return rangeMin + (x / maxValue) * (rangeMax - rangeMin); - } - @Override protected Map calcPixelTable(Rectangle deviceBounds) throws IOException { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/RadialShadingContext.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/RadialShadingContext.java index 0b37f0df2ce..eb261d36d16 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/RadialShadingContext.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/RadialShadingContext.java @@ -118,7 +118,8 @@ public RadialShadingContext(PDShadingType3 shading, ColorModel colorModel, } catch (NoninvertibleTransformException ex) { - LOG.error(ex, ex); + LOG.error(ex.getMessage() + ", matrix: " + matrix, ex); + rat = new AffineTransform(); } // shading space -> device space @@ -181,28 +182,16 @@ public Raster getRaster(int x, int y, int w, int h) float inputValue = -1; boolean useBackground; int[] data = new int[w * h * 4]; + float[] values = new float[2]; for (int j = 0; j < h; j++) { - double currentY = y + j; - if (bboxRect != null && (currentY < minBBoxY || currentY > maxBBoxY)) - { - continue; - } for (int i = 0; i < w; i++) { - double currentX = x + i; - if (bboxRect != null && (currentX < minBBoxX || currentX > maxBBoxX)) - { - continue; - } - - float[] values = new float[] { x + i, y + j }; + values[0] = x + i; + values[1] = y + j; rat.transform(values, 0, values, 0, 1); - currentX = values[0]; - currentY = values[1]; - useBackground = false; - float[] inputValues = calculateInputValues(currentX, currentY); + float[] inputValues = calculateInputValues(values[0], values[1]); if (Float.isNaN(inputValues[0]) && Float.isNaN(inputValues[1])) { if (getBackground() == null) @@ -298,7 +287,7 @@ else if (inputValue < 0) int value; if (useBackground) { - // use the given backgound color values + // 
use the given background color values value = getRgbBackground(); } else diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/RadialShadingPaint.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/RadialShadingPaint.java index 87dcdde81c5..cc8e05803b1 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/RadialShadingPaint.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/RadialShadingPaint.java @@ -17,7 +17,6 @@ package org.apache.pdfbox.pdmodel.graphics.shading; import java.awt.Color; -import java.awt.Paint; import java.awt.PaintContext; import java.awt.Rectangle; import java.awt.RenderingHints; @@ -34,13 +33,10 @@ * AWT Paint for radial shading. * */ -public class RadialShadingPaint implements Paint +public class RadialShadingPaint extends ShadingPaint { private static final Log LOG = LogFactory.getLog(RadialShadingPaint.class); - private final PDShadingType3 shading; - private final Matrix matrix; - /** * Constructor. * @@ -49,8 +45,7 @@ public class RadialShadingPaint implements Paint */ RadialShadingPaint(PDShadingType3 shading, Matrix matrix) { - this.shading = shading; - this.matrix = matrix; + super(shading, matrix); } @Override diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/ShadedTriangle.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/ShadedTriangle.java index 5910120c435..9bde9d1a41f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/ShadedTriangle.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/ShadedTriangle.java @@ -52,12 +52,13 @@ class ShadedTriangle /** * Constructor. * - * @param p an array of the 3 vertices of a triangle + * @param p an array of the 3 vertices of a triangle; the Point2D objects should not be modified + * by the caller. 
* @param c an array of color corresponding the vertex array p */ ShadedTriangle(Point2D[] p, float[][] c) { - corner = p.clone(); + corner = p.clone(); // yes this is a shallow clone color = c.clone(); area = getArea(p[0], p[1], p[2]); degree = calcDeg(p); @@ -157,7 +158,7 @@ public boolean contains(Point2D p) { if (degree == 1) { - return overlaps(corner[0], p) | overlaps(corner[1], p) | overlaps(corner[2], p); + return overlaps(corner[0], p) || overlaps(corner[1], p) || overlaps(corner[2], p); } else if (degree == 2) { @@ -216,7 +217,7 @@ private double edgeEquationValue(Point2D p, Point2D p1, Point2D p2) - (p2.getX() - p1.getX()) * (p.getY() - p1.getY()); } - // calcuate the area of a triangle + // calculate the area of a triangle private double getArea(Point2D a, Point2D b, Point2D c) { return Math.abs((c.getX() - b.getX()) * (c.getY() - a.getY()) @@ -234,30 +235,29 @@ public float[] calcColor(Point2D p) int numberOfColorComponents = color[0].length; float[] pCol = new float[numberOfColorComponents]; - if (degree == 1) - { - for (int i = 0; i < numberOfColorComponents; i++) - { - // average - pCol[i] = (color[0][i] + color[1][i] + color[2][i]) / 3.0f; - } - } - else if (degree == 2) + switch (degree) { - // linear interpolation - Point tp = new Point((int) Math.round(p.getX()), (int) Math.round(p.getY())); - return line.calcColor(tp); - } - else - { - float aw = (float) (getArea(p, corner[1], corner[2]) / area); - float bw = (float) (getArea(p, corner[2], corner[0]) / area); - float cw = (float) (getArea(p, corner[0], corner[1]) / area); - for (int i = 0; i < numberOfColorComponents; i++) - { - // barycentric interpolation - pCol[i] = color[0][i] * aw + color[1][i] * bw + color[2][i] * cw; - } + case 1: + for (int i = 0; i < numberOfColorComponents; i++) + { + // average + pCol[i] = (color[0][i] + color[1][i] + color[2][i]) / 3.0f; + } + break; + case 2: + // linear interpolation + Point tp = new Point((int) Math.round(p.getX()), (int) Math.round(p.getY())); + 
return line.calcColor(tp); + default: + float aw = (float) (getArea(p, corner[1], corner[2]) / area); + float bw = (float) (getArea(p, corner[2], corner[0]) / area); + float cw = (float) (getArea(p, corner[0], corner[1]) / area); + for (int i = 0; i < numberOfColorComponents; i++) + { + // barycentric interpolation + pCol[i] = color[0][i] * aw + color[1][i] * bw + color[2][i] * cw; + } + break; } return pCol; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/ShadingContext.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/ShadingContext.java index 8535a38b950..7700309ba06 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/ShadingContext.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/ShadingContext.java @@ -22,10 +22,7 @@ import java.awt.image.ComponentColorModel; import java.awt.image.DataBuffer; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSArray; -import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; import org.apache.pdfbox.util.Matrix; @@ -37,11 +34,6 @@ */ public abstract class ShadingContext { - private static final Log LOG = LogFactory.getLog(ShadingContext.class); - - protected PDRectangle bboxRect; - protected float minBBoxX, minBBoxY, maxBBoxX, maxBBoxY; - private float[] background; private int rgbBackground; private final PDShading shading; @@ -69,12 +61,6 @@ public ShadingContext(PDShading shading, ColorModel cm, AffineTransform xform, outputColorModel = new ComponentColorModel(outputCS, true, false, Transparency.TRANSLUCENT, DataBuffer.TYPE_BYTE); - bboxRect = shading.getBBox(); - if (bboxRect != null) - { - transformBBox(matrix, xform); - } - // get background values if available COSArray bg = shading.getBackground(); if (bg != null) @@ -103,29 +89,6 @@ int getRgbBackground() { return 
rgbBackground; } - - private void transformBBox(Matrix matrix, AffineTransform xform) - { - float[] bboxTab = new float[4]; - bboxTab[0] = bboxRect.getLowerLeftX(); - bboxTab[1] = bboxRect.getLowerLeftY(); - bboxTab[2] = bboxRect.getUpperRightX(); - bboxTab[3] = bboxRect.getUpperRightY(); - - // transform the coords using the given matrix - matrix.createAffineTransform().transform(bboxTab, 0, bboxTab, 0, 2); - - xform.transform(bboxTab, 0, bboxTab, 0, 2); - minBBoxX = Math.min(bboxTab[0], bboxTab[2]); - minBBoxY = Math.min(bboxTab[1], bboxTab[3]); - maxBBoxX = Math.max(bboxTab[0], bboxTab[2]); - maxBBoxY = Math.max(bboxTab[1], bboxTab[3]); - if (minBBoxX >= maxBBoxX || minBBoxY >= maxBBoxY) - { - LOG.warn("empty BBox is ignored"); - bboxRect = null; - } - } /** * Convert color values from shading colorspace to RGB color values encoded diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/ShadingPaint.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/ShadingPaint.java new file mode 100644 index 00000000000..342f38d73b5 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/ShadingPaint.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.graphics.shading; + +import java.awt.Paint; + +import org.apache.pdfbox.util.Matrix; + +/** + * This is base class for all PDShading-Paints to allow other low level libraries access to the + * shading source data. One user of this interface is the PdfBoxGraphics2D-adapter. + * + * @param the actual PDShading class. + */ +public abstract class ShadingPaint implements Paint +{ + protected final T shading; + protected final Matrix matrix; + + ShadingPaint(T shading, Matrix matrix) + { + this.shading = shading; + this.matrix = matrix; + } + + /** + * @return the PDShading of this paint + */ + public T getShading() + { + return shading; + } + + /** + * @return the active Matrix of this paint + */ + public Matrix getMatrix() + { + return matrix; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/TensorPatch.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/TensorPatch.java index c4138a7b0ac..10009fb0b62 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/TensorPatch.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/TensorPatch.java @@ -29,12 +29,12 @@ class TensorPatch extends Patch /** * Constructor of a patch for type 7 shading. 
* - * @param points 16 control points + * @param tcp 16 control points * @param color 4 corner colors */ protected TensorPatch(Point2D[] tcp, float[][] color) { - super(tcp, color); + super(color); controlPoints = reshapeControlPoints(tcp); level = calcLevel(); listOfTriangles = getTriangles(); @@ -206,7 +206,7 @@ protected Point2D[] getFlag3Edge() /* dividing a patch into a grid according to level, then calculate the coordinate and color of each crossing point in the grid, the rule to calculate the coordinate is tensor-product which - is defined in page 119 of PDF32000_2008.pdf, the method to calculate the cooresponding color is + is defined in page 119 of PDF32000_2008.pdf, the method to calculate the corresponding color is bilinear interpolation */ private CoordinateColorPair[][] getPatchCoordinatesColor() diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/TriangleBasedShadingContext.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/TriangleBasedShadingContext.java index 5d051c35316..6da4f44c520 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/TriangleBasedShadingContext.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/TriangleBasedShadingContext.java @@ -25,8 +25,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; + import org.apache.pdfbox.util.Matrix; /** @@ -38,14 +37,6 @@ */ abstract class TriangleBasedShadingContext extends ShadingContext implements PaintContext { - private static final Log LOG = LogFactory.getLog(TriangleBasedShadingContext.class); - - protected int bitsPerCoordinate; - protected int bitsPerColorComponent; - protected int numberOfColorComponents; - - private final boolean hasFunction; - // map of pixels within triangles to their RGB color private Map pixelTable; @@ -62,14 +53,6 @@ abstract class TriangleBasedShadingContext 
extends ShadingContext implements Pai Matrix matrix) throws IOException { super(shading, cm, xform, matrix); - PDTriangleBasedShadingType triangleBasedShadingType = (PDTriangleBasedShadingType) shading; - hasFunction = shading.getFunction() != null; - bitsPerCoordinate = triangleBasedShadingType.getBitsPerCoordinate(); - LOG.debug("bitsPerCoordinate: " + (Math.pow(2, bitsPerCoordinate) - 1)); - bitsPerColorComponent = triangleBasedShadingType.getBitsPerComponent(); - LOG.debug("bitsPerColorComponent: " + bitsPerColorComponent); - numberOfColorComponents = hasFunction ? 1 : getShadingColorSpace().getNumberOfComponents(); - LOG.debug("numberOfColorComponents: " + numberOfColorComponents); } /** @@ -115,13 +98,37 @@ protected void calcPixelTable(List triangleList, Map triangleList, Map maxBBoxY)) - { - continue; - } for (int col = 0; col < w; col++) { - int currentX = x + col; - if (bboxRect != null && (currentX < minBBoxX || currentX > maxBBoxX)) - { - continue; - } - Point p = new Point(currentX, currentY); + Point p = new IntPoint(x + col, y + row); int value; - if (pixelTable.containsKey(p)) + Integer v = pixelTable.get(p); + if (v != null) { - value = pixelTable.get(p); + value = v; } else { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type1ShadingContext.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type1ShadingContext.java index 76c337e788d..77da75ae46c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type1ShadingContext.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type1ShadingContext.java @@ -77,7 +77,8 @@ class Type1ShadingContext extends ShadingContext implements PaintContext } catch (NoninvertibleTransformException ex) { - LOG.error(ex, ex); + LOG.error(ex.getMessage() + ", matrix: " + matrix, ex); + rat = new AffineTransform(); } } @@ -100,25 +101,15 @@ public Raster getRaster(int x, int y, int w, int h) { WritableRaster raster = 
getColorModel().createCompatibleWritableRaster(w, h); int[] data = new int[w * h * 4]; + float[] values = new float[2]; for (int j = 0; j < h; j++) { - int currentY = y + j; - if (bboxRect != null && (currentY < minBBoxY || currentY > maxBBoxY)) - { - continue; - } - for (int i = 0; i < w; i++) { - int currentX = x + i; - if (bboxRect != null && (currentX < minBBoxX || currentX > maxBBoxX)) - { - continue; - } - int index = (j * w + i) * 4; boolean useBackground = false; - float[] values = new float[] { x + i, y + j }; + values[0] = x + i; + values[1] = y + j; rat.transform(values, 0, values, 0, 1); if (values[0] < domain[0] || values[0] > domain[1] || values[1] < domain[2] || values[1] > domain[3]) @@ -131,19 +122,21 @@ public Raster getRaster(int x, int y, int w, int h) } // evaluate function + float[] tmpValues; // "values" can't be reused due to different length if (useBackground) { - values = getBackground(); + tmpValues = getBackground(); } else { try { - values = type1ShadingType.evalFunction(values); + tmpValues = type1ShadingType.evalFunction(values); } catch (IOException e) { LOG.error("error while processing a function", e); + continue; } } @@ -153,16 +146,17 @@ public Raster getRaster(int x, int y, int w, int h) { try { - values = shadingColorSpace.toRGB(values); + tmpValues = shadingColorSpace.toRGB(tmpValues); } catch (IOException e) { LOG.error("error processing color space", e); + continue; } } - data[index] = (int) (values[0] * 255); - data[index + 1] = (int) (values[1] * 255); - data[index + 2] = (int) (values[2] * 255); + data[index] = (int) (tmpValues[0] * 255); + data[index + 1] = (int) (tmpValues[1] * 255); + data[index + 2] = (int) (tmpValues[2] * 255); data[index + 3] = 255; } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type1ShadingPaint.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type1ShadingPaint.java index 891b605a6f2..e8bb4e08d6a 100644 --- 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type1ShadingPaint.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type1ShadingPaint.java @@ -17,7 +17,6 @@ package org.apache.pdfbox.pdmodel.graphics.shading; import java.awt.Color; -import java.awt.Paint; import java.awt.PaintContext; import java.awt.Rectangle; import java.awt.RenderingHints; @@ -32,13 +31,10 @@ /** * AWT PaintContext for function-based (Type 1) shading. */ -class Type1ShadingPaint implements Paint +class Type1ShadingPaint extends ShadingPaint { private static final Log LOG = LogFactory.getLog(Type1ShadingPaint.class); - private final PDShadingType1 shading; - private final Matrix matrix; - /** * Constructor. * @@ -47,8 +43,7 @@ class Type1ShadingPaint implements Paint */ Type1ShadingPaint(PDShadingType1 shading, Matrix matrix) { - this.shading = shading; - this.matrix = matrix; + super(shading, matrix); } @Override diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type4ShadingContext.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type4ShadingContext.java index 41e1ae1cc3c..465d7e5c3fa 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type4ShadingContext.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type4ShadingContext.java @@ -18,19 +18,11 @@ import java.awt.Rectangle; import java.awt.geom.AffineTransform; -import java.awt.geom.Point2D; import java.awt.image.ColorModel; -import java.io.EOFException; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import javax.imageio.stream.ImageInputStream; -import javax.imageio.stream.MemoryCacheImageInputStream; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSStream; -import org.apache.pdfbox.pdmodel.common.PDRange; import org.apache.pdfbox.util.Matrix; /** @@ 
-61,108 +53,7 @@ class Type4ShadingContext extends GouraudShadingContext bitsPerFlag = shading.getBitsPerFlag(); //TODO handle cases where bitperflag isn't 8 LOG.debug("bitsPerFlag: " + bitsPerFlag); - setTriangleList(collectTriangles(shading, xform, matrix)); + setTriangleList(shading.collectTriangles(xform, matrix)); createPixelTable(deviceBounds); } - - private List collectTriangles(PDShadingType4 freeTriangleShadingType, AffineTransform xform, Matrix matrix) - throws IOException - { - COSDictionary dict = freeTriangleShadingType.getCOSObject(); - PDRange rangeX = freeTriangleShadingType.getDecodeForParameter(0); - PDRange rangeY = freeTriangleShadingType.getDecodeForParameter(1); - PDRange[] colRange = new PDRange[numberOfColorComponents]; - for (int i = 0; i < numberOfColorComponents; ++i) - { - colRange[i] = freeTriangleShadingType.getDecodeForParameter(2 + i); - } - List list = new ArrayList(); - long maxSrcCoord = (long) Math.pow(2, bitsPerCoordinate) - 1; - long maxSrcColor = (long) Math.pow(2, bitsPerColorComponent) - 1; - COSStream stream = (COSStream) dict; - - ImageInputStream mciis = new MemoryCacheImageInputStream(stream.createInputStream()); - try - { - byte flag = (byte) 0; - try - { - flag = (byte) (mciis.readBits(bitsPerFlag) & 3); - } - catch (EOFException ex) - { - LOG.error(ex); - } - - while (true) - { - Vertex p0, p1, p2; - Point2D[] ps; - float[][] cs; - int lastIndex; - try - { - switch (flag) - { - case 0: - p0 = readVertex(mciis, maxSrcCoord, maxSrcColor, rangeX, rangeY, colRange, - matrix, xform); - flag = (byte) (mciis.readBits(bitsPerFlag) & 3); - if (flag != 0) - { - LOG.error("bad triangle: " + flag); - } - p1 = readVertex(mciis, maxSrcCoord, maxSrcColor, rangeX, rangeY, colRange, - matrix, xform); - mciis.readBits(bitsPerFlag); - if (flag != 0) - { - LOG.error("bad triangle: " + flag); - } - p2 = readVertex(mciis, maxSrcCoord, maxSrcColor, rangeX, rangeY, colRange, - matrix, xform); - ps = new Point2D[] { p0.point, p1.point, 
p2.point }; - cs = new float[][] { p0.color, p1.color, p2.color }; - list.add(new ShadedTriangle(ps, cs)); - flag = (byte) (mciis.readBits(bitsPerFlag) & 3); - break; - case 1: - case 2: - lastIndex = list.size() - 1; - if (lastIndex < 0) - { - LOG.error("broken data stream: " + list.size()); - } - else - { - ShadedTriangle preTri = list.get(lastIndex); - p2 = readVertex(mciis, maxSrcCoord, maxSrcColor, rangeX, rangeY, - colRange, matrix, xform); - ps = new Point2D[] { flag == 1 ? preTri.corner[1] : preTri.corner[0], - preTri.corner[2], - p2.point }; - cs = new float[][] { flag == 1 ? preTri.color[1] : preTri.color[0], - preTri.color[2], - p2.color }; - list.add(new ShadedTriangle(ps, cs)); - flag = (byte) (mciis.readBits(bitsPerFlag) & 3); - } - break; - default: - LOG.warn("bad flag: " + flag); - break; - } - } - catch (EOFException ex) - { - break; - } - } - } - finally - { - mciis.close(); - } - return list; - } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type4ShadingPaint.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type4ShadingPaint.java index 1c236e6d302..151c024c188 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type4ShadingPaint.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type4ShadingPaint.java @@ -32,13 +32,10 @@ /** * AWT PaintContext for Gouraud Triangle Mesh (Type 4) shading. */ -class Type4ShadingPaint implements Paint +class Type4ShadingPaint extends ShadingPaint { private static final Log LOG = LogFactory.getLog(Type4ShadingPaint.class); - private final PDShadingType4 shading; - private final Matrix matrix; - /** * Constructor. 
* @@ -47,8 +44,7 @@ class Type4ShadingPaint implements Paint */ Type4ShadingPaint(PDShadingType4 shading, Matrix matrix) { - this.shading = shading; - this.matrix = matrix; + super(shading, matrix); } @Override diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type5ShadingContext.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type5ShadingContext.java index 1286266dcb1..b61ac3efc03 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type5ShadingContext.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type5ShadingContext.java @@ -18,19 +18,11 @@ import java.awt.Rectangle; import java.awt.geom.AffineTransform; -import java.awt.geom.Point2D; import java.awt.image.ColorModel; -import java.io.EOFException; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import javax.imageio.stream.ImageInputStream; -import javax.imageio.stream.MemoryCacheImageInputStream; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSStream; -import org.apache.pdfbox.pdmodel.common.PDRange; import org.apache.pdfbox.util.Matrix; /** @@ -59,94 +51,7 @@ class Type5ShadingContext extends GouraudShadingContext LOG.debug("Type5ShadingContext"); - setTriangleList(collectTriangles(shading, xform, matrix)); + setTriangleList(shading.collectTriangles(xform, matrix)); createPixelTable(deviceBounds); } - - private List collectTriangles(PDShadingType5 latticeTriangleShadingType, - AffineTransform xform, Matrix matrix) throws IOException - { - COSDictionary cosDictionary = latticeTriangleShadingType.getCOSObject(); - PDRange rangeX = latticeTriangleShadingType.getDecodeForParameter(0); - PDRange rangeY = latticeTriangleShadingType.getDecodeForParameter(1); - int numPerRow = latticeTriangleShadingType.getVerticesPerRow(); - PDRange[] colRange = new 
PDRange[numberOfColorComponents]; - for (int i = 0; i < numberOfColorComponents; ++i) - { - colRange[i] = latticeTriangleShadingType.getDecodeForParameter(2 + i); - } - List vlist = new ArrayList(); - long maxSrcCoord = (long) Math.pow(2, bitsPerCoordinate) - 1; - long maxSrcColor = (long) Math.pow(2, bitsPerColorComponent) - 1; - COSStream cosStream = (COSStream) cosDictionary; - - ImageInputStream mciis = new MemoryCacheImageInputStream(cosStream.createInputStream()); - try - { - while (true) - { - Vertex p; - try - { - p = readVertex(mciis, maxSrcCoord, maxSrcColor, rangeX, rangeY, colRange, matrix, xform); - vlist.add(p); - } - catch (EOFException ex) - { - break; - } - } - } - finally - { - mciis.close(); - } - int sz = vlist.size(), rowNum = sz / numPerRow; - Vertex[][] latticeArray = new Vertex[rowNum][numPerRow]; - List list = new ArrayList(); - if (rowNum < 2) - { - // must have at least two rows; if not, return empty list - return list; - } - for (int i = 0; i < rowNum; i++) - { - for (int j = 0; j < numPerRow; j++) - { - latticeArray[i][j] = vlist.get(i * numPerRow + j); - } - } - - for (int i = 0; i < rowNum - 1; i++) - { - for (int j = 0; j < numPerRow - 1; j++) - { - Point2D[] ps = new Point2D[] { - latticeArray[i][j].point, - latticeArray[i][j + 1].point, - latticeArray[i + 1][j].point }; - - float[][] cs = new float[][] { - latticeArray[i][j].color, - latticeArray[i][j + 1].color, - latticeArray[i + 1][j].color }; - - list.add(new ShadedTriangle(ps, cs)); - - ps = new Point2D[] { - latticeArray[i][j + 1].point, - latticeArray[i + 1][j].point, - latticeArray[i + 1][j + 1].point }; - - cs = new float[][]{ - latticeArray[i][j + 1].color, - latticeArray[i + 1][j].color, - latticeArray[i + 1][j + 1].color }; - - list.add(new ShadedTriangle(ps, cs)); - } - } - return list; - } - } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type5ShadingPaint.java 
b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type5ShadingPaint.java index 64a4021b6ac..2c9d62c3f64 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type5ShadingPaint.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type5ShadingPaint.java @@ -17,7 +17,6 @@ package org.apache.pdfbox.pdmodel.graphics.shading; import java.awt.Color; -import java.awt.Paint; import java.awt.PaintContext; import java.awt.Rectangle; import java.awt.RenderingHints; @@ -32,13 +31,10 @@ /** * AWT Paint for Gouraud Triangle Lattice (Type 5) shading. */ -class Type5ShadingPaint implements Paint +class Type5ShadingPaint extends ShadingPaint { private static final Log LOG = LogFactory.getLog(Type5ShadingPaint.class); - private final PDShadingType5 shading; - private final Matrix matrix; - /** * Constructor. * @@ -47,8 +43,7 @@ class Type5ShadingPaint implements Paint */ Type5ShadingPaint(PDShadingType5 shading, Matrix matrix) { - this.shading = shading; - this.matrix = matrix; + super(shading, matrix); } @Override diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type6ShadingContext.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type6ShadingContext.java index c6e802cad38..8556c4ad58d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type6ShadingContext.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type6ShadingContext.java @@ -17,7 +17,6 @@ import java.awt.Rectangle; import java.awt.geom.AffineTransform; -import java.awt.geom.Point2D; import java.awt.image.ColorModel; import java.io.IOException; import org.apache.pdfbox.util.Matrix; @@ -45,10 +44,4 @@ class Type6ShadingContext extends PatchMeshesShadingContext { super(shading, colorModel, xform, matrix, deviceBounds, 12); } - - @Override - protected Patch generatePatch(Point2D[] points, float[][] color) - { - return new CoonsPatch(points, color); - } } diff --git 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type6ShadingPaint.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type6ShadingPaint.java index 5cd8dbfb4ae..d023eb67691 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type6ShadingPaint.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type6ShadingPaint.java @@ -16,7 +16,6 @@ package org.apache.pdfbox.pdmodel.graphics.shading; import java.awt.Color; -import java.awt.Paint; import java.awt.PaintContext; import java.awt.Rectangle; import java.awt.RenderingHints; @@ -34,13 +33,10 @@ * * @author Shaola Ren */ -class Type6ShadingPaint implements Paint +class Type6ShadingPaint extends ShadingPaint { private static final Log LOG = LogFactory.getLog(Type6ShadingPaint.class); - private final PDShadingType6 shading; - private final Matrix matrix; - /** * Constructor. * @@ -49,8 +45,7 @@ class Type6ShadingPaint implements Paint */ Type6ShadingPaint(PDShadingType6 shading, Matrix matrix) { - this.shading = shading; - this.matrix = matrix; + super(shading, matrix); } @Override diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type7ShadingContext.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type7ShadingContext.java index 2215e26985f..8939dd2130a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type7ShadingContext.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type7ShadingContext.java @@ -17,7 +17,6 @@ import java.awt.Rectangle; import java.awt.geom.AffineTransform; -import java.awt.geom.Point2D; import java.awt.image.ColorModel; import java.io.IOException; import org.apache.pdfbox.util.Matrix; @@ -45,10 +44,4 @@ class Type7ShadingContext extends PatchMeshesShadingContext { super(shading, colorModel, xform, matrix, deviceBounds, 16); } - - @Override - protected Patch generatePatch(Point2D[] points, float[][] color) - { - return new 
TensorPatch(points, color); - } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type7ShadingPaint.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type7ShadingPaint.java index 64a2fda6068..974436931fb 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type7ShadingPaint.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/Type7ShadingPaint.java @@ -16,7 +16,6 @@ package org.apache.pdfbox.pdmodel.graphics.shading; import java.awt.Color; -import java.awt.Paint; import java.awt.PaintContext; import java.awt.Rectangle; import java.awt.RenderingHints; @@ -34,13 +33,10 @@ * * @author Shaola Ren */ -class Type7ShadingPaint implements Paint +class Type7ShadingPaint extends ShadingPaint { private static final Log LOG = LogFactory.getLog(Type7ShadingPaint.class); - private final PDShadingType7 shading; - private final Matrix matrix; - /** * Constructor. * @@ -49,8 +45,7 @@ class Type7ShadingPaint implements Paint */ Type7ShadingPaint(PDShadingType7 shading, Matrix matrix) { - this.shading = shading; - this.matrix = matrix; + super(shading, matrix); } @Override diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/package.html index be4744cf695..c483f8d4e85 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/shading/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDExtendedGraphicsState.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDExtendedGraphicsState.java index 0c435ea9a82..05b51d36f02 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDExtendedGraphicsState.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDExtendedGraphicsState.java @@ -70,7 +70,7 @@ public void copyIntoGraphicsState( PDGraphicsState gs ) throws IOException { if( key.equals( COSName.LW ) ) { - gs.setLineWidth( getLineWidth() ); + gs.setLineWidth( defaultIfNull( getLineWidth(), 1 ) ); } else if( key.equals( COSName.LC ) ) { @@ -82,7 +82,7 @@ else if( key.equals( COSName.LJ ) ) } else if( key.equals( COSName.ML ) ) { - gs.setMiterLimit( getMiterLimit() ); + gs.setMiterLimit( defaultIfNull( getMiterLimit(), 10 ) ); } else if( key.equals( COSName.D ) ) { @@ -94,7 +94,15 @@ else if( key.equals( COSName.RI ) ) } else if( key.equals( COSName.OPM ) ) { - gs.setOverprintMode( getOverprintMode().doubleValue() ); + gs.setOverprintMode( defaultIfNull( getOverprintMode(), 0 ) ); + } + else if( key.equals( COSName.OP ) ) + { + gs.setOverprint( getStrokingOverprintControl()); + } + else if( key.equals( COSName.OP_NS ) ) + { + gs.setNonStrokingOverprint(getNonStrokingOverprintControl()); } else if( key.equals( COSName.FONT ) ) { @@ -107,11 +115,11 @@ else if( key.equals( COSName.FONT ) ) } else if( key.equals( COSName.FL ) ) { - gs.setFlatness( getFlatnessTolerance() ); + gs.setFlatness( defaultIfNull( getFlatnessTolerance(), 1.0f ) ); } else if( key.equals( COSName.SM ) ) { - gs.setSmoothness( getSmoothnessTolerance() ); + gs.setSmoothness( defaultIfNull( getSmoothnessTolerance(), 0 ) ); } else if( key.equals( COSName.SA ) ) { @@ -119,11 +127,11 @@ else if( key.equals( COSName.SA ) ) } else if( key.equals( COSName.CA ) ) { - gs.setAlphaConstant(getStrokingAlphaConstant()); + gs.setAlphaConstant( 
defaultIfNull( getStrokingAlphaConstant(), 1.0f ) ); } else if( key.equals( COSName.CA_NS ) ) { - gs.setNonStrokeAlphaConstants(getNonStrokingAlphaConstant() ); + gs.setNonStrokeAlphaConstant( defaultIfNull( getNonStrokingAlphaConstant(), 1.0f ) ); } else if( key.equals( COSName.AIS ) ) { @@ -135,7 +143,14 @@ else if( key.equals( COSName.TK ) ) } else if( key.equals( COSName.SMASK ) ) { - gs.setSoftMask(getSoftMask()); + PDSoftMask softmask = getSoftMask(); + if (softmask != null) + { + // Softmask must know the CTM at the time the ExtGState is activated. Read + // https://bugs.ghostscript.com/show_bug.cgi?id=691157#c7 for a good explanation. + softmask.setInitialTransformationMatrix(gs.getCurrentTransformationMatrix().clone()); + } + gs.setSoftMask(softmask); } else if( key.equals( COSName.BM ) ) { @@ -158,6 +173,20 @@ else if (key.equals(COSName.TR2)) } } + /** + * Returns the provided default value in case 'standard' value is null. To be used + * in cases unboxing may lead to a NPE. + * + * @param standardValue 'standard' value + * @param defaultValue default value + * + * @return 'standard' value if not null otherwise default value + */ + private float defaultIfNull(Float standardValue, float defaultValue) + { + return standardValue != null ? standardValue : defaultValue; + } + /** * This will get the underlying dictionary that this class acts on. 
* @@ -258,15 +287,15 @@ public void setMiterLimit( Float miterLimit ) public PDLineDashPattern getLineDashPattern() { PDLineDashPattern retval = null; - COSArray dp = (COSArray) dict.getDictionaryObject( COSName.D ); - if( dp != null ) + COSBase dp = dict.getDictionaryObject( COSName.D ); + if( dp instanceof COSArray && ((COSArray)dp).size() == 2) { - COSArray array = new COSArray(); - dp.addAll(dp); - dp.remove(dp.size() - 1); - int phase = dp.getInt(dp.size() - 1); - - retval = new PDLineDashPattern( array, phase ); + COSBase dashArray = ((COSArray)dp).getObject(0); + COSBase phase = ((COSArray)dp).getObject(1); + if (dashArray instanceof COSArray && phase instanceof COSNumber) + { + retval = new PDLineDashPattern((COSArray) dashArray, ((COSNumber) phase).intValue()); + } } return retval; } @@ -498,7 +527,9 @@ public void setNonStrokingAlphaConstant( Float alpha ) } /** - * This will get the alpha source flag. + * This will get the alpha source flag (“alpha is shape”), that specifies whether the current + * soft mask and alpha constant shall be interpreted as shape values (true) or opacity values + * (false). * * @return The alpha source flag. */ @@ -508,7 +539,9 @@ public boolean getAlphaSourceFlag() } /** - * This will get the alpha source flag. + * This will get the alpha source flag (“alpha is shape”), that specifies whether the current + * soft mask and alpha constant shall be interpreted as shape values (true) or opacity values + * (false). * * @param alpha The alpha source flag. */ @@ -527,13 +560,27 @@ public BlendMode getBlendMode() return BlendMode.getInstance(dict.getDictionaryObject(COSName.BM)); } + /** + * Set the blending mode. + * + * @param bm + */ + public void setBlendMode(BlendMode bm) + { + dict.setItem(COSName.BM, BlendMode.getCOSName(bm)); + } + /** * Returns the soft mask stored in the COS dictionary * - * @return the soft mask + * @return the soft mask or null if there isn't any. 
*/ public PDSoftMask getSoftMask() { + if (!dict.containsKey(COSName.SMASK)) + { + return null; + } return PDSoftMask.create(dict.getDictionaryObject(COSName.SMASK)); } @@ -569,9 +616,10 @@ public void setTextKnockoutFlag( boolean tk ) private Float getFloatItem( COSName key ) { Float retval = null; - COSNumber value = (COSNumber) dict.getDictionaryObject( key ); - if( value != null ) + COSBase base = dict.getDictionaryObject(key); + if (base instanceof COSNumber) { + COSNumber value = (COSNumber) base; retval = value.floatValue(); } return retval; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDGraphicsState.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDGraphicsState.java index abf415b9fa8..d6ee2391cb4 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDGraphicsState.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDGraphicsState.java @@ -61,6 +61,7 @@ public class PDGraphicsState implements Cloneable // DEVICE-DEPENDENT parameters private boolean overprint = false; + private boolean nonStrokingOverprint = false; private double overprintMode = 0; //black generation //undercolor removal @@ -199,9 +200,9 @@ public void setStrokeAdjustment(boolean value) } /** - * Get the value of the stroke alpha constants property. + * Get the value of the stroke alpha constant property. * - * @return The value of the stroke alpha constants parameter. + * @return The value of the stroke alpha constant parameter. */ public double getAlphaConstant() { @@ -209,9 +210,9 @@ public double getAlphaConstant() } /** - * set the value of the stroke alpha constants property. + * set the value of the stroke alpha constant property. * - * @param value The value of the stroke alpha constants parameter. + * @param value The value of the stroke alpha constant parameter. 
*/ public void setAlphaConstant(double value) { @@ -219,25 +220,49 @@ public void setAlphaConstant(double value) } /** - * Get the value of the non-stroke alpha constants property. + * Get the value of the non-stroke alpha constant property. * - * @return The value of the non-stroke alpha constants parameter. + * @return The value of the non-stroke alpha constant parameter. + * @deprecated use {@link #getNonStrokeAlphaConstant() } */ + @Deprecated public double getNonStrokeAlphaConstants() { return nonStrokingAlphaConstant; } /** - * set the value of the non-stroke alpha constants property. + * set the value of the non-stroke alpha constant property. * - * @param value The value of the non-stroke alpha constants parameter. + * @param value The value of the non-stroke alpha constant parameter. + * @deprecated use {@link #setNonStrokeAlphaConstant(double) } */ + @Deprecated public void setNonStrokeAlphaConstants(double value) { nonStrokingAlphaConstant = value; } + /** + * Get the value of the non-stroke alpha constant property. + * + * @return The value of the non-stroke alpha constant parameter. + */ + public double getNonStrokeAlphaConstant() + { + return nonStrokingAlphaConstant; + } + + /** + * set the value of the non-stroke alpha constant property. + * + * @param value The value of the non-stroke alpha constant parameter. + */ + public void setNonStrokeAlphaConstant(double value) + { + nonStrokingAlphaConstant = value; + } + /** * get the value of the stroke alpha source property. * @@ -299,8 +324,6 @@ public void setBlendMode(BlendMode blendMode) this.blendMode = blendMode; } - /** - /** * get the value of the overprint property. * @@ -321,6 +344,26 @@ public void setOverprint(boolean value) overprint = value; } + /** + * get the value of the non stroking overprint property. + * + * @return The value of the non stroking overprint parameter. 
+ */ + public boolean isNonStrokingOverprint() + { + return nonStrokingOverprint; + } + + /** + * set the value of the non stroking overprint property. + * + * @param value The value of the non stroking overprint parameter. + */ + public void setNonStrokingOverprint(boolean value) + { + nonStrokingOverprint = value; + } + /** * get the value of the overprint mode property. * @@ -516,7 +559,7 @@ public PDColorSpace getStrokingColorSpace() } /** - * Sets the the stroking color space. + * Sets the stroking color space. * * @param colorSpace The new stroking color space. */ @@ -536,7 +579,7 @@ public PDColorSpace getNonStrokingColorSpace() } /** - * Sets the the non-stroking color space. + * Sets the non-stroking color space. * * @param colorSpace The new non-stroking color space. */ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDSoftMask.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDSoftMask.java index 3e3a3d2884b..29cea43e4ed 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDSoftMask.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDSoftMask.java @@ -28,11 +28,12 @@ import org.apache.pdfbox.pdmodel.common.function.PDFunction; import org.apache.pdfbox.pdmodel.graphics.PDXObject; import org.apache.pdfbox.pdmodel.graphics.form.PDTransparencyGroup; +import org.apache.pdfbox.util.Matrix; /** * Soft mask. * - * @author Kühn & Weyh Software, GmbH + * @author Kühn & Weyh Software GmbH */ public final class PDSoftMask implements COSObjectable { @@ -74,12 +75,18 @@ else if (dictionary instanceof COSDictionary) private COSArray backdropColor = null; private PDFunction transferFunction = null; + /** + * To allow a soft mask to know the CTM at the time of activation of the ExtGState. + */ + private Matrix ctm; + /** * Creates a new soft mask. + * + * @param dictionary The soft mask dictionary. 
*/ public PDSoftMask(COSDictionary dictionary) { - super(); this.dictionary = dictionary; } @@ -114,7 +121,11 @@ public PDTransparencyGroup getGroup() throws IOException COSBase cosGroup = getCOSObject().getDictionaryObject(COSName.G); if (cosGroup != null) { - group = (PDTransparencyGroup) PDXObject.createXObject(cosGroup, null); + PDXObject x = PDXObject.createXObject(cosGroup, null); + if (x instanceof PDTransparencyGroup) + { + group = (PDTransparencyGroup) x; + } } } return group; @@ -148,4 +159,24 @@ public PDFunction getTransferFunction() throws IOException } return transferFunction; } + + /** + * Set the CTM that is valid at the time the ExtGState was activated. + * + * @param ctm + */ + void setInitialTransformationMatrix(Matrix ctm) + { + this.ctm = ctm; + } + + /** + * Returns the CTM at the time the ExtGState was activated. + * + * @return the CTM at the time the ExtGState was activated. + */ + public Matrix getInitialTransformationMatrix() + { + return ctm; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/RenderingIntent.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/RenderingIntent.java index 117fab42935..b0cff2dc794 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/RenderingIntent.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/RenderingIntent.java @@ -46,23 +46,14 @@ public enum RenderingIntent public static RenderingIntent fromString(String value) { - if (value.equals("AbsoluteColorimetric")) + for (RenderingIntent instance : RenderingIntent.values()) { - return ABSOLUTE_COLORIMETRIC; + if (instance.value.equals(value)) + { + return instance; + } } - else if (value.equals("RelativeColorimetric")) - { - return RELATIVE_COLORIMETRIC; - } - else if (value.equals("Saturation")) - { - return SATURATION; - } - else if (value.equals("Perceptual")) - { - return PERCEPTUAL; - } - // "If a conforming reader does not recognize the specified name, + // "If a 
conforming reader does not recognize the specified name, // it shall use the RelativeColorimetric intent by default." return RELATIVE_COLORIMETRIC; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/RenderingMode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/RenderingMode.java index 32932fa1d42..a0426bc113d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/RenderingMode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/RenderingMode.java @@ -1,120 +1,120 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.pdmodel.graphics.state; - -/** - * Text Rendering Mode. - * - * @author John Hewson - */ -public enum RenderingMode -{ - /** - * Fill text. - */ - FILL(0), - - /** - * Stroke text. - */ - STROKE(1), - - /** - * Fill, then stroke text. - */ - FILL_STROKE(2), - - /** - * Neither fill nor stroke text (invisible) - */ - NEITHER(3), - - /** - * Fill text and add to path for clipping. - */ - FILL_CLIP(4), - - /** - * Stroke text and add to path for clipping. - */ - STROKE_CLIP(5), - - /** - * Fill, then stroke text and add to path for clipping. 
- */ - FILL_STROKE_CLIP(6), - - /** - * Add text to path for clipping. - */ - NEITHER_CLIP(7); - - private static final RenderingMode[] VALUES = RenderingMode.values(); - - public static RenderingMode fromInt(int value) - { - return VALUES[value]; - } - - private final int value; - - RenderingMode(int value) - { - this.value = value; - } - - /** - * Returns the integer value of this mode, as used in a PDF file. - */ - public int intValue() - { - return value; - } - - /** - * Returns true is this mode fills text. - */ - public boolean isFill() - { - return this == FILL || - this == FILL_STROKE || - this == FILL_CLIP || - this == FILL_STROKE_CLIP; - } - - /** - * Returns true is this mode strokes text. - */ - public boolean isStroke() - { - return this == STROKE || - this == FILL_STROKE || - this == STROKE || - this == FILL_STROKE_CLIP; - } - - /** - * Returns true is this mode clips text. - */ - public boolean isClip() - { - return this == FILL_CLIP || - this == STROKE_CLIP || - this == FILL_STROKE_CLIP || - this == NEITHER_CLIP; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.graphics.state; + +/** + * Text Rendering Mode. 
+ * + * @author John Hewson + */ +public enum RenderingMode +{ + /** + * Fill text. + */ + FILL(0), + + /** + * Stroke text. + */ + STROKE(1), + + /** + * Fill, then stroke text. + */ + FILL_STROKE(2), + + /** + * Neither fill nor stroke text (invisible) + */ + NEITHER(3), + + /** + * Fill text and add to path for clipping. + */ + FILL_CLIP(4), + + /** + * Stroke text and add to path for clipping. + */ + STROKE_CLIP(5), + + /** + * Fill, then stroke text and add to path for clipping. + */ + FILL_STROKE_CLIP(6), + + /** + * Add text to path for clipping. + */ + NEITHER_CLIP(7); + + private static final RenderingMode[] VALUES = RenderingMode.values(); + + public static RenderingMode fromInt(int value) + { + return VALUES[value]; + } + + private final int value; + + RenderingMode(int value) + { + this.value = value; + } + + /** + * Returns the integer value of this mode, as used in a PDF file. + */ + public int intValue() + { + return value; + } + + /** + * Returns true is this mode fills text. + */ + public boolean isFill() + { + return this == FILL || + this == FILL_STROKE || + this == FILL_CLIP || + this == FILL_STROKE_CLIP; + } + + /** + * Returns true is this mode strokes text. + */ + public boolean isStroke() + { + return this == STROKE || + this == FILL_STROKE || + this == STROKE_CLIP || + this == FILL_STROKE_CLIP; + } + + /** + * Returns true is this mode clips text. + */ + public boolean isClip() + { + return this == FILL_CLIP || + this == STROKE_CLIP || + this == FILL_STROKE_CLIP || + this == NEITHER_CLIP; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/package.html index a213c6a8068..76c8a66397b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/OpenMode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/OpenMode.java new file mode 100644 index 00000000000..ed14bc80284 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/OpenMode.java @@ -0,0 +1,40 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.pdmodel.interactive.action; + +/** + * This will specify whether to open the destination document in a new window. + * + * @author Tilman Hausherr + */ +public enum OpenMode +{ + /** + * The viewer application should behave in accordance with the current user preference. + */ + USER_PREFERENCE, + + /** + * Destination document will replace the current document in the same window. + */ + SAME_WINDOW, + + /** + * Open the destination document in a new window. 
+ */ + NEW_WINDOW +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDAction.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDAction.java index 645fbf34d70..3b029af3c2a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDAction.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDAction.java @@ -98,7 +98,6 @@ public final void setType( String type ) /** * This will get the type of action that the actions dictionary describes. - * If present, must be Action for an action dictionary. * * @return The S entry of actions dictionary. */ @@ -109,7 +108,6 @@ public String getSubType() /** * This will set the type of action that the actions dictionary describes. - * If present, must be Action for an action dictionary. * * @param s The new type of action. */ @@ -137,7 +135,7 @@ public List getNext() else if( next instanceof COSArray ) { COSArray array = (COSArray)next; - List actions = new ArrayList(); + List actions = new ArrayList(array.size()); for( int i=0; i= 1) + { + COSBase page = destArray.getObject(0); + if (!(page instanceof COSDictionary)) + { + throw new IllegalArgumentException("Destination of a GoToE action must be " + + "a page dictionary object"); + } + } + } + getCOSObject().setItem(COSName.D, d); + } + + /** + * This will get the file in which the destination is located. + * + * @return The F entry of the specific embedded go-to action dictionary. + * + * @throws IOException If there is an error creating the file spec. + */ + public PDFileSpecification getFile() throws IOException + { + return PDFileSpecification.createFS(getCOSObject().getDictionaryObject(COSName.F)); + } + + /** + * This will set the file in which the destination is located. + * + * @param fs The file specification. 
+ */ + public void setFile(PDFileSpecification fs) + { + getCOSObject().setItem(COSName.F, fs); + } + + /** + * This will specify whether to open the destination document in a new window, in the same + * window, or behave in accordance with the current user preference. + * + * @return A flag specifying how to open the destination document. + */ + public OpenMode getOpenInNewWindow() + { + if (getCOSObject().getDictionaryObject(COSName.NEW_WINDOW) instanceof COSBoolean) + { + COSBoolean b = (COSBoolean) getCOSObject().getDictionaryObject(COSName.NEW_WINDOW); + return b.getValue() ? OpenMode.NEW_WINDOW : OpenMode.SAME_WINDOW; + } + return OpenMode.USER_PREFERENCE; + } + + /** + * This will specify whether to open the destination document in a new window. + * + * @param value The flag value. + */ + public void setOpenInNewWindow(OpenMode value) + { + if (null == value) + { + getCOSObject().removeItem(COSName.NEW_WINDOW); + return; + } + switch (value) + { + case USER_PREFERENCE: + getCOSObject().removeItem(COSName.NEW_WINDOW); + break; + case SAME_WINDOW: + getCOSObject().setBoolean(COSName.NEW_WINDOW, false); + break; + case NEW_WINDOW: + getCOSObject().setBoolean(COSName.NEW_WINDOW, true); + break; + default: + // shouldn't happen unless the enum type is changed + break; + } + } + + /** + * Get the target directory. + * + * @return the target directory or null if there is none. + */ + public PDTargetDirectory getTargetDirectory() + { + COSBase base = getCOSObject().getDictionaryObject(COSName.T); + if (base instanceof COSDictionary) + { + return new PDTargetDirectory((COSDictionary) base); + } + return null; + } + + /** + * Sets the target directory. + * + * @param targetDirectory the target directory. 
+ */ + public void setTargetDirectory(PDTargetDirectory targetDirectory) + { + getCOSObject().setItem(COSName.T, targetDirectory); + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionFactory.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionFactory.java index 0fdbd3a18c9..f7d8510d5d1 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionFactory.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionFactory.java @@ -101,6 +101,10 @@ else if (PDActionThread.SUB_TYPE.equals(type)) { retval = new PDActionThread(action); } + else if (PDActionEmbeddedGoTo.SUB_TYPE.equals(type)) + { + retval = new PDActionEmbeddedGoTo(action); + } } return retval; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionGoTo.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionGoTo.java index 4027201c207..98849eae8a5 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionGoTo.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionGoTo.java @@ -43,7 +43,6 @@ public class PDActionGoTo extends PDAction */ public PDActionGoTo() { - super(); setSubType( SUB_TYPE ); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionImportData.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionImportData.java index aebcd157527..5e501275a8f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionImportData.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionImportData.java @@ -39,7 +39,6 @@ public class PDActionImportData extends PDAction */ public PDActionImportData() { - action = new COSDictionary(); setSubType(SUB_TYPE); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionJavaScript.java 
b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionJavaScript.java index 9748ccba872..7cc30e63bdf 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionJavaScript.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionJavaScript.java @@ -39,7 +39,6 @@ public class PDActionJavaScript extends PDAction */ public PDActionJavaScript() { - super(); setSubType( SUB_TYPE ); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionLaunch.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionLaunch.java index 1645b2bb620..38f7ea0dc02 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionLaunch.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionLaunch.java @@ -18,6 +18,7 @@ import java.io.IOException; +import org.apache.pdfbox.cos.COSBoolean; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; @@ -42,7 +43,6 @@ public class PDActionLaunch extends PDAction */ public PDActionLaunch() { - super(); setSubType( SUB_TYPE ); } @@ -216,19 +216,70 @@ public void setP( String p ) * ignored if the file designated by the F entry is not a PDF document. * * @return A flag specifying whether to open the destination document in a new window. + * + * @deprecated use {@link #getOpenInNewWindow()} */ + @Deprecated public boolean shouldOpenInNewWindow() { - return action.getBoolean( "NewWindow", true ); + return action.getBoolean(COSName.NEW_WINDOW, true); } /** * This will specify the destination document to open in a new window. * * @param value The flag value. 
+ * + * @deprecated use {@link #setOpenInNewWindow(OpenMode)} */ + @Deprecated public void setOpenInNewWindow( boolean value ) { - action.setBoolean( "NewWindow", value ); + action.setBoolean(COSName.NEW_WINDOW, value); + } + + /** + * This will specify whether to open the destination document in a new window, in the same + * window, or behave in accordance with the current user preference. + * + * @return A flag specifying how to open the destination document. + */ + public OpenMode getOpenInNewWindow() + { + if (getCOSObject().getDictionaryObject(COSName.NEW_WINDOW) instanceof COSBoolean) + { + COSBoolean b = (COSBoolean) getCOSObject().getDictionaryObject(COSName.NEW_WINDOW); + return b.getValue() ? OpenMode.NEW_WINDOW : OpenMode.SAME_WINDOW; + } + return OpenMode.USER_PREFERENCE; + } + + /** + * This will specify whether to open the destination document in a new window. + * + * @param value The flag value. + */ + public void setOpenInNewWindow(OpenMode value) + { + if (null == value) + { + getCOSObject().removeItem(COSName.NEW_WINDOW); + return; + } + switch (value) + { + case USER_PREFERENCE: + getCOSObject().removeItem(COSName.NEW_WINDOW); + break; + case SAME_WINDOW: + getCOSObject().setBoolean(COSName.NEW_WINDOW, false); + break; + case NEW_WINDOW: + getCOSObject().setBoolean(COSName.NEW_WINDOW, true); + break; + default: + // shouldn't happen unless the enum type is changed + break; + } } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionMovie.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionMovie.java index 932e0578f0c..8ee4dfc76c0 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionMovie.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionMovie.java @@ -36,7 +36,6 @@ public class PDActionMovie extends PDAction */ public PDActionMovie() { - action = new COSDictionary(); setSubType(SUB_TYPE); } @@ -55,7 +54,9 @@ public 
PDActionMovie(COSDictionary a) * a Movie action. * * @return The S entry of the specific Movie action dictionary. + * @deprecated use {@link #getSubType() }. */ + @Deprecated public String getS() { return action.getNameAsString(COSName.S); @@ -66,7 +67,9 @@ public String getS() * a Movie action. * * @param s The Movie action. + * @deprecated use {@link #setSubType(java.lang.String) }. */ + @Deprecated public void setS(String s) { action.setName(COSName.S, s); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionNamed.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionNamed.java index 0fbe245365d..b697403d011 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionNamed.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionNamed.java @@ -33,7 +33,6 @@ public class PDActionNamed extends PDAction */ public PDActionNamed() { - action = new COSDictionary(); setSubType(SUB_TYPE); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionRemoteGoTo.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionRemoteGoTo.java index 973cb7823df..d02d0673bb6 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionRemoteGoTo.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionRemoteGoTo.java @@ -19,6 +19,7 @@ import java.io.IOException; import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSBoolean; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; @@ -42,7 +43,6 @@ public class PDActionRemoteGoTo extends PDAction */ public PDActionRemoteGoTo() { - action = new COSDictionary(); setSubType( SUB_TYPE ); } @@ -61,7 +61,9 @@ public PDActionRemoteGoTo( COSDictionary a ) * It must be GoToR for a remote go-to action. * * @return The S entry of the specific remote go-to action dictionary. 
+ * @deprecated use {@link #getSubType() }. */ + @Deprecated public String getS() { return action.getNameAsString( COSName.S ); @@ -72,7 +74,9 @@ public String getS() * It must be GoToR for a remote go-to action. * * @param s The remote go-to action. + * @deprecated use {@link #setSubType(java.lang.String) }. */ + @Deprecated public void setS( String s ) { action.setName( COSName.S, s ); @@ -139,19 +143,70 @@ public void setD( COSBase d ) * should behave in accordance with the current user preference. * * @return A flag specifying whether to open the destination document in a new window. + * + * @deprecated use {@link #getOpenInNewWindow()} */ + @Deprecated public boolean shouldOpenInNewWindow() { - return action.getBoolean( "NewWindow", true ); + return action.getBoolean(COSName.NEW_WINDOW, true ); } /** * This will specify the destination document to open in a new window. * * @param value The flag value. + * + * @deprecated use {@link #setOpenInNewWindow(OpenMode)} */ + @Deprecated public void setOpenInNewWindow( boolean value ) { - action.setBoolean( "NewWindow", value ); + action.setBoolean(COSName.NEW_WINDOW, value ); + } + + /** + * This will specify whether to open the destination document in a new window, in the same + * window, or behave in accordance with the current user preference. + * + * @return A flag specifying how to open the destination document. + */ + public OpenMode getOpenInNewWindow() + { + if (getCOSObject().getDictionaryObject(COSName.NEW_WINDOW) instanceof COSBoolean) + { + COSBoolean b = (COSBoolean) getCOSObject().getDictionaryObject(COSName.NEW_WINDOW); + return b.getValue() ? OpenMode.NEW_WINDOW : OpenMode.SAME_WINDOW; + } + return OpenMode.USER_PREFERENCE; + } + + /** + * This will specify whether to open the destination document in a new window. + * + * @param value The flag value. 
+ */ + public void setOpenInNewWindow(OpenMode value) + { + if (null == value) + { + getCOSObject().removeItem(COSName.NEW_WINDOW); + return; + } + switch (value) + { + case USER_PREFERENCE: + getCOSObject().removeItem(COSName.NEW_WINDOW); + break; + case SAME_WINDOW: + getCOSObject().setBoolean(COSName.NEW_WINDOW, false); + break; + case NEW_WINDOW: + getCOSObject().setBoolean(COSName.NEW_WINDOW, true); + break; + default: + // shouldn't happen unless the enum type is changed + break; + } } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionResetForm.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionResetForm.java index 34bd8c25c51..07ff0119ff7 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionResetForm.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionResetForm.java @@ -38,7 +38,6 @@ public class PDActionResetForm extends PDAction */ public PDActionResetForm() { - action = new COSDictionary(); setSubType(SUB_TYPE); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionSound.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionSound.java index 33af5392a0f..afde58edfa8 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionSound.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionSound.java @@ -17,8 +17,12 @@ package org.apache.pdfbox.pdmodel.interactive.action; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSBoolean; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.cos.COSStream; /** * This represents a Sound action that can be executed in a PDF document @@ -38,7 +42,6 @@ public class PDActionSound extends PDAction */ public PDActionSound() { - action = new COSDictionary(); 
setSubType(SUB_TYPE); } @@ -57,7 +60,9 @@ public PDActionSound(COSDictionary a) * a Sound action. * * @return The S entry of the specific Sound action dictionary. + * @deprecated use {@link #getSubType() }. */ + @Deprecated public String getS() { return action.getNameAsString(COSName.S); @@ -68,10 +73,157 @@ public String getS() * a Sound action. * * @param s The Sound action. + * @deprecated use {@link #setSubType(java.lang.String) }. */ + @Deprecated public void setS(String s) { action.setName(COSName.S, s); } + /** + * Sets the sound object. + * + * @param sound the sound object defining the sound that shall be played. + */ + public void setSound(COSStream sound) + { + action.setItem(COSName.SOUND, sound); + } + + /** + * Gets the sound object. + * + * @return The sound object defining the sound that shall be played. + */ + public COSStream getSound() + { + COSBase base = action.getDictionaryObject(COSName.SOUND); + if (base instanceof COSStream) + { + return (COSStream) base; + } + return null; + } + + /** + * Sets the volume at which to play the sound, in the range −1.0 to 1.0. + * + * @param volume The volume at which to play the sound, in the range −1.0 to 1.0. + * + * @throws IllegalArgumentException if the volume parameter is outside of the range −1.0 to 1.0. + */ + public void setVolume(float volume) + { + if (volume < -1 || volume > 1) + { + throw new IllegalArgumentException("volume outside of the range −1.0 to 1.0"); + } + action.setFloat(COSName.VOLUME, volume); + } + + /** + * Gets the volume. + * + * @return The volume at which to play the sound, in the range −1.0 to 1.0. Default value: 1.0. + */ + public float getVolume() + { + COSBase base = action.getDictionaryObject(COSName.VOLUME); + if (base instanceof COSNumber) + { + float volume = ((COSNumber) base).floatValue(); + if (volume < -1 || volume > 1) + { + volume = 1; + } + return volume; + } + return 1; + } + + /** + * A flag specifying whether to play the sound synchronously or asynchronously. 
When true, the + * reader allows no further user interaction other than canceling the sound until the sound has + * been completely played. + * + * @param synchronous Whether to play the sound synchronously (true) or asynchronously (false). + */ + public void setSynchronous(boolean synchronous) + { + action.setBoolean(COSName.SYNCHRONOUS, synchronous); + } + + /** + * Gets the synchronous flag. It specifies whether to play the sound synchronously or + * asynchronously. When true, the reader allows no further user interaction other than canceling + * the sound until the sound has been completely played. + * + * @return Whether to play the sound synchronously (true) or asynchronously (false, also the + * default). + */ + public boolean getSynchronous() + { + COSBase base = action.getDictionaryObject(COSName.SYNCHRONOUS); + if (base instanceof COSBoolean) + { + return ((COSBoolean) base).getValue(); + } + return false; + } + + /** + * A flag specifying whether to repeat the sound indefinitely. + * + * @param repeat Whether to repeat the sound indefinitely. + */ + public void setRepeat(boolean repeat) + { + action.setBoolean(COSName.REPEAT, repeat); + } + + /** + * Gets whether to repeat the sound indefinitely. + * + * @return Whether to repeat the sound indefinitely (default: false). + */ + public boolean getRepeat() + { + COSBase base = action.getDictionaryObject(COSName.REPEAT); + if (base instanceof COSBoolean) + { + return ((COSBoolean) base).getValue(); + } + return false; + } + + /** + * The flag specifying whether to mix this sound with any other sound already playing. If this + * flag is false, any previously playing sound shall be stopped before starting this sound; this + * can be used to stop a repeating sound (see Repeat). Default value: false. + * + * @param mix whether to mix this sound with any other sound already playing. (false). 
+ */ + public void setMix(boolean mix) + { + action.setBoolean(COSName.MIX, mix); + } + + /** + * Gets the flag specifying whether to mix this sound with any other sound already playing. If + * this flag is false, any previously playing sound shall be stopped before starting this sound; + * this can be used to stop a repeating sound (see Repeat). + * + * @return whether to mix this sound with any other sound already playing (default: false). + */ + public boolean getMix() + { + COSBase base = action.getDictionaryObject(COSName.MIX); + if (base instanceof COSBoolean) + { + return ((COSBoolean) base).getValue(); + } + return false; + } + } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java index abc3c3f6e05..0b6b71c8e04 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java @@ -16,8 +16,11 @@ */ package org.apache.pdfbox.pdmodel.interactive.action; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.util.Charsets; /** * This represents a URI action that can be executed in a PDF document. @@ -37,7 +40,6 @@ public class PDActionURI extends PDAction */ public PDActionURI() { - action = new COSDictionary(); setSubType(SUB_TYPE); } @@ -56,7 +58,9 @@ public PDActionURI(COSDictionary a) * It must be URI for a URI action. * * @return The S entry of the specific URI action dictionary. + * @deprecated use {@link #getSubType() }. */ + @Deprecated public String getS() { return action.getNameAsString(COSName.S); @@ -67,21 +71,42 @@ public String getS() * It must be URI for a URI action. * * @param s The URI action. + * @deprecated use {@link #setSubType(java.lang.String) }. 
*/ + @Deprecated public void setS(String s) { action.setName(COSName.S, s); } /** - * This will get the uniform resource identifier to resolve, encoded in - * 7-bit ASCII. + * This will get the uniform resource identifier to resolve. It should be encoded in 7-bit + * ASCII, but UTF-8 and UTF-16 are supported too. * - * @return The URI entry of the specific URI action dictionary. + * @return The URI entry of the specific URI action dictionary or null if there isn't any. */ public String getURI() { - return action.getString(COSName.URI); + COSBase base = action.getDictionaryObject(COSName.URI); + if (base instanceof COSString) + { + byte[] bytes = ((COSString) base).getBytes(); + if (bytes.length >= 2) + { + // UTF-16 (BE) + if ((bytes[0] & 0xFF) == 0xFE && (bytes[1] & 0xFF) == 0xFF) + { + return action.getString(COSName.URI); + } + // UTF-16 (LE) + if ((bytes[0] & 0xFF) == 0xFF && (bytes[1] & 0xFF) == 0xFE) + { + return action.getString(COSName.URI); + } + } + return new String(bytes, Charsets.UTF_8); + } + return null; } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDTargetDirectory.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDTargetDirectory.java new file mode 100644 index 00000000000..8d328e36cb5 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDTargetDirectory.java @@ -0,0 +1,281 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.pdmodel.interactive.action; + +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSInteger; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDNamedDestination; + +/** + * A target dictionary specifying path information to the target document. Each target dictionary + * specifies one element in the full path to the target and may have nested target dictionaries + * specifying additional elements. + * + * @author Tilman Hausherr + */ +public class PDTargetDirectory implements COSObjectable +{ + private final COSDictionary dict; + + /** + * Default constructor, creates target directory. + */ + public PDTargetDirectory() + { + dict = new COSDictionary(); + } + + /** + * Create a target directory from an existing dictionary. + * + * @param dictionary The existing target dictionary. + */ + public PDTargetDirectory(COSDictionary dictionary) + { + dict = dictionary; + } + + /** + * This will get the underlying dictionary that this class acts on. + * + * @return The underlying dictionary for this class. + */ + @Override + public COSDictionary getCOSObject() + { + return dict; + } + + /** + * Get the relationship between the current document and the target (which may be an + * intermediate target). + * + * @return the relationship as a name. Valid values are P (the target is the parent of the + * current document) and C (the target is a child of the current document). Invalid values or + * null are also returned. 
+ */ + public COSName getRelationship() + { + COSBase base = dict.getItem(COSName.R); + if (base instanceof COSName) + { + return (COSName) base; + } + return null; + } + + /** + * Set the relationship between the current document and the target (which may be an + * intermediate target). + * + * @param relationship Valid values are P (the target is the parent of the current document) and + * C (the target is a child of the current document). + * + * @throws IllegalArgumentException if the parameter is not P or C. + */ + public void setRelationship(COSName relationship) + { + if (!COSName.P.equals(relationship) && !COSName.C.equals(relationship)) + { + throw new IllegalArgumentException("The only valid are P or C, not " + relationship.getName()); + } + dict.setItem(COSName.R, relationship); + } + + /** + * Get the name of the file as found in the EmbeddedFiles name tree. This is only to be used if + * the target is a child of the current document. + * + * @return a filename or null if there is none. + */ + public String getFilename() + { + return dict.getString(COSName.N); + } + + /** + * Sets the name of the file as found in the EmbeddedFiles name tree. This is only to be used if + * the target is a child of the current document. + * + * @param filename a filename or null if the entry is to be deleted. + */ + public void setFilename(String filename) + { + dict.setString(COSName.N, filename); + } + + /** + * Get the target directory. If this entry is absent, the current document is the target file + * containing the destination. + * + * @return the target directory or null if the current document is the target file containing + * the destination. + */ + public PDTargetDirectory getTargetDirectory() + { + COSBase base = dict.getDictionaryObject(COSName.T); + if (base instanceof COSDictionary) + { + return new PDTargetDirectory((COSDictionary) base); + } + return null; + } + + /** + * Sets the target directory. 
+ * + * @param targetDirectory the target directory or null if the current document is the target + * file containing the destination. + */ + public void setTargetDirectory(PDTargetDirectory targetDirectory) + { + dict.setItem(COSName.T, targetDirectory); + } + + /** + * If the value in the /P entry is an integer, this will get the page number (zero-based) in the + * current document containing the file attachment annotation. + * + * @return the zero based page number or -1 if the /P entry value is missing or not a number. + */ + public int getPageNumber() + { + COSBase base = dict.getDictionaryObject(COSName.P); + if (base instanceof COSInteger) + { + return ((COSInteger) base).intValue(); + } + return -1; + } + + /** + * Set the page number (zero-based) in the current document containing the file attachment + * annotation. + * + * @param pageNumber the zero based page number. If this is < 0 then the entry is removed. + */ + public void setPageNumber(int pageNumber) + { + if (pageNumber < 0) + { + dict.removeItem(COSName.P); + } + else + { + dict.setInt(COSName.P, pageNumber); + } + } + + /** + * If the value in the /P entry is a string, this will get a named destination in the current + * document that provides the page number of the file attachment annotation. + * + * @return a named destination or null if the /P entry value is missing or not a string. + */ + public PDNamedDestination getNamedDestination() + { + COSBase base = dict.getDictionaryObject(COSName.P); + if (base instanceof COSString) + { + return new PDNamedDestination((COSString) base); + } + return null; + } + + /** + * This will set a named destination in the current document that provides the page number of + * the file attachment annotation. + * + * @param dest a named destination or null if the entry is to be removed. 
+ */ + public void setNamedDestination(PDNamedDestination dest) + { + if (dest == null) + { + dict.removeItem(COSName.P); + } + else + { + dict.setItem(COSName.P, dest); + } + } + + /** + * If the value in the /A entry is an integer, this will get the index (zero-based) of the + * annotation in the /Annots array of the page specified by the /P entry. + * + * @return the zero based annotation index or -1 if the /A entry value is missing or not a number. + */ + public int getAnnotationIndex() + { + COSBase base = dict.getDictionaryObject(COSName.A); + if (base instanceof COSInteger) + { + return ((COSInteger) base).intValue(); + } + return -1; + } + + /** + * This will set the index (zero-based) of the annotation in the /Annots array of the page + * specified by the /P entry. + * + * @param index the zero based index. If this is < 0 then the entry is removed. + */ + public void setAnnotationIndex(int index) + { + if (index < 0) + { + dict.removeItem(COSName.A); + } + else + { + dict.setInt(COSName.A, index); + } + } + + /** + * If the value in the /A entry is a string, this will get the value of the /NM entry in the + * annotation dictionary. + * + * @return the /NM value of an annotation dictionary or null if the /A entry value is missing or + * not a string. + */ + public String getAnnotationName() + { + COSBase base = dict.getDictionaryObject(COSName.A); + if (base instanceof COSString) + { + return ((COSString) base).getString(); + } + return null; + } + + /** + * This will set the /A entry to the value of the /NM entry in the annotation dictionary. + * + * @param name the /NM value of an annotation dictionary or null if the entry is to be removed. 
+ */ + public void setAnnotationName(String name) + { + dict.setString(COSName.A, name); + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDURIDictionary.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDURIDictionary.java index 6f11f0bfa21..ad20eefa436 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDURIDictionary.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDURIDictionary.java @@ -80,7 +80,7 @@ public String getBase() * The use of this entry is parallel to that of the body element <BASE>, as described * in the HTML 4.01 Specification. * - * @param base The the base URI to be used. + * @param base The base URI to be used. */ public void setBase(String base) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDWindowsLaunchParams.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDWindowsLaunchParams.java index cac0e6ec8ea..0b1fa29fd60 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDWindowsLaunchParams.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDWindowsLaunchParams.java @@ -22,7 +22,7 @@ import org.apache.pdfbox.pdmodel.common.COSObjectable; /** - * Launch paramaters for the windows OS. + * Launch parameters for the windows OS. * * @author Ben Litchfield */ @@ -33,7 +33,7 @@ public class PDWindowsLaunchParams implements COSObjectable */ public static final String OPERATION_OPEN = "open"; /** - * The print operation for the lanuch. + * The print operation for the launch. 
*/ public static final String OPERATION_PRINT = "print"; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/package.html index cc39b5675d8..804ef67a054 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/AnnotationFilter.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/AnnotationFilter.java new file mode 100644 index 00000000000..f38f6d160bc --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/AnnotationFilter.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.interactive.annotation; + +/** + * Simple interface allowing the use of an annotation filter visitor. 
+ * + * @author Maxime Veron + * + */ +public interface AnnotationFilter +{ + boolean accept(PDAnnotation annotation); +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotation.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotation.java index 9e96690f8c1..27f88f3f6c3 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotation.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotation.java @@ -27,9 +27,11 @@ import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.COSObjectable; import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList; import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceCMYK; @@ -84,15 +86,21 @@ public abstract class PDAnnotation implements COSObjectable * An annotation flag. */ private static final int FLAG_TOGGLE_NO_VIEW = 1 << 8; + /** + * An annotation flag. + * @see #setLockedContents(boolean) + */ + private static final int FLAG_LOCKED_CONTENTS = 1 << 9; private final COSDictionary dictionary; /** * Create the correct annotation from the base COS object. - * + * * @param base The COS object that is the annotation. * @return The correctly typed annotation object. - * @throws IOException If there is an error while creating the annotation. + * + * @throws IOException If the annotation type is unknown. 
*/ public static PDAnnotation createAnnotation(COSBase base) throws IOException { @@ -135,12 +143,9 @@ else if (PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT.equals(subtype) || PDAnnotationTextMarkup.SUB_TYPE_SQUIGGLY.equals(subtype) || PDAnnotationTextMarkup.SUB_TYPE_STRIKEOUT.equals(subtype)) { + // see 12.5.6.10 Text Markup Annotations annot = new PDAnnotationTextMarkup(annotDic); } - else if (PDAnnotationLink.SUB_TYPE.equals(subtype)) - { - annot = new PDAnnotationLink(annotDic); - } else if (PDAnnotationWidget.SUB_TYPE.equals(subtype)) { annot = new PDAnnotationWidget(annotDic); @@ -190,6 +195,35 @@ public PDAnnotation(COSDictionary dict) dictionary.setItem(COSName.TYPE, COSName.ANNOT); } + /** + * {@inheritDoc} + */ + @Override + public boolean equals (Object o) { + if (o == this) + { + return true; + } + + if (!(o instanceof PDAnnotation)) + { + return false; + } + + COSDictionary toBeCompared = ((PDAnnotation) o).getCOSObject(); + return toBeCompared.equals(getCOSObject()); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + return dictionary.hashCode(); + } + + /** * The annotation rectangle, defining the location of the annotation on the page in default user space units. This * is usually required and should not return null on valid PDF documents. 
But where this is a parent form field with @@ -203,10 +237,10 @@ public PDRectangle getRectangle() PDRectangle rectangle = null; if (rectArray != null) { - if (rectArray.size() == 4 && rectArray.get(0) instanceof COSNumber - && rectArray.get(1) instanceof COSNumber - && rectArray.get(2) instanceof COSNumber - && rectArray.get(3) instanceof COSNumber) + if (rectArray.size() == 4 && rectArray.getObject(0) instanceof COSNumber + && rectArray.getObject(1) instanceof COSNumber + && rectArray.getObject(2) instanceof COSNumber + && rectArray.getObject(3) instanceof COSNumber) { rectangle = new PDRectangle(rectArray); } @@ -265,12 +299,7 @@ public COSDictionary getCOSObject() */ public COSName getAppearanceState() { - COSName name = (COSName) getCOSObject().getDictionaryObject(COSName.AS); - if (name != null) - { - return name; - } - return null; + return getCOSObject().getCOSName(COSName.AS); } /** @@ -280,14 +309,7 @@ public COSName getAppearanceState() */ public void setAppearanceState(String as) { - if (as == null) - { - getCOSObject().removeItem(COSName.AS); - } - else - { - getCOSObject().setItem(COSName.AS, COSName.getPDFName(as)); - } + getCOSObject().setName(COSName.AS, as); } /** @@ -297,10 +319,10 @@ public void setAppearanceState(String as) */ public PDAppearanceDictionary getAppearance() { - COSDictionary apDic = (COSDictionary) dictionary.getDictionaryObject(COSName.AP); - if (apDic != null) + COSBase base = dictionary.getDictionaryObject(COSName.AP); + if (base instanceof COSDictionary) { - return new PDAppearanceDictionary(apDic); + return new PDAppearanceDictionary((COSDictionary) base); } return null; } @@ -312,12 +334,7 @@ public PDAppearanceDictionary getAppearance() */ public void setAppearance(PDAppearanceDictionary appearance) { - COSDictionary ap = null; - if (appearance != null) - { - ap = appearance.getCOSObject(); - } - dictionary.setItem(COSName.AP, ap); + dictionary.setItem(COSName.AP, appearance); } /** @@ -529,6 +546,35 @@ public void 
setToggleNoView(boolean toggleNoView) getCOSObject().setFlag(COSName.F, FLAG_TOGGLE_NO_VIEW, toggleNoView); } + /** + * Get the LockedContents flag. + * + * @return The LockedContents flag. + * @see #setLockedContents(boolean) + */ + public boolean isLockedContents() + { + return getCOSObject().getFlag(COSName.F, FLAG_LOCKED_CONTENTS); + } + + /** + * Set the LockedContents flag. If set, do not allow the contents of the annotation to be + * modified by the user. This flag does not restrict deletion of the annotation or changes to + * other annotation properties, such as position and size. + * + * @param lockedContents The new LockedContents flag value. + * @see + * PDF + * 32000-1:2008 12.5.3, Table 165 + * @see #isLockedContents() + * @see #FLAG_LOCKED_CONTENTS + * @since PDF 1.7 + */ + public void setLockedContents(boolean lockedContents) + { + getCOSObject().setFlag(COSName.F, FLAG_LOCKED_CONTENTS, lockedContents); + } + /** * Get the "contents" of the field. * @@ -552,7 +598,7 @@ public void setContents(String value) /** * This will retrieve the date and time the annotation was modified. * - * @return the modified date/time (often in date format, but can be an arbitary string). + * @return the modified date/time (often in date format, but can be an arbitrary string). */ public String getModifiedDate() { @@ -607,12 +653,13 @@ public void setAnnotationName(String nm) /** * This will get the key of this annotation in the structural parent tree. - * - * @return the integer key of the annotation's entry in the structural parent tree + * + * @return the integer key of the annotation's entry in the structural parent tree or -1 if + * there isn't any. */ public int getStructParent() { - return getCOSObject().getInt(COSName.STRUCT_PARENT, 0); + return getCOSObject().getInt(COSName.STRUCT_PARENT); } /** @@ -626,26 +673,68 @@ public void setStructParent(int structParent) } /** - * This will retrieve the border array. 
If none is available, it will return the default, which - * is [0 0 1]. + * This will get the optional content group or optional content membership dictionary for the + * annotation. + * + * @return The optional content group or optional content membership dictionary or null if there + * is none. + */ + public PDPropertyList getOptionalContent() + { + COSBase base = getCOSObject().getDictionaryObject(COSName.OC); + if (base instanceof COSDictionary) + { + return PDPropertyList.create((COSDictionary) base); + } + return null; + } + + /** + * Sets the optional content group or optional content membership dictionary for the annotation. + * + * @param oc The optional content group or optional content membership dictionary. + */ + public void setOptionalContent(PDPropertyList oc) + { + getCOSObject().setItem(COSName.OC, oc); + } + + /** + * This will retrieve the border array. If none is available then it will return the default, + * which is [0 0 1]. The array consists of at least three numbers defining the horizontal corner + * radius, vertical corner radius, and border width. The array may have a fourth element, an + * optional dash array defining a pattern of dashes and gaps that shall be used in drawing the + * border. If the array has less than three elements, it will be filled with 0. * - * @return the border array. + * @return the border array, never null. */ public COSArray getBorder() { COSBase base = getCOSObject().getDictionaryObject(COSName.BORDER); COSArray border; - if (!(base instanceof COSArray)) + if (base instanceof COSArray) + { + border = (COSArray) base; + if (border.size() < 3) + { + // create a copy to avoid altering the PDF + COSArray newBorder = new COSArray(); + newBorder.addAll(border); + border = newBorder; + // Adobe Reader behaves as if missing elements are 0. 
+ while (border.size() < 3) + { + border.add(COSInteger.ZERO); + } + } + } + else { border = new COSArray(); border.add(COSInteger.ZERO); border.add(COSInteger.ZERO); border.add(COSInteger.ONE); } - else - { - border = (COSArray) base; - } return border; } @@ -735,18 +824,41 @@ public void setPage(PDPage page) } /** - * This will retrieve the corresponding page of this annotation. - * - * @return the corresponding page + * This will retrieve the corresponding page of this annotation. See also + * this answer about what to do if + * the page isn't available. + * + * @return The corresponding page or null if not available. */ public PDPage getPage() { - COSDictionary p = (COSDictionary) this.getCOSObject().getDictionaryObject(COSName.P); - if (p != null) + COSBase base = this.getCOSObject().getDictionaryObject(COSName.P); + if (base instanceof COSDictionary) { - return new PDPage(p); + return new PDPage((COSDictionary) base); } return null; } + /** + * Create the appearance entry for this annotation. Not having it may prevent display in some + * viewers. This method is for overriding in subclasses, the default implementation does + * nothing. + * + * @param document + */ + public void constructAppearances(PDDocument document) + { + } + + /** + * Create the appearance entry for this annotation. Not having it may prevent display in some + * viewers. This method is for overriding in subclasses, the default implementation does + * nothing. 
+ * + */ + public void constructAppearances() + { + } + } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationFileAttachment.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationFileAttachment.java index a2295fef500..d77bb864f9f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationFileAttachment.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationFileAttachment.java @@ -23,7 +23,7 @@ import org.apache.pdfbox.pdmodel.common.filespecification.PDFileSpecification; /** - * This is the class that represents a file attachement. + * This is the class that represents a file attachment. * * @author Ben Litchfield */ @@ -56,14 +56,13 @@ public class PDAnnotationFileAttachment extends PDAnnotationMarkup */ public PDAnnotationFileAttachment() { - super(); - getCOSObject().setItem(COSName.SUBTYPE, COSName.getPDFName(SUB_TYPE)); + getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); } /** * Creates a Link annotation from a COSDictionary, expected to be a correct object definition. * - * @param field the PDF objet to represent as a field. + * @param field the PDF object to represent as a field. */ public PDAnnotationFileAttachment(COSDictionary field) { @@ -99,16 +98,28 @@ public void setFile(PDFileSpecification file) */ public String getAttachmentName() { - return getCOSObject().getNameAsString("Name", ATTACHMENT_NAME_PUSH_PIN); + return getCOSObject().getNameAsString(COSName.NAME, ATTACHMENT_NAME_PUSH_PIN); } /** - * Set the name used to draw the attachement icon. See the ATTACHMENT_NAME_XXX constants. + * Set the name used to draw the attachment icon. See the ATTACHMENT_NAME_XXX constants. * * @param name The name of the visual icon to draw. + * @deprecated use {@link #setAttachmentName(java.lang.String)}. 
*/ + @Deprecated public void setAttachementName(String name) { - getCOSObject().setName("Name", name); + getCOSObject().setName(COSName.NAME, name); + } + + /** + * Set the name used to draw the attachment icon. See the ATTACHMENT_NAME_XXX constants. + * + * @param name The name of the visual icon to draw. + */ + public void setAttachmentName(String name) + { + getCOSObject().setName(COSName.NAME, name); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationLine.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationLine.java index a891c6af0fc..3eae8d941ec 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationLine.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationLine.java @@ -21,7 +21,10 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDLineAppearanceHandler; /** * This is the class that represents a line annotation. Introduced in PDF 1.3 specification @@ -30,6 +33,7 @@ */ public class PDAnnotationLine extends PDAnnotationMarkup { + private PDAppearanceHandler customAppearanceHandler; /* * The various values for intent (get/setIT, see the PDF 1.6 reference Table 8.22 @@ -90,7 +94,7 @@ public class PDAnnotationLine extends PDAnnotationMarkup public static final String LE_R_OPEN_ARROW = "ROpenArrow"; /** - * Constant for a revered closed arrow line ending. + * Constant for a reversed closed arrow line ending. 
*/ public static final String LE_R_CLOSED_ARROW = "RClosedArrow"; @@ -109,9 +113,8 @@ public class PDAnnotationLine extends PDAnnotationMarkup */ public PDAnnotationLine() { - super(); - getCOSObject().setItem(COSName.SUBTYPE, COSName.getPDFName(SUB_TYPE)); - // Dictionary value L is mandatory, fill in with arbitary value + getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); + // Dictionary value L is mandatory, fill in with arbitrary value setLine(new float[] { 0, 0, 0, 0 }); } @@ -159,8 +162,9 @@ public void setStartPointEndingStyle(String style) { style = LE_NONE; } - COSArray array = (COSArray) getCOSObject().getDictionaryObject(COSName.LE); - if (array == null) + COSBase base = getCOSObject().getDictionaryObject(COSName.LE); + COSArray array; + if (!(base instanceof COSArray) || ((COSArray) base).size() == 0) { array = new COSArray(); array.add(COSName.getPDFName(style)); @@ -169,6 +173,7 @@ public void setStartPointEndingStyle(String style) } else { + array = (COSArray) base; array.setName(0, style); } } @@ -180,14 +185,12 @@ public void setStartPointEndingStyle(String style) */ public String getStartPointEndingStyle() { - String retval = LE_NONE; - COSArray array = (COSArray) getCOSObject().getDictionaryObject(COSName.LE); - if (array != null) + COSBase base = getCOSObject().getDictionaryObject(COSName.LE); + if (base instanceof COSArray && ((COSArray) base).size() >= 2) { - retval = array.getName(0); + return ((COSArray) base).getName(0); } - - return retval; + return LE_NONE; } /** @@ -201,8 +204,9 @@ public void setEndPointEndingStyle(String style) { style = LE_NONE; } - COSArray array = (COSArray) getCOSObject().getDictionaryObject(COSName.LE); - if (array == null) + COSBase base = getCOSObject().getDictionaryObject(COSName.LE); + COSArray array; + if (!(base instanceof COSArray) || ((COSArray) base).size() < 2) { array = new COSArray(); array.add(COSName.getPDFName(LE_NONE)); @@ -211,6 +215,7 @@ public void setEndPointEndingStyle(String style) } else { + 
array = (COSArray) base; array.setName(1, style); } } @@ -222,14 +227,12 @@ public void setEndPointEndingStyle(String style) */ public String getEndPointEndingStyle() { - String retval = LE_NONE; - COSArray array = (COSArray) getCOSObject().getDictionaryObject(COSName.LE); - if (array != null) + COSBase base = getCOSObject().getDictionaryObject(COSName.LE); + if (base instanceof COSArray && ((COSArray) base).size() >= 2) { - retval = array.getName(1); + return ((COSArray) base).getName(1); } - - return retval; + return LE_NONE; } /** @@ -308,7 +311,7 @@ public PDBorderStyleDictionary getBorderStyle() */ public float getLeaderLineLength() { - return this.getCOSObject().getFloat(COSName.LL); + return this.getCOSObject().getFloat(COSName.LL, 0); } /** @@ -328,7 +331,7 @@ public void setLeaderLineLength(float leaderLineLength) */ public float getLeaderLineExtensionLength() { - return this.getCOSObject().getFloat(COSName.LLE); + return this.getCOSObject().getFloat(COSName.LLE, 0); } /** @@ -348,7 +351,7 @@ public void setLeaderLineExtensionLength(float leaderLineExtensionLength) */ public float getLeaderLineOffsetLength() { - return this.getCOSObject().getFloat(COSName.LLO); + return this.getCOSObject().getFloat(COSName.LLO, 0); } /** @@ -368,7 +371,7 @@ public void setLeaderLineOffsetLength(float leaderLineOffsetLength) */ public String getCaptionPositioning() { - return this.getCOSObject().getString(COSName.CP); + return this.getCOSObject().getNameAsString(COSName.CP); } /** @@ -378,7 +381,7 @@ public String getCaptionPositioning() */ public void setCaptionPositioning(String captionPositioning) { - this.getCOSObject().setString(COSName.CP, captionPositioning); + this.getCOSObject().setName(COSName.CP, captionPositioning); } /** @@ -454,4 +457,34 @@ public float getCaptionVerticalOffset() return retval; } + /** + * Set a custom appearance handler for generating the annotations appearance streams. 
+ * + * @param appearanceHandler + */ + public void setCustomAppearanceHandler(PDAppearanceHandler appearanceHandler) + { + customAppearanceHandler = appearanceHandler; + } + + @Override + public void constructAppearances() + { + this.constructAppearances(null); + } + + @Override + public void constructAppearances(PDDocument document) + { + if (customAppearanceHandler == null) + { + PDLineAppearanceHandler appearanceHandler = new PDLineAppearanceHandler(this, document); + appearanceHandler.generateAppearanceStreams(); + } + else + { + customAppearanceHandler.generateAppearanceStreams(); + } + } + } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationLink.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationLink.java index 92a884c6fdd..8695389769f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationLink.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationLink.java @@ -22,9 +22,12 @@ import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.interactive.action.PDActionFactory; import org.apache.pdfbox.pdmodel.interactive.action.PDAction; import org.apache.pdfbox.pdmodel.interactive.action.PDActionURI; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDLinkAppearanceHandler; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDDestination; /** @@ -35,7 +38,8 @@ */ public class PDAnnotationLink extends PDAnnotation { - + private PDAppearanceHandler customAppearanceHandler; + /** * Constant values of the Text as defined in the PDF 1.6 reference Table 8.19. 
*/ @@ -63,14 +67,13 @@ public class PDAnnotationLink extends PDAnnotation */ public PDAnnotationLink() { - super(); - getCOSObject().setItem(COSName.SUBTYPE, COSName.getPDFName(SUB_TYPE)); + getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); } /** * Creates a Link annotation from a COSDictionary, expected to be a correct object definition. * - * @param field the PDF objet to represent as a field. + * @param field the PDF object to represent as a field. */ public PDAnnotationLink(COSDictionary field) { @@ -85,8 +88,12 @@ public PDAnnotationLink(COSDictionary field) */ public PDAction getAction() { - COSDictionary action = (COSDictionary) this.getCOSObject().getDictionaryObject(COSName.A); - return PDActionFactory.createAction(action); + COSBase base = getCOSObject().getDictionaryObject(COSName.A); + if (base instanceof COSDictionary) + { + return PDActionFactory.createAction((COSDictionary) base); + } + return null; } /** @@ -188,10 +195,10 @@ public void setPreviousURI(PDActionURI pa) */ public PDActionURI getPreviousURI() { - COSDictionary pa = (COSDictionary) getCOSObject().getDictionaryObject("PA"); - if (pa != null) + COSBase base = getCOSObject().getDictionaryObject("PA"); + if (base instanceof COSDictionary) { - return new PDActionURI(pa); + return new PDActionURI((COSDictionary) base); } return null; } @@ -205,7 +212,7 @@ public void setQuadPoints(float[] quadPoints) { COSArray newQuadPoints = new COSArray(); newQuadPoints.setFloatArray(quadPoints); - getCOSObject().setItem("QuadPoints", newQuadPoints); + getCOSObject().setItem(COSName.QUADPOINTS, newQuadPoints); } /** @@ -215,12 +222,42 @@ public void setQuadPoints(float[] quadPoints) */ public float[] getQuadPoints() { - COSArray quadPoints = (COSArray) getCOSObject().getDictionaryObject("QuadPoints"); - if (quadPoints != null) + COSBase base = getCOSObject().getDictionaryObject(COSName.QUADPOINTS); + if (base instanceof COSArray) { - return quadPoints.toFloatArray(); + return ((COSArray) 
base).toFloatArray(); } // Should never happen as this is a required item return null; } + + /** + * Set a custom appearance handler for generating the annotations appearance streams. + * + * @param appearanceHandler + */ + public void setCustomAppearanceHandler(PDAppearanceHandler appearanceHandler) + { + customAppearanceHandler = appearanceHandler; + } + + @Override + public void constructAppearances() + { + this.constructAppearances(null); + } + + @Override + public void constructAppearances(PDDocument document) + { + if (customAppearanceHandler == null) + { + PDLinkAppearanceHandler appearanceHandler = new PDLinkAppearanceHandler(this, document); + appearanceHandler.generateAppearanceStreams(); + } + else + { + customAppearanceHandler.generateAppearanceStreams(); + } + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationMarkup.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationMarkup.java index 42dbb83c6aa..b99a3c2eaae 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationMarkup.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationMarkup.java @@ -18,20 +18,39 @@ import java.io.IOException; import java.util.Calendar; + +import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDCaretAppearanceHandler; +import 
org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDFreeTextAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDInkAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDPolygonAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDPolylineAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDSoundAppearanceHandler; + +// needed for the javadoc generation +import org.apache.pdfbox.pdmodel.interactive.form.PDVariableText; /** - * This class represents the additonal fields of a Markup type Annotation. See section 12.5.6 of ISO32000-1:2008 + * This class represents the additional fields of a Markup type Annotation. See section 12.5.6 of ISO32000-1:2008 * (starting with page 390) for details on annotation types. * * @author Paul King */ public class PDAnnotationMarkup extends PDAnnotation { + + private PDAppearanceHandler customAppearanceHandler; + /** * Constant for a FreeText type of annotation. */ @@ -57,6 +76,25 @@ public class PDAnnotationMarkup extends PDAnnotation */ public static final String SUB_TYPE_SOUND = "Sound"; + /* + * The various values of the free text annotation as defined in the PDF 1.7 reference Table 170 + */ + + /** + * A plain free-text annotation, also known as a text box comment. + */ + public static final String IT_FREE_TEXT = "FreeText"; + + /** + * A callout, associated with an area on the page through the callout line specified. + */ + public static final String IT_FREE_TEXT_CALLOUT = "FreeTextCallout"; + + /** + * The annotation is intended to function as a click-to-type or typewriter object. 
+ */ + public static final String IT_FREE_TEXT_TYPE_WRITER = "FreeTextTypeWriter"; + /* * The various values of the reply type as defined in the PDF 1.7 reference Table 170 */ @@ -76,7 +114,6 @@ public class PDAnnotationMarkup extends PDAnnotation */ public PDAnnotationMarkup() { - super(); } /** @@ -355,4 +392,516 @@ public PDBorderStyleDictionary getBorderStyle() return null; } + /** + * This will set the line ending style. + * + * @param style The new style. + */ + public final void setLineEndingStyle(String style) + { + getCOSObject().setName(COSName.LE, style); + } + + /** + * This will retrieve the line ending style. + * + * @return The line ending style, possible values shown in the LE_ constants section, LE_NONE if + * missing, never null. + */ + public String getLineEndingStyle() + { + return getCOSObject().getNameAsString(COSName.LE, PDAnnotationLine.LE_NONE); + } + + // PDF 32000 specification has "the interior color with which to fill the annotation’s line endings" + // but it is the inside of the polygon. + + /** + * This will set interior color. + * + * @param ic color. + */ + public void setInteriorColor(PDColor ic) + { + getCOSObject().setItem(COSName.IC, ic.toCOSArray()); + } + + /** + * This will retrieve the interior color. + * + * @return object representing the color. + */ + public PDColor getInteriorColor() + { + return getColor(COSName.IC); + } + + /** + * This will set the border effect dictionary, specifying effects to be applied when drawing the + * line. This is supported by PDF 1.5 and higher. + * + * @param be The border effect dictionary to set. + * + */ + public void setBorderEffect(PDBorderEffectDictionary be) + { + getCOSObject().setItem(COSName.BE, be); + } + + /** + * This will retrieve the border effect dictionary, specifying effects to be applied when + * drawing the line.
+ * + * @return The border effect dictionary + */ + public PDBorderEffectDictionary getBorderEffect() + { + COSDictionary be = (COSDictionary) getCOSObject().getDictionaryObject(COSName.BE); + if (be != null) + { + return new PDBorderEffectDictionary(be); + } + else + { + return null; + } + } + + /** + * Sets the paths that make this annotation. + * + * @param inkList An array of arrays, each representing a stroked path. Each array shall be a + * series of alternating horizontal and vertical coordinates. If the parameter is null the entry + * will be removed. + */ + public void setInkList(float[][] inkList) + { + if (inkList == null) + { + getCOSObject().removeItem(COSName.INKLIST); + return; + } + COSArray array = new COSArray(); + for (float[] path : inkList) + { + COSArray innerArray = new COSArray(); + innerArray.setFloatArray(path); + array.add(innerArray); + } + getCOSObject().setItem(COSName.INKLIST, array); + } + + /** + * Get one or more disjoint paths that make this annotation. + * + * @return An array of arrays, each representing a stroked path. Each array shall be a series of + * alternating horizontal and vertical coordinates. + */ + public float[][] getInkList() + { + COSBase base = getCOSObject().getDictionaryObject(COSName.INKLIST); + if (base instanceof COSArray) + { + COSArray array = (COSArray) base; + float[][] inkList = new float[array.size()][]; + for (int i = 0; i < array.size(); ++i) + { + COSBase base2 = array.getObject(i); + if (base2 instanceof COSArray) + { + inkList[i] = ((COSArray) array.getObject(i)).toFloatArray(); + } + else + { + inkList[i] = new float[0]; + } + } + return inkList; + } + return new float[0][0]; + } + + /** + * Get the default appearance. + * + * @return a string describing the default appearance. + */ + public String getDefaultAppearance() + { + return getCOSObject().getString(COSName.DA); + } + + /** + * Set the default appearance. + * + * @param daValue a string describing the default appearance. 
+ */ + public void setDefaultAppearance(String daValue) + { + getCOSObject().setString(COSName.DA, daValue); + } + + /** + * Get the default style string. + * + * The default style string defines the default style for rich text fields. + * + * @return the DS element of the dictionary object + */ + public String getDefaultStyleString() + { + return getCOSObject().getString(COSName.DS); + } + + /** + * Set the default style string. + * + * Providing null as the value will remove the default style string. + * + * @param defaultStyleString a string describing the default style. + */ + public void setDefaultStyleString(String defaultStyleString) + { + getCOSObject().setString(COSName.DS, defaultStyleString); + } + + /** + * This will get the 'quadding' or justification of the text to be displayed. + *
+ * 0 - Left (default)<br>
+ * 1 - Centered<br>
+ * 2 - Right<br>
+ * Please see the QUADDING_CONSTANTS in {@link PDVariableText }. + * + * @return The justification of the text strings. + */ + public int getQ() + { + return getCOSObject().getInt(COSName.Q, 0); + } + + /** + * This will set the quadding/justification of the text. Please see the QUADDING_CONSTANTS + * in {@link PDVariableText }. + * + * @param q The new text justification. + */ + public void setQ(int q) + { + getCOSObject().setInt(COSName.Q, q); + } + + /** + * This will set the rectangle difference rectangle. Giving the difference between the + * annotations rectangle and where the drawing occurs. (To take account of any effects applied + * through the BE entry for example) + * + * @param rd the rectangle difference + * + */ + public void setRectDifference(PDRectangle rd) + { + getCOSObject().setItem(COSName.RD, rd); + } + + /** + * This will get the rectangle difference rectangle. Giving the difference between the + * annotations rectangle and where the drawing occurs. (To take account of any effects applied + * through the BE entry for example) + * + * @return the rectangle difference + */ + public PDRectangle getRectDifference() + { + COSBase base = getCOSObject().getDictionaryObject(COSName.RD); + if (base instanceof COSArray) + { + return new PDRectangle((COSArray) base); + } + return null; + } + + /** + * This will set the difference between the annotations "outer" rectangle defined by + * /Rect and boundaries of the underlying. + * + *

<p>This will set an equal difference for all sides</p>

+ * + * @param difference from the annotations /Rect entry + */ + public void setRectDifferences(float difference) { + setRectDifferences(difference, difference, difference, difference); + } + + /** + * This will set the difference between the annotations "outer" rectangle defined by + * /Rect and the border. + * + * @param differenceLeft left difference from the annotations /Rect entry + * @param differenceTop top difference from the annotations /Rect entry + * @param differenceRight right difference from the annotations /Rect entry + * @param differenceBottom bottom difference from the annotations /Rect entry + * + */ + public void setRectDifferences(float differenceLeft, float differenceTop, float differenceRight, float differenceBottom) + { + COSArray margins = new COSArray(); + margins.add(new COSFloat(differenceLeft)); + margins.add(new COSFloat(differenceTop)); + margins.add(new COSFloat(differenceRight)); + margins.add(new COSFloat(differenceBottom)); + getCOSObject().setItem(COSName.RD, margins); + } + + /** + * This will get the margin between the annotations "outer" rectangle defined by + * /Rect and the boundaries of the underlying caret. + * + * @return the differences. If the entry hasn't been set an empty array is returned. + */ + public float[] getRectDifferences() + { + COSBase margin = getCOSObject().getItem(COSName.RD); + if (margin instanceof COSArray) + { + return ((COSArray) margin).toFloatArray(); + } + return new float[]{}; + } + + /** + * This will set the coordinates of the callout line. (PDF 1.6 and higher) Only relevant if the + * intent is FreeTextCallout. + * + * @param callout An array of four or six numbers specifying a callout line attached to the free + * text annotation. Six numbers [ x1 y1 x2 y2 x3 y3 ] represent the starting, knee point, and + * ending coordinates of the line in default user space, four numbers [ x1 y1 x2 y2 ] represent + * the starting and ending coordinates of the line.
+ */ + public final void setCallout(float[] callout) + { + COSArray newCallout = new COSArray(); + newCallout.setFloatArray(callout); + getCOSObject().setItem(COSName.CL, newCallout); + } + + /** + * This will get the coordinates of the callout line. (PDF 1.6 and higher) Only relevant if the + * intent is FreeTextCallout. + * + * @return An array of four or six numbers specifying a callout line attached to the free text + * annotation. Six numbers [ x1 y1 x2 y2 x3 y3 ] represent the starting, knee point, and ending + * coordinates of the line in default user space, four numbers [ x1 y1 x2 y2 ] represent the + * starting and ending coordinates of the line. + */ + public float[] getCallout() + { + COSBase base = getCOSObject().getDictionaryObject(COSName.CL); + if (base instanceof COSArray) + { + return ((COSArray) base).toFloatArray(); + } + return null; + } + + /** + * This will set the line ending style for the start point, see the LE_ constants for the possible values. + * + * @param style The new style. + */ + public void setStartPointEndingStyle(String style) + { + String actualStyle = style == null ? PDAnnotationLine.LE_NONE : style; + COSBase base = getCOSObject().getDictionaryObject(COSName.LE); + COSArray array; + if (!(base instanceof COSArray) || ((COSArray) base).size() == 0) + { + array = new COSArray(); + array.add(COSName.getPDFName(actualStyle)); + array.add(COSName.getPDFName(PDAnnotationLine.LE_NONE)); + getCOSObject().setItem(COSName.LE, array); + } + else + { + array = (COSArray) base; + array.setName(0, actualStyle); + } + } + + /** + * This will retrieve the line ending style for the start point, possible values shown in the LE_ constants section. + * + * @return The ending style for the start point, LE_NONE if missing, never null. 
+ */ + public String getStartPointEndingStyle() + { + COSBase base = getCOSObject().getDictionaryObject(COSName.LE); + if (base instanceof COSArray && ((COSArray) base).size() >= 2) + { + return ((COSArray) base).getName(0, PDAnnotationLine.LE_NONE); + } + return PDAnnotationLine.LE_NONE; + } + + /** + * This will set the line ending style for the end point, see the LE_ constants for the possible values. + * + * @param style The new style. + */ + public void setEndPointEndingStyle(String style) + { + String actualStyle = style == null ? PDAnnotationLine.LE_NONE : style; + COSBase base = getCOSObject().getDictionaryObject(COSName.LE); + COSArray array; + if (!(base instanceof COSArray) || ((COSArray) base).size() < 2) + { + array = new COSArray(); + array.add(COSName.getPDFName(PDAnnotationLine.LE_NONE)); + array.add(COSName.getPDFName(actualStyle)); + getCOSObject().setItem(COSName.LE, array); + } + else + { + array = (COSArray) base; + array.setName(1, actualStyle); + } + } + + /** + * This will retrieve the line ending style for the end point, possible values shown in the LE_ constants section. + * + * @return The ending style for the end point, LE_NONE if missing, never null. + */ + public String getEndPointEndingStyle() + { + COSBase base = getCOSObject().getDictionaryObject(COSName.LE); + if (base instanceof COSArray && ((COSArray) base).size() >= 2) + { + return ((COSArray) base).getName(1, PDAnnotationLine.LE_NONE); + } + return PDAnnotationLine.LE_NONE; + } + + + /** + * This will retrieve the numbers that shall represent the alternating horizontal and vertical + * coordinates. + * + * @return An array of floats representing the alternating horizontal and vertical coordinates. 
+ */ + public float[] getVertices() + { + COSBase base = getCOSObject().getDictionaryObject(COSName.VERTICES); + if (base instanceof COSArray) + { + return ((COSArray) base).toFloatArray(); + } + return null; + } + + /** + * This will set the numbers that shall represent the alternating horizontal and vertical + * coordinates. + * + * @param points an array with the numbers that shall represent the alternating horizontal and + * vertical coordinates. + */ + public void setVertices(float[] points) + { + COSArray ar = new COSArray(); + ar.setFloatArray(points); + getCOSObject().setItem(COSName.VERTICES, ar); + } + + + /** + * PDF 2.0: This will retrieve the arrays that shall represent the alternating horizontal + * and vertical coordinates for path building. + * + * @return An array of float arrays, each supplying the operands for a path building operator + * (m, l or c). The first array should have 2 elements, the others should have 2 or 6 elements. + */ + public float[][] getPath() + { + COSBase base = getCOSObject().getDictionaryObject(COSName.PATH); + if (base instanceof COSArray) + { + COSArray array = (COSArray) base; + float[][] pathArray = new float[array.size()][]; + for (int i = 0; i < array.size(); ++i) + { + COSBase base2 = array.getObject(i); + if (base2 instanceof COSArray) + { + pathArray[i] = ((COSArray) array.getObject(i)).toFloatArray(); + } + else + { + pathArray[i] = new float[0]; + } + } + return pathArray; + } + return null; + } + + /** + * Set a custom appearance handler for generating the annotations appearance streams. 
+ * + * @param appearanceHandler + */ + public void setCustomAppearanceHandler(PDAppearanceHandler appearanceHandler) + { + customAppearanceHandler = appearanceHandler; + } + + @Override + public void constructAppearances() + { + this.constructAppearances(null); + } + + @Override + public void constructAppearances(PDDocument document) + { + if (customAppearanceHandler == null) + { + PDAppearanceHandler appearanceHandler = null; + if (SUB_TYPE_CARET.equals(getSubtype())) + { + appearanceHandler = new PDCaretAppearanceHandler(this, document); + } + else if (SUB_TYPE_FREETEXT.equals(getSubtype())) + { + appearanceHandler = new PDFreeTextAppearanceHandler(this, document); + } + else if (SUB_TYPE_INK.equals(getSubtype())) + { + appearanceHandler = new PDInkAppearanceHandler(this, document); + } + else if (SUB_TYPE_POLYGON.equals(getSubtype())) + { + appearanceHandler = new PDPolygonAppearanceHandler(this, document); + } + else if (SUB_TYPE_POLYLINE.equals(getSubtype())) + { + appearanceHandler = new PDPolylineAppearanceHandler(this, document); + } + else if (SUB_TYPE_SOUND.equals(getSubtype())) + { + appearanceHandler = new PDSoundAppearanceHandler(this, document); + } + + if (appearanceHandler != null) + { + appearanceHandler.generateAppearanceStreams(); + } + } + else + { + customAppearanceHandler.generateAppearanceStreams(); + } + } + + } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationPopup.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationPopup.java index 22c642ee548..e1e29dc85ba 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationPopup.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationPopup.java @@ -39,14 +39,13 @@ public class PDAnnotationPopup extends PDAnnotation */ public PDAnnotationPopup() { - super(); - getCOSObject().setItem(COSName.SUBTYPE, COSName.getPDFName(SUB_TYPE)); + 
getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); } /** * Creates a popup annotation from a COSDictionary, expected to be a correct object definition. * - * @param field the PDF objet to represent as a field. + * @param field the PDF object to represent as a field. */ public PDAnnotationPopup(COSDictionary field) { @@ -54,7 +53,7 @@ public PDAnnotationPopup(COSDictionary field) } /** - * This will set inital state of the annotation, open or closed. + * This will set the initial state of the annotation, open or closed. * * @param open Boolean value, true = open false = closed. */ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationRubberStamp.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationRubberStamp.java index 82dbbe9e750..93a17bcf6b0 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationRubberStamp.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationRubberStamp.java @@ -98,14 +98,13 @@ public class PDAnnotationRubberStamp extends PDAnnotationMarkup */ public PDAnnotationRubberStamp() { - super(); - getCOSObject().setItem(COSName.SUBTYPE, COSName.getPDFName(SUB_TYPE)); + getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); } /** * Creates a Rubber Stamp annotation from a COSDictionary, expected to be a correct object definition. * - * @param field the PDF objet to represent as a field. + * @param field the PDF object to represent as a field. 
*/ public PDAnnotationRubberStamp(COSDictionary field) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationSquareCircle.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationSquareCircle.java index 2e36b7a15ab..f7fa563d3f9 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationSquareCircle.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationSquareCircle.java @@ -19,9 +19,14 @@ import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDCircleAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDSquareAppearanceHandler; /** * This is the class that represents a rectangular or eliptical annotation Introduced in PDF 1.3 specification . @@ -36,10 +41,12 @@ public class PDAnnotationSquareCircle extends PDAnnotationMarkup */ public static final String SUB_TYPE_SQUARE = "Square"; /** - * Constant for an Eliptical type of annotation. + * Constant for an elliptical type of annotation. */ public static final String SUB_TYPE_CIRCLE = "Circle"; + private PDAppearanceHandler customAppearanceHandler; + /** * Creates a Circle or Square annotation of the specified sub type. * @@ -47,14 +54,13 @@ public class PDAnnotationSquareCircle extends PDAnnotationMarkup */ public PDAnnotationSquareCircle(String subType) { - super(); setSubtype(subType); } /** * Creates a Line annotation from a COSDictionary, expected to be a correct object definition. 
* - * @param field the PDF objet to represent as a field. + * @param field the PDF object to represent as a field. */ public PDAnnotationSquareCircle(COSDictionary field) { @@ -192,4 +198,93 @@ public PDBorderStyleDictionary getBorderStyle() return null; } + /** + * This will set the difference between the annotations "outer" rectangle defined by /Rect and + * the border. + * + *

+ * This will set an equal difference for all sides

+ * + * @param difference from the annotations /Rect entry + */ + public void setRectDifferences(float difference) + { + setRectDifferences(difference, difference, difference, difference); + } + + /** + * This will set the difference between the annotations "outer" rectangle defined by + * /Rect and the border. + * + * @param differenceLeft left difference from the annotations /Rect entry + * @param differenceTop top difference from the annotations /Rect entry + * @param differenceRight right difference from the annotations /Rect entry + * @param differenceBottom bottom difference from the annotations /Rect entry + * + */ + public void setRectDifferences(float differenceLeft, float differenceTop, float differenceRight, float differenceBottom) + { + COSArray margins = new COSArray(); + margins.add(new COSFloat(differenceLeft)); + margins.add(new COSFloat(differenceTop)); + margins.add(new COSFloat(differenceRight)); + margins.add(new COSFloat(differenceBottom)); + getCOSObject().setItem(COSName.RD, margins); + } + + /** + * This will get the differences between the annotations "outer" rectangle defined by + * /Rect and the border. + * + * @return the differences. If the entry hasn't been set, an empty array is returned. + */ + public float[] getRectDifferences() + { + COSBase margin = getCOSObject().getItem(COSName.RD); + if (margin instanceof COSArray) + { + return ((COSArray) margin).toFloatArray(); + } + return new float[]{}; + } + + /** + * Set a custom appearance handler for generating the annotations appearance streams.
+ * + * @param appearanceHandler + */ + @Override + public void setCustomAppearanceHandler(PDAppearanceHandler appearanceHandler) + { + customAppearanceHandler = appearanceHandler; + } + + @Override + public void constructAppearances() + { + this.constructAppearances(null); + } + + @Override + public void constructAppearances(PDDocument document) + { + if (customAppearanceHandler == null) + { + if (SUB_TYPE_CIRCLE.equals(getSubtype())) + { + PDCircleAppearanceHandler appearanceHandler = new PDCircleAppearanceHandler(this, document); + appearanceHandler.generateAppearanceStreams(); + } + else if (SUB_TYPE_SQUARE.equals(getSubtype())) + { + PDSquareAppearanceHandler appearanceHandler = new PDSquareAppearanceHandler(this, document); + appearanceHandler.generateAppearanceStreams(); + } + } + else + { + customAppearanceHandler.generateAppearanceStreams(); + } + } + } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationText.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationText.java index 60ae8d87af4..1554c65b051 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationText.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationText.java @@ -18,6 +18,9 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDTextAppearanceHandler; /** * This is the class that represents a text annotation. 
@@ -26,6 +29,7 @@ */ public class PDAnnotationText extends PDAnnotationMarkup { + private PDAppearanceHandler customAppearanceHandler; /* * The various values of the Text as defined in the PDF 1.7 reference Table 172 @@ -66,6 +70,51 @@ public class PDAnnotationText extends PDAnnotationMarkup */ public static final String NAME_INSERT = "Insert"; + /** + * Constant for the name of a circle annotation. + */ + public static final String NAME_CIRCLE = "Circle"; + + /** + * Constant for the name of a cross annotation. + */ + public static final String NAME_CROSS = "Cross"; + + /** + * Constant for the name of a star annotation. + */ + public static final String NAME_STAR = "Star"; + + /** + * Constant for the name of a check annotation. + */ + public static final String NAME_CHECK = "Check"; + + /** + * Constant for the name of a right arrow annotation. + */ + public static final String NAME_RIGHT_ARROW = "RightArrow"; + + /** + * Constant for the name of a right pointer annotation. + */ + public static final String NAME_RIGHT_POINTER = "RightPointer"; + + /** + * Constant for the name of an up arrow annotation. + */ + public static final String NAME_UP_ARROW = "UpArrow"; + + /** + * Constant for the name of an up left arrow annotation. + */ + public static final String NAME_UP_LEFT_ARROW = "UpLeftArrow"; + + /** + * Constant for the name of a crosshairs annotation. + */ + public static final String NAME_CROSS_HAIRS = "CrossHairs"; + /** * The type of annotation. */ @@ -76,8 +125,7 @@ public class PDAnnotationText extends PDAnnotationMarkup */ public PDAnnotationText() { - super(); - getCOSObject().setItem(COSName.SUBTYPE, COSName.getPDFName(SUB_TYPE)); + getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); } /** @@ -172,4 +220,35 @@ public void setStateModel(String stateModel) this.getCOSObject().setString(COSName.STATE_MODEL, stateModel); } + /** + * Set a custom appearance handler for generating the annotations appearance streams.
+ * + * @param appearanceHandler + */ + @Override + public void setCustomAppearanceHandler(PDAppearanceHandler appearanceHandler) + { + customAppearanceHandler = appearanceHandler; + } + + @Override + public void constructAppearances() + { + this.constructAppearances(null); + } + + @Override + public void constructAppearances(PDDocument document) + { + if (customAppearanceHandler == null) + { + PDTextAppearanceHandler appearanceHandler = new PDTextAppearanceHandler(this, document); + appearanceHandler.generateAppearanceStreams(); + } + else + { + customAppearanceHandler.generateAppearanceStreams(); + } + } + } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationTextMarkup.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationTextMarkup.java index b6dab042ed5..57637b1a8f1 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationTextMarkup.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationTextMarkup.java @@ -18,7 +18,14 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDHighlightAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDSquigglyAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDStrikeoutAppearanceHandler; +import org.apache.pdfbox.pdmodel.interactive.annotation.handlers.PDUnderlineAppearanceHandler; /** * This is the abstract class that represents a text markup annotation Introduced in PDF 1.3 specification, except @@ -28,7 +35,8 @@ */ public class PDAnnotationTextMarkup extends PDAnnotationMarkup { - + private 
PDAppearanceHandler customAppearanceHandler; + /** * The types of annotation. */ @@ -58,7 +66,6 @@ private PDAnnotationTextMarkup() */ public PDAnnotationTextMarkup(String subType) { - super(); setSubtype(subType); // Quad points are required, set and empty array @@ -68,7 +75,7 @@ public PDAnnotationTextMarkup(String subType) /** * Creates a TextMarkup annotation from a COSDictionary, expected to be a correct object definition. * - * @param field the PDF objet to represent as a field. + * @param field the PDF object to represent as a field. */ public PDAnnotationTextMarkup(COSDictionary field) { @@ -94,15 +101,13 @@ public void setQuadPoints(float[] quadPoints) */ public float[] getQuadPoints() { - COSArray quadPoints = (COSArray) getCOSObject().getDictionaryObject(COSName.QUADPOINTS); - if (quadPoints != null) - { - return quadPoints.toFloatArray(); - } - else + COSBase base = getCOSObject().getDictionaryObject(COSName.QUADPOINTS); + if (base instanceof COSArray) { - return null; // Should never happen as this is a required item + return ((COSArray) base).toFloatArray(); } + // Should never happen as this is a required item + return null; } /** @@ -126,4 +131,54 @@ public String getSubtype() return getCOSObject().getNameAsString(COSName.SUBTYPE); } + /** + * Set a custom appearance handler for generating the annotations appearance streams. 
+ * + * @param appearanceHandler + */ + @Override + public void setCustomAppearanceHandler(PDAppearanceHandler appearanceHandler) + { + customAppearanceHandler = appearanceHandler; + } + + @Override + public void constructAppearances() + { + this.constructAppearances(null); + } + + @Override + public void constructAppearances(PDDocument document) + { + if (customAppearanceHandler == null) + { + PDAppearanceHandler appearanceHandler = null; + if (SUB_TYPE_HIGHLIGHT.equals(getSubtype())) + { + appearanceHandler = new PDHighlightAppearanceHandler(this, document); + } + else if (SUB_TYPE_SQUIGGLY.equals(getSubtype())) + { + appearanceHandler = new PDSquigglyAppearanceHandler(this, document); + } + else if (SUB_TYPE_STRIKEOUT.equals(getSubtype())) + { + appearanceHandler = new PDStrikeoutAppearanceHandler(this, document); + } + else if (SUB_TYPE_UNDERLINE.equals(getSubtype())) + { + appearanceHandler = new PDUnderlineAppearanceHandler(this, document); + } + + if (appearanceHandler != null) + { + appearanceHandler.generateAppearanceStreams(); + } + } + else + { + customAppearanceHandler.generateAppearanceStreams(); + } + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationUnknown.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationUnknown.java index 9e2734b6bcd..494deff6f91 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationUnknown.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationUnknown.java @@ -19,7 +19,7 @@ import org.apache.pdfbox.cos.COSDictionary; /** - * This is the class that represents an arbitary Unknown Annotation type. + * This is the class that represents an arbitrary Unknown Annotation type. 
* * @author Paul King */ @@ -27,7 +27,7 @@ public class PDAnnotationUnknown extends PDAnnotation { /** - * Creates an arbitary annotation from a COSDictionary, expected to be a correct object definition for some sort of + * Creates an arbitrary annotation from a COSDictionary, expected to be a correct object definition for some sort of * annotation. * * @param dic The dictionary which represents this Annotation. diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationWidget.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationWidget.java index 682e019507d..c5fba83caac 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationWidget.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAnnotationWidget.java @@ -22,9 +22,12 @@ import org.apache.pdfbox.pdmodel.interactive.action.PDActionFactory; import org.apache.pdfbox.pdmodel.interactive.action.PDAnnotationAdditionalActions; import org.apache.pdfbox.pdmodel.interactive.action.PDAction; +import org.apache.pdfbox.pdmodel.interactive.form.PDTerminalField; /** - * This is the class that represents a widget. + * This is the class that represents a widget annotation. This represents the + * appearance of a field and manages user interactions. A field may have several + * widget annotations, which may be on several pages. 
* * @author Ben Litchfield */ @@ -40,7 +43,6 @@ public class PDAnnotationWidget extends PDAnnotation */ public PDAnnotationWidget() { - super(); getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); } @@ -144,8 +146,12 @@ public void setAppearanceCharacteristics( */ public PDAction getAction() { - COSDictionary action = (COSDictionary) this.getCOSObject().getDictionaryObject(COSName.A); - return PDActionFactory.createAction(action); + COSBase base = this.getCOSObject().getDictionaryObject(COSName.A); + if (base instanceof COSDictionary) + { + return PDActionFactory.createAction((COSDictionary) base); + } + return null; } /** @@ -166,13 +172,12 @@ public void setAction(PDAction action) */ public PDAnnotationAdditionalActions getActions() { - COSDictionary aa = (COSDictionary) this.getCOSObject().getDictionaryObject(COSName.AA); - PDAnnotationAdditionalActions retval = null; - if (aa != null) + COSBase base = this.getCOSObject().getDictionaryObject(COSName.AA); + if (base instanceof COSDictionary) { - retval = new PDAnnotationAdditionalActions(aa); + return new PDAnnotationAdditionalActions((COSDictionary) base); } - return retval; + return null; } /** @@ -222,4 +227,23 @@ public PDBorderStyleDictionary getBorderStyle() // } // return null; // } + + /** + * Set the parent field of a widget annotation. This is only required if this widget annotation + * is one of multiple children in a field, and not to be set otherwise. You will usually not + * need this, because in most cases, fields have only one widget and share a common dictionary. + * A usage can be found in the CreateMultiWidgetsForm example. + * + * @param field the parent field. + * @throws IllegalArgumentException if setParent() was called for a field that shares a + * dictionary with its only widget. 
+ */ + public void setParent(PDTerminalField field) + { + if (this.getCOSObject().equals(field.getCOSObject())) + { + throw new IllegalArgumentException("setParent() is not to be called for a field that shares a dictionary with its only widget"); + } + this.getCOSObject().setItem(COSName.PARENT, field); + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAppearanceCharacteristicsDictionary.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAppearanceCharacteristicsDictionary.java index 336f98281dc..437c6bd211e 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAppearanceCharacteristicsDictionary.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAppearanceCharacteristicsDictionary.java @@ -126,7 +126,7 @@ public void setBackground(PDColor c) */ public String getNormalCaption() { - return this.getCOSObject().getString("CA"); + return this.getCOSObject().getString(COSName.CA); } /** @@ -136,7 +136,7 @@ public String getNormalCaption() */ public void setNormalCaption(String caption) { - this.getCOSObject().setString("CA", caption); + this.getCOSObject().setString(COSName.CA, caption); } /** @@ -146,7 +146,7 @@ public void setNormalCaption(String caption) */ public String getRolloverCaption() { - return this.getCOSObject().getString("RC"); + return this.getCOSObject().getString(COSName.RC); } /** @@ -156,7 +156,7 @@ public String getRolloverCaption() */ public void setRolloverCaption(String caption) { - this.getCOSObject().setString("RC", caption); + this.getCOSObject().setString(COSName.RC, caption); } /** @@ -166,7 +166,7 @@ public void setRolloverCaption(String caption) */ public String getAlternateCaption() { - return this.getCOSObject().getString("AC"); + return this.getCOSObject().getString(COSName.AC); } /** @@ -176,7 +176,7 @@ public String getAlternateCaption() */ public void setAlternateCaption(String caption) { - 
this.getCOSObject().setString("AC", caption); + this.getCOSObject().setString(COSName.AC, caption); } /** @@ -186,7 +186,7 @@ public void setAlternateCaption(String caption) */ public PDFormXObject getNormalIcon() { - COSBase i = this.getCOSObject().getDictionaryObject("I"); + COSBase i = this.getCOSObject().getDictionaryObject(COSName.I); if (i instanceof COSStream) { return new PDFormXObject((COSStream)i); @@ -201,7 +201,7 @@ public PDFormXObject getNormalIcon() */ public PDFormXObject getRolloverIcon() { - COSBase i = this.getCOSObject().getDictionaryObject("RI"); + COSBase i = this.getCOSObject().getDictionaryObject(COSName.RI); if (i instanceof COSStream) { return new PDFormXObject((COSStream)i); @@ -216,7 +216,7 @@ public PDFormXObject getRolloverIcon() */ public PDFormXObject getAlternateIcon() { - COSBase i = this.getCOSObject().getDictionaryObject("IX"); + COSBase i = this.getCOSObject().getDictionaryObject(COSName.IX); if (i instanceof COSStream) { return new PDFormXObject((COSStream)i); @@ -229,7 +229,7 @@ private PDColor getColor(COSName itemName) COSBase c = this.getCOSObject().getItem(itemName); if (c instanceof COSArray) { - PDColorSpace colorSpace = null; + PDColorSpace colorSpace; switch (((COSArray) c).size()) { case 1: @@ -242,7 +242,7 @@ private PDColor getColor(COSName itemName) colorSpace = PDDeviceCMYK.INSTANCE; break; default: - break; + return null; } return new PDColor((COSArray) c, colorSpace); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAppearanceDictionary.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAppearanceDictionary.java index 448e9bc0852..cbfc776024f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAppearanceDictionary.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAppearanceDictionary.java @@ -66,14 +66,11 @@ public COSDictionary getCOSObject() public PDAppearanceEntry 
getNormalAppearance() { COSBase entry = dictionary.getDictionaryObject(COSName.N); - if (entry == null) - { - return null; - } - else + if (entry instanceof COSDictionary) { return new PDAppearanceEntry(entry); } + return null; } /** @@ -107,13 +104,13 @@ public void setNormalAppearance(PDAppearanceStream ap) public PDAppearanceEntry getRolloverAppearance() { COSBase entry = dictionary.getDictionaryObject(COSName.R); - if (entry == null) + if (entry instanceof COSDictionary) { - return getNormalAppearance(); + return new PDAppearanceEntry(entry); } else { - return new PDAppearanceEntry(entry); + return getNormalAppearance(); } } @@ -148,13 +145,13 @@ public void setRolloverAppearance(PDAppearanceStream ap) public PDAppearanceEntry getDownAppearance() { COSBase entry = dictionary.getDictionaryObject(COSName.D); - if (entry == null) + if (entry instanceof COSDictionary) { - return getNormalAppearance(); + return new PDAppearanceEntry(entry); } else { - return new PDAppearanceEntry(entry); + return getNormalAppearance(); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAppearanceEntry.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAppearanceEntry.java index 6a525e76743..c0d20b8049d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAppearanceEntry.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDAppearanceEntry.java @@ -80,7 +80,7 @@ public PDAppearanceStream getAppearanceStream() { if (!isStream()) { - throw new IllegalStateException(); + throw new IllegalStateException("This entry is not an appearance stream"); } return new PDAppearanceStream((COSStream) entry); } @@ -94,7 +94,7 @@ public Map getSubDictionary() { if (!isSubDictionary()) { - throw new IllegalStateException(); + throw new IllegalStateException("This entry is not an appearance subdictionary"); } COSDictionary dict = (COSDictionary) entry; diff --git 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDBorderEffectDictionary.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDBorderEffectDictionary.java index 7f5c40ea7b8..7cdbc6d534a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDBorderEffectDictionary.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDBorderEffectDictionary.java @@ -106,7 +106,7 @@ public void setStyle(String s) /** * This will retrieve the border effect, see the STYLE_* constants for valid values. * - * @return the effect of the border + * @return the effect of the border or {@link #STYLE_SOLID} if none is found. */ public String getStyle() { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDBorderStyleDictionary.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDBorderStyleDictionary.java index f58bcd39b01..08ded72474c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDBorderStyleDictionary.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/PDBorderStyleDictionary.java @@ -19,6 +19,7 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSInteger; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.COSObjectable; import org.apache.pdfbox.pdmodel.graphics.PDLineDashPattern; @@ -94,21 +95,41 @@ public COSDictionary getCOSObject() /** * This will set the border width in points, 0 = no border. * + * There is a bug in Adobe Reader DC, float values are ignored for text field widgets. As a + * workaround, floats that are integers (e.g. 2.0) are written as integer in the PDF. + *

+ * In Adobe Acrobat DC, the values are shown as "0 = Invisible, 1 = Thin, 2 = Medium, 3 = Thick" + * for widget and link annotations. + * * @param w float the width in points */ public void setWidth(float w) { - getCOSObject().setFloat("W", w); + // PDFBOX-3929 workaround + if (w == (int) w) + { + getCOSObject().setInt(COSName.W, (int) w); + } + else + { + getCOSObject().setFloat(COSName.W, w); + } } /** * This will retrieve the border width in points, 0 = no border. * - * @return flaot the width of the border in points + * @return The width of the border in points. */ public float getWidth() { - return getCOSObject().getFloat("W", 1); + if (getCOSObject().getDictionaryObject(COSName.W) instanceof COSName) + { + // replicate Adobe behavior although it contradicts the specification + // https://github.com/mozilla/pdf.js/issues/10385 + return 0; + } + return getCOSObject().getFloat(COSName.W, 1); } /** @@ -118,7 +139,7 @@ public float getWidth() */ public void setStyle(String s) { - getCOSObject().setName("S", s); + getCOSObject().setName(COSName.S, s); } /** @@ -128,7 +149,7 @@ public void setStyle(String s) */ public String getStyle() { - return getCOSObject().getNameAsString("S", STYLE_SOLID); + return getCOSObject().getNameAsString(COSName.S, STYLE_SOLID); } /** @@ -138,12 +159,7 @@ public String getStyle() */ public void setDashStyle(COSArray dashArray) { - COSArray array = null; - if (dashArray != null) - { - array = dashArray; - } - getCOSObject().setItem("D", array); + getCOSObject().setItem(COSName.D, dashArray); } /** @@ -153,12 +169,12 @@ public void setDashStyle(COSArray dashArray) */ public PDLineDashPattern getDashStyle() { - COSArray d = (COSArray) getCOSObject().getDictionaryObject("D"); + COSArray d = (COSArray) getCOSObject().getDictionaryObject(COSName.D); if (d == null) { d = new COSArray(); d.add(COSInteger.THREE); - getCOSObject().setItem("D", d); + getCOSObject().setItem(COSName.D, d); } return new PDLineDashPattern(d, 0); } diff --git 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/AnnotationBorder.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/AnnotationBorder.java new file mode 100644 index 00000000000..5e4e9c0909a --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/AnnotationBorder.java @@ -0,0 +1,88 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; + +/** + * Class to collect all sorts of border info about annotations. + * + * @author Tilman Hausherr + */ +class AnnotationBorder +{ + float[] dashArray = null; + boolean underline = false; + float width = 0; + + // return border info.
BorderStyle must be provided as parameter because + // method is not available in the base class + static AnnotationBorder getAnnotationBorder(PDAnnotation annotation, + PDBorderStyleDictionary borderStyle) + { + AnnotationBorder ab = new AnnotationBorder(); + if (borderStyle == null) + { + COSArray border = annotation.getBorder(); + if (border.size() >= 3 && border.getObject(2) instanceof COSNumber) + { + ab.width = ((COSNumber) border.getObject(2)).floatValue(); + } + if (border.size() > 3) + { + COSBase base3 = border.getObject(3); + if (base3 instanceof COSArray) + { + ab.dashArray = ((COSArray) base3).toFloatArray(); + } + } + } + else + { + ab.width = borderStyle.getWidth(); + if (borderStyle.getStyle().equals(PDBorderStyleDictionary.STYLE_DASHED)) + { + ab.dashArray = borderStyle.getDashStyle().getDashArray(); + } + if (borderStyle.getStyle().equals(PDBorderStyleDictionary.STYLE_UNDERLINE)) + { + ab.underline = true; + } + } + if (ab.dashArray != null) + { + boolean allZero = true; + for (float f : ab.dashArray) + { + if (Float.compare(f, 0) != 0) + { + allZero = false; + break; + } + } + if (allZero) + { + ab.dashArray = null; + } + } + return ab; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/CloudyBorder.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/CloudyBorder.java new file mode 100644 index 00000000000..6855b378c9e --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/CloudyBorder.java @@ -0,0 +1,1105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.awt.geom.AffineTransform; +import java.awt.geom.Ellipse2D; +import java.awt.geom.PathIterator; +import java.awt.geom.Point2D; +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; + +/** + * Generates annotation appearances with a cloudy border. + *

+ * Dashed stroke styles are not recommended with cloudy borders. The result would + * not look good because some parts of the arcs are traced twice by the stroked + * path. In fact, Acrobat Reader's line style dialog does not allow choosing a + * dashed and a cloudy style at the same time. + */ + +class CloudyBorder +{ + private static final double ANGLE_180_DEG = Math.PI; + private static final double ANGLE_90_DEG = Math.PI / 2; + private static final double ANGLE_34_DEG = Math.toRadians(34); + private static final double ANGLE_30_DEG = Math.toRadians(30); + private static final double ANGLE_12_DEG = Math.toRadians(12); + + private final PDAppearanceContentStream output; + private final PDRectangle annotRect; + private final double intensity; + private final double lineWidth; + private PDRectangle rectWithDiff; + private boolean outputStarted = false; + private double bboxMinX; + private double bboxMinY; + private double bboxMaxX; + private double bboxMaxY; + + /** + * Creates a new CloudyBorder that writes to the specified + * content stream. + * + * @param stream content stream + * @param intensity intensity of cloudy effect (entry I); typically 1.0 or 2.0 + * @param lineWidth line width for annotation border (entry W) + * @param rect annotation rectangle (entry Rect) + */ + CloudyBorder(PDAppearanceContentStream stream, double intensity, + double lineWidth, PDRectangle rect) + { + this.output = stream; + this.intensity = intensity; + this.lineWidth = lineWidth; + this.annotRect = rect; + } + + /** + * Creates a cloudy border for a rectangular annotation. + * The rectangle is specified by the RD entry and the + * Rect entry that was passed in to the constructor. + *

+ * This can be used for Square and FreeText annotations. However, this does + * not produce the text and the callout line for FreeTexts. + * + * @param rd entry RD, or null if the entry does not exist + * @throws IOException If there is an error writing to the stream. + */ + void createCloudyRectangle(PDRectangle rd) throws IOException + { + rectWithDiff = applyRectDiff(rd, lineWidth / 2); + double left = rectWithDiff.getLowerLeftX(); + double bottom = rectWithDiff.getLowerLeftY(); + double right = rectWithDiff.getUpperRightX(); + double top = rectWithDiff.getUpperRightY(); + + cloudyRectangleImpl(left, bottom, right, top, false); + finish(); + } + + /** + * Creates a cloudy border for a Polygon annotation. + * + * @param path polygon path + * @throws IOException If there is an error writing to the stream. + */ + void createCloudyPolygon(float[][] path) throws IOException + { + int n = path.length; + Point2D.Double[] polygon = new Point2D.Double[n]; + + for (int i = 0; i < n; i++) + { + float[] array = path[i]; + if (array.length == 2) + { + polygon[i] = new Point2D.Double(array[0], array[1]); + } + else if (array.length == 6) + { + // TODO Curve segments are not yet supported in cloudy border. + polygon[i] = new Point2D.Double(array[4], array[5]); + } + } + + cloudyPolygonImpl(polygon, false); + finish(); + } + + /** + * Creates a cloudy border for a Circle annotation. + * The ellipse is specified by the RD entry and the + * Rect entry that was passed in to the constructor. + * + * @param rd entry RD, or null if the entry does not exist + * @throws IOException If there is an error writing to the stream. 
+ */ + void createCloudyEllipse(PDRectangle rd) throws IOException + { + rectWithDiff = applyRectDiff(rd, 0); + double left = rectWithDiff.getLowerLeftX(); + double bottom = rectWithDiff.getLowerLeftY(); + double right = rectWithDiff.getUpperRightX(); + double top = rectWithDiff.getUpperRightY(); + + cloudyEllipseImpl(left, bottom, right, top); + finish(); + } + + /** + * Returns the BBox entry (bounding box) for the + * appearance stream form XObject. + * + * @return Bounding box for appearance stream form XObject. + */ + PDRectangle getBBox() + { + return getRectangle(); + } + + /** + * Returns the updated Rect entry for the annotation. + * The rectangle completely contains the cloudy border. + * + * @return Annotation Rect. + */ + PDRectangle getRectangle() + { + return new PDRectangle((float)bboxMinX, (float)bboxMinY, + (float)(bboxMaxX - bboxMinX), (float)(bboxMaxY - bboxMinY)); + } + + /** + * Returns the Matrix entry for the appearance stream form XObject. + * + * @return Matrix for appearance stream form XObject. + */ + AffineTransform getMatrix() + { + return AffineTransform.getTranslateInstance(-bboxMinX, -bboxMinY); + } + + /** + * Returns the updated RD entry for Square and Circle annotations. + * + * @return Annotation RD value. + */ + PDRectangle getRectDifference() + { + if (annotRect == null) + { + float d = (float)lineWidth / 2; + return new PDRectangle(d, d, (float)lineWidth, (float)lineWidth); + } + + PDRectangle re = (rectWithDiff != null) ? 
rectWithDiff : annotRect; + + float left = re.getLowerLeftX() - (float)bboxMinX; + float bottom = re.getLowerLeftY() - (float)bboxMinY; + float right = (float)bboxMaxX - re.getUpperRightX(); + float top = (float)bboxMaxY - re.getUpperRightY(); + + return new PDRectangle(left, bottom, right - left, top - bottom); + } + + private static double cosine(double dx, double hypot) + { + if (Double.compare(hypot, 0.0) == 0) + { + return 0; + } + return dx / hypot; + } + + private static double sine(double dy, double hypot) + { + if (Double.compare(hypot, 0.0) == 0) + { + return 0; + } + return dy / hypot; + } + + /** + * Cloudy rectangle implementation is based on converting the rectangle + * to a polygon. + */ + private void cloudyRectangleImpl(double left, double bottom, + double right, double top, boolean isEllipse) throws IOException + { + double w = right - left; + double h = top - bottom; + + if (intensity <= 0.0) + { + output.addRect((float)left, (float)bottom, (float)w, (float)h); + bboxMinX = left; + bboxMinY = bottom; + bboxMaxX = right; + bboxMaxY = top; + return; + } + + // Make a polygon with direction equal to the positive angle direction. + Point2D.Double[] polygon; + + if (w < 1.0) + { + polygon = new Point2D.Double[] + { + new Point2D.Double(left, bottom), new Point2D.Double(left, top), + new Point2D.Double(left, bottom) + }; + } + else if (h < 1.0) + { + polygon = new Point2D.Double[] + { + new Point2D.Double(left, bottom), new Point2D.Double(right, bottom), + new Point2D.Double(left, bottom) + }; + } + else + { + polygon = new Point2D.Double[] + { + new Point2D.Double(left, bottom), new Point2D.Double(right, bottom), + new Point2D.Double(right, top), new Point2D.Double(left, top), + new Point2D.Double(left, bottom) + }; + } + + cloudyPolygonImpl(polygon, isEllipse); + } + + /** + * Cloudy polygon implementation. 
+ * + * @param vertices polygon vertices; first and last point must be equal + * @param isEllipse specifies if the polygon represents an ellipse + */ + private void cloudyPolygonImpl(Point2D.Double[] vertices, boolean isEllipse) + throws IOException + { + Point2D.Double[] polygon = removeZeroLengthSegments(vertices); + getPositivePolygon(polygon); + int numPoints = polygon.length; + + if (numPoints < 2) + { + return; + } + if (intensity <= 0.0) + { + moveTo(polygon[0]); + for (int i = 1; i < numPoints; i++) + { + lineTo(polygon[i]); + } + return; + } + + double cloudRadius = isEllipse ? getEllipseCloudRadius() : getPolygonCloudRadius(); + + if (cloudRadius < 0.5) + { + cloudRadius = 0.5; + } + + final double k = Math.cos(ANGLE_34_DEG); + final double advIntermDefault = 2 * k * cloudRadius; + final double advCornerDefault = k * cloudRadius; + double[] array = new double[2]; + double anglePrev = 0; + + // The number of curls per polygon segment is hardly ever an integer, + // so the length of some curls must be adjustable. We adjust the angle + // of the trailing arc of corner curls and the leading arc of the first + // intermediate curl. + // In each polygon segment, we have n intermediate curls plus one half of a + // corner curl at each end. One of the n intermediate curls is adjustable. + // Thus the number of fixed (or unadjusted) intermediate curls is n - 1. + + // Find the adjusted angle `alpha` for the first corner curl. + int n0 = computeParamsPolygon(advIntermDefault, advCornerDefault, k, cloudRadius, + polygon[numPoints - 2].distance(polygon[0]), array); + double alphaPrev = (n0 == 0) ? array[0] : ANGLE_34_DEG; + + for (int j = 0; j + 1 < numPoints; j++) + { + Point2D.Double pt = polygon[j]; + Point2D.Double ptNext = polygon[j + 1]; + double length = pt.distance(ptNext); + if (Double.compare(length, 0.0) == 0) + { + alphaPrev = ANGLE_34_DEG; + continue; + } + + // n is the number of intermediate curls in the current polygon segment. 
+ int n = computeParamsPolygon(advIntermDefault, advCornerDefault, k, + cloudRadius, length, array); + if (n < 0) + { + if (!outputStarted) + { + moveTo(pt); + } + continue; + } + + double alpha = array[0]; + double dx = array[1]; + + double angleCur = Math.atan2(ptNext.y - pt.y, ptNext.x - pt.x); + if (j == 0) + { + Point2D.Double ptPrev = polygon[numPoints - 2]; + anglePrev = Math.atan2(pt.y - ptPrev.y, pt.x - ptPrev.x); + } + + double cos = cosine(ptNext.x - pt.x, length); + double sin = sine(ptNext.y - pt.y, length); + double x = pt.x; + double y = pt.y; + + addCornerCurl(anglePrev, angleCur, cloudRadius, pt.x, pt.y, alpha, + alphaPrev, !outputStarted); + // Proceed to the center point of the first intermediate curl. + double adv = 2 * k * cloudRadius + 2 * dx; + x += adv * cos; + y += adv * sin; + + // Create the first intermediate curl. + int numInterm = n; + if (n >= 1) + { + addFirstIntermediateCurl(angleCur, cloudRadius, alpha, x, y); + x += advIntermDefault * cos; + y += advIntermDefault * sin; + numInterm = n - 1; + } + + // Create one intermediate curl and replicate it along the polygon segment. + Point2D.Double[] template = getIntermediateCurlTemplate(angleCur, cloudRadius); + for (int i = 0; i < numInterm; i++) + { + outputCurlTemplate(template, x, y); + x += advIntermDefault * cos; + y += advIntermDefault * sin; + } + + anglePrev = angleCur; + alphaPrev = (n == 0) ? alpha : ANGLE_34_DEG; + } + } + + /** + * Computes parameters for a cloudy polygon: n, alpha, and dx. 
+ */ + private int computeParamsPolygon(double advInterm, double advCorner, double k, + double r, double length, double[] array) + { + if (Double.compare(length, 0.0) == 0) + { + array[0] = ANGLE_34_DEG; + array[1] = 0; + return -1; + } + + // n is the number of intermediate curls in the current polygon segment + int n = (int) Math.ceil((length - 2 * advCorner) / advInterm); + + // Fitting error along polygon segment + double e = length - (2 * advCorner + n * advInterm); + // Fitting error per each adjustable half curl + double dx = e / 2; + + // Convert fitting error to an angle that can be used to control arcs. + double arg = (k * r + dx) / r; + double alpha = (arg < -1.0 || arg > 1.0) ? 0.0 : Math.acos(arg); + + array[0] = alpha; + array[1] = dx; + return n; + } + + /** + * Creates a corner curl for polygons and ellipses. + */ + private void addCornerCurl(double anglePrev, double angleCur, double radius, + double cx, double cy, double alpha, double alphaPrev, boolean addMoveTo) + throws IOException + { + double a = anglePrev + ANGLE_180_DEG + alphaPrev; + double b = anglePrev + ANGLE_180_DEG + alphaPrev - Math.toRadians(22); + getArcSegment(a, b, cx, cy, radius, radius, null, addMoveTo); + + a = b; + b = angleCur - alpha; + getArc(a, b, radius, radius, cx, cy, null, false); + } + + /** + * Generates the first intermediate curl for a cloudy polygon. + */ + private void addFirstIntermediateCurl(double angleCur, double r, double alpha, + double cx, double cy) throws IOException + { + double a = angleCur + ANGLE_180_DEG; + + getArcSegment(a + alpha, a + alpha - ANGLE_30_DEG, cx, cy, r, r, null, false); + getArcSegment(a + alpha - ANGLE_30_DEG, a + ANGLE_90_DEG, cx, cy, r, r, null, false); + getArcSegment(a + ANGLE_90_DEG, a + ANGLE_180_DEG - ANGLE_34_DEG, + cx, cy, r, r, null, false); + } + + /** + * Returns a template for intermediate curls in a cloudy polygon. 
+ */ + private Point2D.Double[] getIntermediateCurlTemplate(double angleCur, double r) + throws IOException + { + ArrayList<Point2D.Double> points = new ArrayList<Point2D.Double>(); + double a = angleCur + ANGLE_180_DEG; + + getArcSegment(a + ANGLE_34_DEG, a + ANGLE_12_DEG, 0, 0, r, r, points, false); + getArcSegment(a + ANGLE_12_DEG, a + ANGLE_90_DEG, 0, 0, r, r, points, false); + getArcSegment(a + ANGLE_90_DEG, a + ANGLE_180_DEG - ANGLE_34_DEG, + 0, 0, r, r, points, false); + + return points.toArray(new Point2D.Double[points.size()]); + } + + /** + * Writes the curl template points to the output and applies translation (x, y). + */ + private void outputCurlTemplate(Point2D.Double[] template, double x, double y) + throws IOException + { + int n = template.length; + int i = 0; + + if ((n % 3) == 1) + { + Point2D.Double a = template[0]; + moveTo(a.x + x, a.y + y); + i++; + } + for (; i + 2 < n; i += 3) + { + Point2D.Double a = template[i]; + Point2D.Double b = template[i + 1]; + Point2D.Double c = template[i + 2]; + curveTo(a.x + x, a.y + y, b.x + x, b.y + y, c.x + x, c.y + y); + } + } + + private PDRectangle applyRectDiff(PDRectangle rd, double min) + { + float rectLeft = annotRect.getLowerLeftX(); + float rectBottom = annotRect.getLowerLeftY(); + float rectRight = annotRect.getUpperRightX(); + float rectTop = annotRect.getUpperRightY(); + + // Normalize from the untouched Rect values so that a swapped Rect keeps both extremes + rectLeft = Math.min(annotRect.getLowerLeftX(), annotRect.getUpperRightX()); + rectBottom = Math.min(annotRect.getLowerLeftY(), annotRect.getUpperRightY()); + rectRight = Math.max(annotRect.getLowerLeftX(), annotRect.getUpperRightX()); + rectTop = Math.max(annotRect.getLowerLeftY(), annotRect.getUpperRightY()); + + double rdLeft; + double rdBottom; + double rdRight; + double rdTop; + + if (rd != null) + { + rdLeft = Math.max(rd.getLowerLeftX(), min); + rdBottom = Math.max(rd.getLowerLeftY(), min); + rdRight = Math.max(rd.getUpperRightX(), min); + rdTop = Math.max(rd.getUpperRightY(), min); + } + else + { + rdLeft = min; + rdBottom = min; + rdRight = min; + rdTop = min; + } + + rectLeft += rdLeft; + rectBottom += rdBottom; + rectRight -= rdRight; + rectTop -= rdTop; + + return 
new PDRectangle(rectLeft, rectBottom, rectRight - rectLeft, rectTop - rectBottom); + } + + private void reversePolygon(Point2D.Double[] points) + { + int len = points.length; + int n = len / 2; + for (int i = 0; i < n; i++) + { + int j = len - i - 1; + Point2D.Double pi = points[i]; + Point2D.Double pj = points[j]; + points[i] = pj; + points[j] = pi; + } + } + + /** + * Makes a polygon whose direction is the same as the positive angle + * direction in the coordinate system. + * The polygon must not intersect itself. + */ + private void getPositivePolygon(Point2D.Double[] points) + { + if (getPolygonDirection(points) < 0) + { + reversePolygon(points); + } + } + + /** + * Returns the direction of the specified polygon. + * A positive value indicates that the polygon's direction is the same as the + * direction of positive angles in the coordinate system. + * A negative value indicates the opposite direction. + * + * The polygon must not intersect itself. A 2-point polygon is not acceptable. + * This is based on the "shoelace formula". + */ + private double getPolygonDirection(Point2D.Double[] points) + { + double a = 0; + int len = points.length; + for (int i = 0; i < len; i++) + { + int j = (i + 1) % len; + a += points[i].x * points[j].y - points[i].y * points[j].x; + } + return a; + } + + /** + * Creates one or more Bézier curves that represent an elliptical arc. + * Angles are in radians. + * The arc will always proceed in the positive angle direction. + * If the argument `out` is null, this writes the results to the instance + * variable `output`. 
+ */ + private void getArc(double startAng, double endAng, double rx, double ry, + double cx, double cy, ArrayList out, boolean addMoveTo) throws IOException + { + final double angleIncr = Math.PI / 2; + double startx = rx * Math.cos(startAng) + cx; + double starty = ry * Math.sin(startAng) + cy; + + double angleTodo = endAng - startAng; + while (angleTodo < 0) + { + angleTodo += 2 * Math.PI; + } + double sweep = angleTodo; + double angleDone = 0; + + if (addMoveTo) + { + if (out != null) + { + out.add(new Point2D.Double(startx, starty)); + } + else + { + moveTo(startx, starty); + } + } + + while (angleTodo > angleIncr) + { + getArcSegment(startAng + angleDone, + startAng + angleDone + angleIncr, cx, cy, rx, ry, out, false); + angleDone += angleIncr; + angleTodo -= angleIncr; + } + + if (angleTodo > 0) + { + getArcSegment(startAng + angleDone, startAng + sweep, cx, cy, rx, ry, out, false); + } + } + + /** + * Creates a single Bézier curve that represents a section of an elliptical + * arc. The sweep angle of the section must not be larger than 90 degrees. + * If argument `out` is null, this writes the results to the instance + * variable `output`. + */ + private void getArcSegment(double startAng, double endAng, double cx, double cy, + double rx, double ry, ArrayList out, boolean addMoveTo) throws IOException + { + // Algorithm is from the FAQ of the news group comp.text.pdf + + double cosA = Math.cos(startAng); + double sinA = Math.sin(startAng); + double cosB = Math.cos(endAng); + double sinB = Math.sin(endAng); + double denom = Math.sin((endAng - startAng) / 2.0); + if (Double.compare(denom, 0.0) == 0) + { + // This can happen only if endAng == startAng. + // The arc sweep angle is zero, so we create no arc at all. 
+ if (addMoveTo) + { + double xs = cx + rx * cosA; + double ys = cy + ry * sinA; + if (out != null) + { + out.add(new Point2D.Double(xs, ys)); + } + else + { + moveTo(xs, ys); + } + } + return; + } + double bcp = 1.333333333 * (1 - Math.cos((endAng - startAng) / 2.0)) / denom; + double p1x = cx + rx * (cosA - bcp * sinA); + double p1y = cy + ry * (sinA + bcp * cosA); + double p2x = cx + rx * (cosB + bcp * sinB); + double p2y = cy + ry * (sinB - bcp * cosB); + double p3x = cx + rx * cosB; + double p3y = cy + ry * sinB; + + if (addMoveTo) + { + double xs = cx + rx * cosA; + double ys = cy + ry * sinA; + if (out != null) + { + out.add(new Point2D.Double(xs, ys)); + } + else + { + moveTo(xs, ys); + } + } + + if (out != null) + { + out.add(new Point2D.Double(p1x, p1y)); + out.add(new Point2D.Double(p2x, p2y)); + out.add(new Point2D.Double(p3x, p3y)); + } + else + { + curveTo(p1x, p1y, p2x, p2y, p3x, p3y); + } + } + + /** + * Flattens an ellipse into a closed polygon (the last point equals or nearly equals the first). + */ + private static Point2D.Double[] flattenEllipse(double left, double bottom, + double right, double top) + { + Ellipse2D.Double ellipse = new Ellipse2D.Double(left, bottom, right - left, top - bottom); + final double flatness = 0.50; + PathIterator iterator = ellipse.getPathIterator(null, flatness); + double[] coords = new double[6]; + ArrayList<Point2D.Double> points = new ArrayList<Point2D.Double>(); + + while (!iterator.isDone()) + { + switch (iterator.currentSegment(coords)) + { + case PathIterator.SEG_MOVETO: + case PathIterator.SEG_LINETO: + points.add(new Point2D.Double(coords[0], coords[1])); + break; + // Curve segments are not expected because the path iterator is + // flattened. SEG_CLOSE can be ignored.
+ default: + break; + } + iterator.next(); + } + + int size = points.size(); + final double closeTestLimit = 0.05; + + if (size >= 2 && points.get(size - 1).distance(points.get(0)) > closeTestLimit) + { + points.add(points.get(0)); // close the polygon by repeating its first point + } + return points.toArray(new Point2D.Double[points.size()]); + } + + /** + * Cloudy ellipse implementation. + */ + private void cloudyEllipseImpl(final double leftOrig, final double bottomOrig, + final double rightOrig, final double topOrig) throws IOException + { + if (intensity <= 0.0) + { + drawBasicEllipse(leftOrig, bottomOrig, rightOrig, topOrig); + return; + } + + double left = leftOrig; + double bottom = bottomOrig; + double right = rightOrig; + double top = topOrig; + double width = right - left; + double height = top - bottom; + double cloudRadius = getEllipseCloudRadius(); + + // Omit cloudy border if the ellipse is very small. + final double threshold1 = 0.50 * cloudRadius; + if (width < threshold1 && height < threshold1) + { + drawBasicEllipse(left, bottom, right, top); + return; + } + + // Draw a cloudy rectangle instead of an ellipse when the + // width or height is very small. + final double threshold2 = 5; + if ((width < threshold2 && height > 20) || (width > 20 && height < threshold2)) + { + cloudyRectangleImpl(left, bottom, right, top, true); + return; + } + + // Decrease radii (while center point does not move). This makes the + // "tails" of the curls almost touch the ellipse outline. + double radiusAdj = Math.sin(ANGLE_12_DEG) * cloudRadius - 1.50; + if (width > 2 * radiusAdj) + { + left += radiusAdj; + right -= radiusAdj; + } + else + { + double mid = (left + right) / 2; + left = mid - 0.10; + right = mid + 0.10; + } + if (height > 2 * radiusAdj) + { + top -= radiusAdj; + bottom += radiusAdj; + } + else + { + double mid = (top + bottom) / 2; + top = mid + 0.10; + bottom = mid - 0.10; + } + + // Flatten the ellipse into a polygon.
The segment lengths of the flattened + // result don't need to be extremely short because the loop below is able to + // interpolate between polygon points when it computes the center points + // at which each curl is placed. + + Point2D.Double[] flatPolygon = flattenEllipse(left, bottom, right, top); + int numPoints = flatPolygon.length; + if (numPoints < 2) + { + return; + } + + double totLen = 0; + for(int i = 1; i < numPoints; i++){ + totLen += flatPolygon[i - 1].distance(flatPolygon[i]); + } + + final double k = Math.cos(ANGLE_34_DEG); + double curlAdvance = 2 * k * cloudRadius; + int n = (int) Math.ceil(totLen / curlAdvance); + if (n < 2) + { + drawBasicEllipse(leftOrig, bottomOrig, rightOrig, topOrig); + return; + } + + curlAdvance = totLen / n; + cloudRadius = curlAdvance / (2 * k); + + if (cloudRadius < 0.5) + { + cloudRadius = 0.5; + curlAdvance = 2 * k * cloudRadius; + } + else if (cloudRadius < 3.0) + { + // Draw a small circle when the scaled radius becomes very small. + // This happens also if intensity is much smaller than 1. + drawBasicEllipse(leftOrig, bottomOrig, rightOrig, topOrig); + return; + } + + // Construct centerPoints array, in which each point is the center point of a curl. + // The length of each centerPoints segment ideally equals curlAdv but that + // is not true in regions where the ellipse curvature is high. 
+ + int centerPointsLength = n; + Point2D.Double[] centerPoints = new Point2D.Double[centerPointsLength]; + int centerPointsIndex = 0; + double lengthRemain = 0; + final double comparisonToler = lineWidth * 0.10; + + for (int i = 0; i + 1 < numPoints; i++) + { + Point2D.Double p1 = flatPolygon[i]; + Point2D.Double p2 = flatPolygon[i + 1]; + double dx = p2.x - p1.x; + double dy = p2.y - p1.y; + double length = p1.distance(p2); + if (Double.compare(length, 0.0) == 0) + { + continue; + } + double lengthTodo = length + lengthRemain; + if (lengthTodo >= curlAdvance - comparisonToler || i == numPoints - 2) + { + double cos = cosine(dx, length); + double sin = sine(dy, length); + double d = curlAdvance - lengthRemain; + do + { + double x = p1.x + d * cos; + double y = p1.y + d * sin; + if (centerPointsIndex < centerPointsLength) + { + centerPoints[centerPointsIndex++] = new Point2D.Double(x, y); + } + lengthTodo -= curlAdvance; + d += curlAdvance; + } + while (lengthTodo >= curlAdvance - comparisonToler); + + lengthRemain = lengthTodo; + if (lengthRemain < 0) + { + lengthRemain = 0; + } + } + else + { + lengthRemain += length; + } + } + + // Note: centerPoints does not repeat the first point as the last point + // to create a "closing" segment. + + // Place a curl at each point of the centerPoints array. + // In regions where the ellipse curvature is high, the centerPoints segments + // are shorter than the actual distance along the ellipse. Thus we must + // again compute arc adjustments like in cloudy polygons. 
+ + numPoints = centerPointsIndex; + double anglePrev = 0; + double alphaPrev = 0; + + for (int i = 0; i < numPoints; i++) + { + int idxNext = i + 1; + if (i + 1 >= numPoints) + { + idxNext = 0; + } + Point2D.Double pt = centerPoints[i]; + Point2D.Double ptNext = centerPoints[idxNext]; + + if (i == 0) + { + Point2D.Double ptPrev = centerPoints[numPoints - 1]; + anglePrev = Math.atan2(pt.y - ptPrev.y, pt.x - ptPrev.x); + alphaPrev = computeParamsEllipse(ptPrev, pt, cloudRadius, curlAdvance); + } + + double angleCur = Math.atan2(ptNext.y - pt.y, ptNext.x - pt.x); + double alpha = computeParamsEllipse(pt, ptNext, cloudRadius, curlAdvance); + + addCornerCurl(anglePrev, angleCur, cloudRadius, pt.x, pt.y, alpha, + alphaPrev, !outputStarted); + + anglePrev = angleCur; + alphaPrev = alpha; + } + } + + /** + * Computes the alpha parameter for an ellipse curl. + */ + private double computeParamsEllipse(Point2D.Double pt, Point2D.Double ptNext, + double r, double curlAdv) + { + double length = pt.distance(ptNext); + if (Double.compare(length, 0.0) == 0) + { + return ANGLE_34_DEG; + } + + double e = length - curlAdv; + double arg = (curlAdv / 2 + e / 2) / r; + return (arg < -1.0 || arg > 1.0) ? 0.0 : Math.acos(arg); + } + + private Point2D.Double[] removeZeroLengthSegments(Point2D.Double[] polygon) + { + int np = polygon.length; + if (np <= 2) + { + return polygon; + } + + final double toler = 0.50; + int npNew = np; + Point2D.Double ptPrev = polygon[0]; + + // Don't remove the last point if it equals the first point. 
+ for (int i = 1; i < np; i++) + { + Point2D.Double pt = polygon[i]; + if (Math.abs(pt.x - ptPrev.x) < toler && Math.abs(pt.y - ptPrev.y) < toler) + { + polygon[i] = null; + npNew--; + } + ptPrev = pt; + } + + if (npNew == np) + { + return polygon; + } + + Point2D.Double[] polygonNew = new Point2D.Double[npNew]; + int j = 0; + for (int i = 0; i < np; i++) + { + Point2D.Double pt = polygon[i]; + if (pt != null) + { + polygonNew[j++] = pt; + } + } + + return polygonNew; + } + + /** + * Draws an ellipse without a cloudy border effect. + */ + private void drawBasicEllipse(double left, double bottom, double right, double top) + throws IOException + { + double rx = Math.abs(right - left) / 2; + double ry = Math.abs(top - bottom) / 2; + double cx = (left + right) / 2; + double cy = (bottom + top) / 2; + getArc(0, 2 * Math.PI, rx, ry, cx, cy, null, true); + } + + private void beginOutput(double x, double y) throws IOException + { + bboxMinX = x; + bboxMinY = y; + bboxMaxX = x; + bboxMaxY = y; + outputStarted = true; + // Set line join to bevel to avoid spikes + output.setLineJoinStyle(2); + } + + private void updateBBox(double x, double y) + { + bboxMinX = Math.min(bboxMinX, x); + bboxMinY = Math.min(bboxMinY, y); + bboxMaxX = Math.max(bboxMaxX, x); + bboxMaxY = Math.max(bboxMaxY, y); + } + + private void moveTo(Point2D.Double p) throws IOException + { + moveTo(p.x, p.y); + } + + private void moveTo(double x, double y) throws IOException + { + if (outputStarted) + { + updateBBox(x, y); + } + else + { + beginOutput(x, y); + } + + output.moveTo((float)x, (float)y); + } + + private void lineTo(Point2D.Double p) throws IOException + { + lineTo(p.x, p.y); + } + + private void lineTo(double x, double y) throws IOException + { + if (outputStarted) + { + updateBBox(x, y); + } + else + { + beginOutput(x, y); + } + + output.lineTo((float)x, (float)y); + } + + private void curveTo(double ax, double ay, double bx, double by, double cx, double cy) + throws IOException + { + 
updateBBox(ax, ay); + updateBBox(bx, by); + updateBBox(cx, cy); + output.curveTo((float)ax, (float)ay, (float)bx, (float)by, (float)cx, (float)cy); + } + + private void finish() throws IOException + { + if (outputStarted) + { + output.closePath(); + } + + if (lineWidth > 0) + { + double d = lineWidth / 2; + bboxMinX -= d; + bboxMinY -= d; + bboxMaxX += d; + bboxMaxY += d; + } + } + + private double getEllipseCloudRadius() + { + // Equation deduced from Acrobat Reader's appearance streams. Circle + // annotations have a slightly larger radius than Polygons and Squares. + return 4.75 * intensity + 0.5 * lineWidth; + } + + private double getPolygonCloudRadius() + { + // Equation deduced from Acrobat Reader's appearance streams. + return 4 * intensity + 0.5 * lineWidth; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDAbstractAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDAbstractAppearanceHandler.java new file mode 100644 index 00000000000..ed8ee902832 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDAbstractAppearanceHandler.java @@ -0,0 +1,537 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.awt.geom.AffineTransform; +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLine; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationSquareCircle; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceEntry; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; + +/** + * Generic handler to generate the fields appearance. + * + * Individual handler will provide specific implementations for different field + * types. + * + */ +public abstract class PDAbstractAppearanceHandler implements PDAppearanceHandler +{ + private final PDAnnotation annotation; + protected PDDocument document; + + /** + * Line ending styles where the line has to be drawn shorter (minus line width). + */ + protected static final Set SHORT_STYLES = createShortStyles(); + + static final double ARROW_ANGLE = Math.toRadians(30); + + /** + * Line ending styles where there is an interior color. + */ + protected static final Set INTERIOR_COLOR_STYLES = createInteriorColorStyles(); + + /** + * Line ending styles where the shape changes its angle, e.g. arrows. 
+ */ + protected static final Set ANGLED_STYLES = createAngledStyles(); + + public PDAbstractAppearanceHandler(PDAnnotation annotation) + { + this(annotation, null); + } + + public PDAbstractAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + this.annotation = annotation; + this.document = document; + } + + PDAnnotation getAnnotation() + { + return annotation; + } + + PDColor getColor() + { + return annotation.getColor(); + } + + PDRectangle getRectangle() + { + return annotation.getRectangle(); + } + + protected COSStream createCOSStream() + { + return document == null ? new COSStream() : document.getDocument().createCOSStream(); + } + + /** + * Get the annotations appearance dictionary. + * + *

+ * This will get the annotations appearance dictionary. If this is not + * existent an empty appearance dictionary will be created. + * + * @return the annotations appearance dictionary + */ + PDAppearanceDictionary getAppearance() + { + PDAppearanceDictionary appearanceDictionary = annotation.getAppearance(); + if (appearanceDictionary == null) + { + appearanceDictionary = new PDAppearanceDictionary(); + annotation.setAppearance(appearanceDictionary); + } + return appearanceDictionary; + } + + /** + * Get the annotations normal appearance content stream. + * + *

+ * This will get the annotations normal appearance content stream, to 'draw' to. It will be + * uncompressed. + * + * @return the appearance entry representing the normal appearance. + * @throws IOException + */ + PDAppearanceContentStream getNormalAppearanceAsContentStream() throws IOException + { + return getNormalAppearanceAsContentStream(false); + } + + /** + * Get the annotations normal appearance content stream. + * + *

+ * This will get the annotations normal appearance content stream, to 'draw' to. + * + * @param compress whether the content stream is to be compressed. Set this to true when + * creating long content streams. + * @return the appearance entry representing the normal appearance. + * @throws IOException + */ + PDAppearanceContentStream getNormalAppearanceAsContentStream(boolean compress) throws IOException + { + PDAppearanceEntry appearanceEntry = getNormalAppearance(); + return getAppearanceEntryAsContentStream(appearanceEntry, compress); + } + + /** + * Get the annotations down appearance. + * + *

+ * This will get the annotations down appearance. If this is not existent an + * empty appearance entry will be created. + * + * @return the appearance entry representing the down appearance. + */ + PDAppearanceEntry getDownAppearance() + { + PDAppearanceDictionary appearanceDictionary = getAppearance(); + PDAppearanceEntry downAppearanceEntry = appearanceDictionary.getDownAppearance(); + + if (downAppearanceEntry.isSubDictionary()) + { + downAppearanceEntry = new PDAppearanceEntry(createCOSStream()); + appearanceDictionary.setDownAppearance(downAppearanceEntry); + } + + return downAppearanceEntry; + } + + /** + * Get the annotations rollover appearance. + * + *

+ * This will get the annotations rollover appearance. If this is not + * existent an empty appearance entry will be created. + * + * @return the appearance entry representing the rollover appearance. + */ + PDAppearanceEntry getRolloverAppearance() + { + PDAppearanceDictionary appearanceDictionary = getAppearance(); + PDAppearanceEntry rolloverAppearanceEntry = appearanceDictionary.getRolloverAppearance(); + + if (rolloverAppearanceEntry.isSubDictionary()) + { + rolloverAppearanceEntry = new PDAppearanceEntry(createCOSStream()); + appearanceDictionary.setRolloverAppearance(rolloverAppearanceEntry); + } + + return rolloverAppearanceEntry; + } + + /** + * Get a padded rectangle. + * + *

Creates a new rectangle with padding applied to each side. + * + * @param rectangle the rectangle. + * @param padding the padding to apply. + * @return the padded rectangle. + */ + PDRectangle getPaddedRectangle(PDRectangle rectangle, float padding) + { + return new PDRectangle(rectangle.getLowerLeftX() + padding, rectangle.getLowerLeftY() + padding, + rectangle.getWidth() - 2 * padding, rectangle.getHeight() - 2 * padding); + } + + /** + * Get a rectangle enlarged by the differences. + * + *

+ * Creates a new rectangle with differences added to each side. If there are no valid + * differences, then the original rectangle is returned. + * + * @param rectangle the rectangle. + * @param differences the differences to apply. + * @return the padded rectangle. + */ + PDRectangle addRectDifferences(PDRectangle rectangle, float[] differences) + { + if (differences == null || differences.length != 4) + { + return rectangle; + } + + return new PDRectangle(rectangle.getLowerLeftX() - differences[0], + rectangle.getLowerLeftY() - differences[1], + rectangle.getWidth() + differences[0] + differences[2], + rectangle.getHeight() + differences[1] + differences[3]); + } + + /** + * Get a rectangle with the differences applied to each side. + * + *

+ * Creates a new rectangle inset by the differences on each side. If there are no valid + * differences, then the original rectangle is returned. + * + * @param rectangle the rectangle. + * @param differences the differences to apply. + * @return the inset rectangle. + */ + PDRectangle applyRectDifferences(PDRectangle rectangle, float[] differences) + { + if (differences == null || differences.length != 4) + { + return rectangle; + } + return new PDRectangle(rectangle.getLowerLeftX() + differences[0], + rectangle.getLowerLeftY() + differences[1], + rectangle.getWidth() - differences[0] - differences[2], + rectangle.getHeight() - differences[1] - differences[3]); + } + + void setOpacity(PDAppearanceContentStream contentStream, float opacity) throws IOException + { + if (opacity < 1) + { + PDExtendedGraphicsState gs = new PDExtendedGraphicsState(); + gs.setStrokingAlphaConstant(opacity); + gs.setNonStrokingAlphaConstant(opacity); + + contentStream.setGraphicsStateParameters(gs); + } + } + + /** + * Draw a line ending style. + * + * @param style + * @param cs + * @param x + * @param y + * @param width + * @param hasStroke + * @param hasBackground + * @param ending false if left, true if right of an imagined horizontal line (important for + * arrows). + * + * @throws IOException + */ + void drawStyle(String style, final PDAppearanceContentStream cs, float x, float y, + float width, boolean hasStroke, boolean hasBackground, boolean ending) throws IOException + { + int sign = ending ?
-1 : 1; + + if (PDAnnotationLine.LE_OPEN_ARROW.equals(style) || PDAnnotationLine.LE_CLOSED_ARROW.equals(style)) + { + drawArrow(cs, x + sign * width, y, sign * width * 9); + } + else if (PDAnnotationLine.LE_BUTT.equals(style)) + { + cs.moveTo(x, y - width * 3); + cs.lineTo(x, y + width * 3); + } + else if (PDAnnotationLine.LE_DIAMOND.equals(style)) + { + drawDiamond(cs, x, y, width * 3); + } + else if (PDAnnotationLine.LE_SQUARE.equals(style)) + { + cs.addRect(x - width * 3, y - width * 3, width * 6, width * 6); + } + else if (PDAnnotationLine.LE_CIRCLE.equals(style)) + { + drawCircle(cs, x, y, width * 3); + } + else if (PDAnnotationLine.LE_R_OPEN_ARROW.equals(style) || PDAnnotationLine.LE_R_CLOSED_ARROW.equals(style)) + { + drawArrow(cs, x + (0 - sign) * width, y, (0 - sign) * width * 9); + } + else if (PDAnnotationLine.LE_SLASH.equals(style)) + { + // the line is 18 x linewidth at an angle of 60° + cs.moveTo(x + (float) (Math.cos(Math.toRadians(60)) * width * 9), + y + (float) (Math.sin(Math.toRadians(60)) * width * 9)); + cs.lineTo(x + (float) (Math.cos(Math.toRadians(240)) * width * 9), + y + (float) (Math.sin(Math.toRadians(240)) * width * 9)); + } + + + + if (PDAnnotationLine.LE_R_CLOSED_ARROW.equals(style) || + PDAnnotationLine.LE_CLOSED_ARROW.equals(style)) + { + cs.closePath(); + } + cs.drawShape(width, hasStroke, + // make sure to only paint a background color (/IC value) + // for interior color styles, even if an /IC value is set. + INTERIOR_COLOR_STYLES.contains(style) ? hasBackground : false); + } + + /** + * Add the two arms of a horizontal arrow. + * + * @param cs Content stream + * @param x + * @param y + * @param len The arm length. Positive goes to the right, negative goes to the left. 
+ * + * @throws IOException If the content stream could not be written + */ + void drawArrow(PDAppearanceContentStream cs, float x, float y, float len) throws IOException + { + // strategy for arrows: angle 30°, arrow arm length = 9 * line width + // cos(angle) = x position + // sin(angle) = y position + // this comes very close to what Adobe is doing + cs.moveTo(x + (float) (Math.cos(ARROW_ANGLE) * len), y + (float) (Math.sin(ARROW_ANGLE) * len)); + cs.lineTo(x, y); + cs.lineTo(x + (float) (Math.cos(ARROW_ANGLE) * len), y - (float) (Math.sin(ARROW_ANGLE) * len)); + } + + /** + * Add a square diamond shape (corner on top) to the path. + * + * @param cs Content stream + * @param x + * @param y + * @param r Radius (to a corner) + * + * @throws IOException If the content stream could not be written + */ + void drawDiamond(PDAppearanceContentStream cs, float x, float y, float r) throws IOException + { + cs.moveTo(x - r, y); + cs.lineTo(x, y + r); + cs.lineTo(x + r, y); + cs.lineTo(x, y - r); + cs.closePath(); + } + + /** + * Add a circle shape to the path in clockwise direction. + * + * @param cs Content stream + * @param x + * @param y + * @param r Radius + * + * @throws IOException If the content stream could not be written. + */ + void drawCircle(PDAppearanceContentStream cs, float x, float y, float r) throws IOException + { + // http://stackoverflow.com/a/2007782/535646 + float magic = r * 0.551784f; + cs.moveTo(x, y + r); + cs.curveTo(x + magic, y + r, x + r, y + magic, x + r, y); + cs.curveTo(x + r, y - magic, x + magic, y - r, x, y - r); + cs.curveTo(x - magic, y - r, x - r, y - magic, x - r, y); + cs.curveTo(x - r, y + magic, x - magic, y + r, x, y + r); + cs.closePath(); + } + + /** + * Add a circle shape to the path in counterclockwise direction. You'll need this e.g. when + * drawing a doughnut shape. See "Nonzero Winding Number Rule" for more information. 
+ * + * @param cs Content stream + * @param x + * @param y + * @param r Radius + * + * @throws IOException If the content stream could not be written. + */ + void drawCircle2(PDAppearanceContentStream cs, float x, float y, float r) throws IOException + { + // http://stackoverflow.com/a/2007782/535646 + float magic = r * 0.551784f; + cs.moveTo(x, y + r); + cs.curveTo(x - magic, y + r, x - r, y + magic, x - r, y); + cs.curveTo(x - r, y - magic, x - magic, y - r, x, y - r); + cs.curveTo(x + magic, y - r, x + r, y - magic, x + r, y); + cs.curveTo(x + r, y + magic, x + magic, y + r, x, y + r); + cs.closePath(); + } + + private static Set createShortStyles() + { + Set shortStyles = new HashSet(); + shortStyles.add(PDAnnotationLine.LE_OPEN_ARROW); + shortStyles.add(PDAnnotationLine.LE_CLOSED_ARROW); + shortStyles.add(PDAnnotationLine.LE_SQUARE); + shortStyles.add(PDAnnotationLine.LE_CIRCLE); + shortStyles.add(PDAnnotationLine.LE_DIAMOND); + return Collections.unmodifiableSet(shortStyles); + } + + private static Set createInteriorColorStyles() + { + Set interiorColorStyles = new HashSet(); + interiorColorStyles.add(PDAnnotationLine.LE_CLOSED_ARROW); + interiorColorStyles.add(PDAnnotationLine.LE_CIRCLE); + interiorColorStyles.add(PDAnnotationLine.LE_DIAMOND); + interiorColorStyles.add(PDAnnotationLine.LE_R_CLOSED_ARROW); + interiorColorStyles.add(PDAnnotationLine.LE_SQUARE); + return Collections.unmodifiableSet(interiorColorStyles); + } + + private static Set createAngledStyles() + { + Set angledStyles = new HashSet(); + angledStyles.add(PDAnnotationLine.LE_CLOSED_ARROW); + angledStyles.add(PDAnnotationLine.LE_OPEN_ARROW); + angledStyles.add(PDAnnotationLine.LE_R_CLOSED_ARROW); + angledStyles.add(PDAnnotationLine.LE_R_OPEN_ARROW); + angledStyles.add(PDAnnotationLine.LE_BUTT); + angledStyles.add(PDAnnotationLine.LE_SLASH); + return Collections.unmodifiableSet(angledStyles); + } + + /** + * Get the annotations normal appearance. + * + *

+ * This will get the annotations normal appearance. If this is not existent + * an empty appearance entry will be created. + * + * @return the appearance entry representing the normal appearance. + */ + private PDAppearanceEntry getNormalAppearance() + { + PDAppearanceDictionary appearanceDictionary = getAppearance(); + PDAppearanceEntry normalAppearanceEntry = appearanceDictionary.getNormalAppearance(); + + if (normalAppearanceEntry == null || normalAppearanceEntry.isSubDictionary()) + { + normalAppearanceEntry = new PDAppearanceEntry(createCOSStream()); + appearanceDictionary.setNormalAppearance(normalAppearanceEntry); + } + + return normalAppearanceEntry; + } + + + private PDAppearanceContentStream getAppearanceEntryAsContentStream( + PDAppearanceEntry appearanceEntry, boolean compress) throws IOException + { + PDAppearanceStream appearanceStream = appearanceEntry.getAppearanceStream(); + setTransformationMatrix(appearanceStream); + + // ensure there are resources + PDResources resources = appearanceStream.getResources(); + if (resources == null) + { + resources = new PDResources(); + appearanceStream.setResources(resources); + } + + return new PDAppearanceContentStream(appearanceStream, compress); + } + + private void setTransformationMatrix(PDAppearanceStream appearanceStream) + { + PDRectangle bbox = getRectangle(); + appearanceStream.setBBox(bbox); + AffineTransform transform = AffineTransform.getTranslateInstance(-bbox.getLowerLeftX(), + -bbox.getLowerLeftY()); + appearanceStream.setMatrix(transform); + } + + PDRectangle handleBorderBox(PDAnnotationSquareCircle annotation, float lineWidth) + { + // There are two options. The handling is not part of the PDF specification but + // implementation specific to Adobe Reader + // - if /RD is set the border box is the /Rect entry inset by the respective + // border difference. + // - if /RD is not set the border box is defined by the /Rect entry. 
The /RD entry will + // be set to be the line width and the /Rect is enlarged by the /RD amount + PDRectangle borderBox; + float[] rectDifferences = annotation.getRectDifferences(); + if (rectDifferences.length == 0) + { + borderBox = getPaddedRectangle(getRectangle(), lineWidth / 2); + // the differences rectangle + annotation.setRectDifferences(lineWidth / 2); + annotation.setRectangle(addRectDifferences(getRectangle(), annotation.getRectDifferences())); + // when the normal appearance stream was generated BBox and Matrix have been set to the + // values of the original /Rect. As the /Rect was changed that needs to be adjusted too. + annotation.getNormalAppearanceStream().setBBox(getRectangle()); + AffineTransform transform = AffineTransform.getTranslateInstance(-getRectangle().getLowerLeftX(), -getRectangle().getLowerLeftY()); + annotation.getNormalAppearanceStream().setMatrix(transform); + } + else + { + borderBox = applyRectDifferences(getRectangle(), rectDifferences); + borderBox = getPaddedRectangle(borderBox, lineWidth / 2); + } + return borderBox; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDAppearanceHandler.java new file mode 100644 index 00000000000..0daca006209 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDAppearanceHandler.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +public interface PDAppearanceHandler +{ + void generateAppearanceStreams(); + + void generateNormalAppearance(); + + void generateRolloverAppearance(); + + void generateDownAppearance(); +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDCaretAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDCaretAppearanceHandler.java new file mode 100644 index 00000000000..4a5a11f0d61 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDCaretAppearanceHandler.java @@ -0,0 +1,129 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.util.Matrix; + +/** + * Handler to generate the caret annotations appearance. + * + * @author Tilman Hausherr + */ +public class PDCaretAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDCaretAppearanceHandler.class); + + public PDCaretAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDCaretAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + PDAnnotationMarkup annotation = (PDAnnotationMarkup) getAnnotation(); + PDAppearanceContentStream contentStream = null; + + try + { + contentStream = getNormalAppearanceAsContentStream(); + + contentStream.setStrokingColor(getColor()); + contentStream.setNonStrokingColor(getColor()); + + setOpacity(contentStream, annotation.getConstantOpacity()); + + PDRectangle rect = getRectangle(); + PDRectangle bbox = new PDRectangle(rect.getWidth(), rect.getHeight()); + if (!annotation.getCOSObject().containsKey(COSName.RD)) + { + // Adobe creates the /RD entry with a number that is decided + // by dividing the height by 10, with a maximum result of 5. 
+ // That number is then used to enlarge the bbox and the rectangle and added to the + // translation values in the matrix and also used for the line width + // (not here because it has no effect, see comment near fill() ). + // The curves are based on the original rectangle. + float rd = Math.min(rect.getHeight() / 10, 5); + annotation.setRectDifferences(rd); + bbox = new PDRectangle(-rd, -rd, rect.getWidth() + 2 * rd, rect.getHeight() + 2 * rd); + Matrix matrix = annotation.getNormalAppearanceStream().getMatrix(); + matrix.transformPoint(rd, rd); + annotation.getNormalAppearanceStream().setMatrix(matrix.createAffineTransform()); + PDRectangle rect2 = new PDRectangle(rect.getLowerLeftX() - rd, rect.getLowerLeftY() - rd, + rect.getWidth() + 2 * rd, rect.getHeight() + 2 * rd); + annotation.setRectangle(rect2); + } + annotation.getNormalAppearanceStream().setBBox(bbox); + + float halfX = rect.getWidth() / 2; + float halfY = rect.getHeight() / 2; + contentStream.moveTo(0, 0); + contentStream.curveTo(halfX, 0, + halfX, halfY, + halfX, rect.getHeight()); + contentStream.curveTo(halfX, halfY, + halfX, 0, + rect.getWidth(), 0); + contentStream.closePath(); + contentStream.fill(); + // Adobe has an additional stroke, but it has no effect + // because fill "consumes" the path. 
+ } + catch (IOException e) + { + LOG.error(e); + } + finally + { + IOUtils.closeQuietly(contentStream); + } + } + + @Override + public void generateRolloverAppearance() + { + // TODO to be implemented + } + + @Override + public void generateDownAppearance() + { + // TODO to be implemented + } +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDCircleAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDCircleAppearanceHandler.java new file mode 100644 index 00000000000..9439b5133f6 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDCircleAppearanceHandler.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationSquareCircle; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderEffectDictionary; + +/** + * Handler to generate the circle annotations appearance. 
+ * + */ +public class PDCircleAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDCircleAppearanceHandler.class); + + public PDCircleAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDCircleAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + float lineWidth = getLineWidth(); + PDAnnotationSquareCircle annotation = (PDAnnotationSquareCircle) getAnnotation(); + PDAppearanceContentStream contentStream = null; + + try + { + contentStream = getNormalAppearanceAsContentStream(); + boolean hasStroke = contentStream.setStrokingColorOnDemand(getColor()); + boolean hasBackground = contentStream + .setNonStrokingColorOnDemand(annotation.getInteriorColor()); + + setOpacity(contentStream, annotation.getConstantOpacity()); + + contentStream.setBorderLine(lineWidth, annotation.getBorderStyle(), annotation.getBorder()); + PDBorderEffectDictionary borderEffect = annotation.getBorderEffect(); + + if (borderEffect != null && borderEffect.getStyle().equals(PDBorderEffectDictionary.STYLE_CLOUDY)) + { + CloudyBorder cloudyBorder = new CloudyBorder(contentStream, + borderEffect.getIntensity(), lineWidth, getRectangle()); + cloudyBorder.createCloudyEllipse(annotation.getRectDifference()); + annotation.setRectangle(cloudyBorder.getRectangle()); + annotation.setRectDifference(cloudyBorder.getRectDifference()); + PDAppearanceStream appearanceStream = annotation.getNormalAppearanceStream(); + appearanceStream.setBBox(cloudyBorder.getBBox()); + appearanceStream.setMatrix(cloudyBorder.getMatrix()); + } + else + { + // Acrobat applies a padding to each side of the bbox so the line is completely within + // the bbox. 
+ + PDRectangle borderBox = handleBorderBox(annotation, lineWidth); + + // lower left corner + float x0 = borderBox.getLowerLeftX(); + float y0 = borderBox.getLowerLeftY(); + // upper right corner + float x1 = borderBox.getUpperRightX(); + float y1 = borderBox.getUpperRightY(); + // mid points + float xm = x0 + borderBox.getWidth() / 2; + float ym = y0 + borderBox.getHeight() / 2; + // see http://spencermortensen.com/articles/bezier-circle/ + // the below number was calculated from sampling content streams + // generated using Adobe Reader + float magic = 0.55555417f; + // control point offsets + float vOffset = borderBox.getHeight() / 2 * magic; + float hOffset = borderBox.getWidth() / 2 * magic; + + contentStream.moveTo(xm, y1); + contentStream.curveTo((xm + hOffset), y1, x1, (ym + vOffset), x1, ym); + contentStream.curveTo(x1, (ym - vOffset), (xm + hOffset), y0, xm, y0); + contentStream.curveTo((xm - hOffset), y0, x0, (ym - vOffset), x0, ym); + contentStream.curveTo(x0, (ym + vOffset), (xm - hOffset), y1, xm, y1); + contentStream.closePath(); + } + + contentStream.drawShape(lineWidth, hasStroke, hasBackground); + } + catch (IOException e) + { + LOG.error(e); + } + finally{ + IOUtils.closeQuietly(contentStream); + } + } + + @Override + public void generateRolloverAppearance() + { + // TODO to be implemented + } + + @Override + public void generateDownAppearance() + { + // TODO to be implemented + } + + /** + * Get the line width of the border. + * + * Get the width of the line used to draw a border around the annotation. + * This may either be specified by the annotation dictionaries Border + * setting or by the W entry in the BS border style dictionary. If both are + * missing the default width is 1. + * + * @return the line width + */ + // TODO: according to the PDF spec the use of the BS entry is annotation + // specific + // so we will leave that to be implemented by individual handlers.
+ // If at the end all annotations support the BS entry this can be handled + // here and removed from the individual handlers. + float getLineWidth() + { + PDAnnotationMarkup annotation = (PDAnnotationMarkup) getAnnotation(); + + PDBorderStyleDictionary bs = annotation.getBorderStyle(); + + if (bs != null) + { + return bs.getWidth(); + } + + COSArray borderCharacteristics = annotation.getBorder(); + if (borderCharacteristics.size() >= 3) + { + COSBase base = borderCharacteristics.getObject(2); + if (base instanceof COSNumber) + { + return ((COSNumber) base).floatValue(); + } + } + + return 1; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDFreeTextAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDFreeTextAppearanceHandler.java new file mode 100644 index 00000000000..bf4f42a744e --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDFreeTextAppearanceHandler.java @@ -0,0 +1,508 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.fontbox.util.Charsets; +import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.contentstream.operator.OperatorName; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdfparser.PDFStreamParser; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceCMYK; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup; +import static org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLine.LE_NONE; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderEffectDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.layout.AppearanceStyle; +import org.apache.pdfbox.pdmodel.interactive.annotation.layout.PlainText; +import org.apache.pdfbox.pdmodel.interactive.annotation.layout.PlainTextFormatter; +import org.apache.pdfbox.util.Matrix; + +public class PDFreeTextAppearanceHandler extends PDAbstractAppearanceHandler +{ + private 
static final Log LOG = LogFactory.getLog(PDFreeTextAppearanceHandler.class); + + private static final Pattern COLOR_PATTERN = + Pattern.compile(".*color\\:\\s*\\#([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]).*"); + + private float fontSize = 10; + private COSName fontName = COSName.HELV; + + public PDFreeTextAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDFreeTextAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + PDAnnotationMarkup annotation = (PDAnnotationMarkup) getAnnotation(); + float[] pathsArray; + if (PDAnnotationMarkup.IT_FREE_TEXT_CALLOUT.equals(annotation.getIntent())) + { + pathsArray = annotation.getCallout(); + if (pathsArray == null || pathsArray.length != 4 && pathsArray.length != 6) + { + pathsArray = new float[0]; + } + } + else + { + pathsArray = new float[0]; + } + AnnotationBorder ab = AnnotationBorder.getAnnotationBorder(annotation, annotation.getBorderStyle()); + + PDAppearanceContentStream cs = null; + + try + { + cs = getNormalAppearanceAsContentStream(true); + + // The fill color is the /C entry, there is no /IC entry defined + boolean hasBackground = cs.setNonStrokingColorOnDemand(annotation.getColor()); + setOpacity(cs, annotation.getConstantOpacity()); + + // Adobe uses the last non stroking color from /DA as stroking color! + // But if there is a color in /DS, then that one is used for text. 
+ PDColor strokingColor = extractNonStrokingColor(annotation); + boolean hasStroke = cs.setStrokingColorOnDemand(strokingColor); + PDColor textColor = strokingColor; + String defaultStyleString = annotation.getDefaultStyleString(); + if (defaultStyleString != null) + { + Matcher m = COLOR_PATTERN.matcher(defaultStyleString); + if (m.find()) + { + int color = Integer.parseInt(m.group(1), 16); + float r = ((color >> 16) & 0xFF) / 255f; + float g = ((color >> 8) & 0xFF) / 255f; + float b = (color & 0xFF) / 255f; + textColor = new PDColor( new float[] { r, g, b }, PDDeviceRGB.INSTANCE); + } + } + + if (ab.dashArray != null) + { + cs.setLineDashPattern(ab.dashArray, 0); + } + cs.setLineWidth(ab.width); + + // draw callout line(s) + // must be done before retangle paint to avoid a line cutting through cloud + // see CTAN-example-Annotations.pdf + for (int i = 0; i < pathsArray.length / 2; ++i) + { + float x = pathsArray[i * 2]; + float y = pathsArray[i * 2 + 1]; + if (i == 0) + { + if (SHORT_STYLES.contains(annotation.getLineEndingStyle())) + { + // modify coordinate to shorten the segment + // https://stackoverflow.com/questions/7740507/extend-a-line-segment-a-specific-distance + float x1 = pathsArray[2]; + float y1 = pathsArray[3]; + float len = (float) (Math.sqrt(Math.pow(x - x1, 2) + Math.pow(y - y1, 2))); + if (Float.compare(len, 0) != 0) + { + x += (x1 - x) / len * ab.width; + y += (y1 - y) / len * ab.width; + } + } + cs.moveTo(x, y); + } + else + { + cs.lineTo(x, y); + } + } + if (pathsArray.length > 0) + { + cs.stroke(); + } + + // paint the styles here and after line(s) draw, to avoid line crossing a filled shape + if (PDAnnotationMarkup.IT_FREE_TEXT_CALLOUT.equals(annotation.getIntent()) + // check only needed to avoid q cm Q if LE_NONE + && !LE_NONE.equals(annotation.getLineEndingStyle()) + && pathsArray.length >= 4) + { + float x2 = pathsArray[2]; + float y2 = pathsArray[3]; + float x1 = pathsArray[0]; + float y1 = pathsArray[1]; + cs.saveGraphicsState(); + 
if (ANGLED_STYLES.contains(annotation.getLineEndingStyle())) + { + // do a transform so that first "arm" is imagined flat, + // like in line handler. + // The alternative would be to apply the transform to the + // LE shape coordinates directly, which would be more work + // and produce code difficult to understand + double angle = Math.atan2(y2 - y1, x2 - x1); + cs.transform(Matrix.getRotateInstance(angle, x1, y1)); + } + else + { + cs.transform(Matrix.getTranslateInstance(x1, y1)); + } + drawStyle(annotation.getLineEndingStyle(), cs, 0, 0, ab.width, hasStroke, hasBackground, false); + cs.restoreGraphicsState(); + } + + PDRectangle borderBox; + PDBorderEffectDictionary borderEffect = annotation.getBorderEffect(); + if (borderEffect != null && borderEffect.getStyle().equals(PDBorderEffectDictionary.STYLE_CLOUDY)) + { + // Adobe draws the text with the original rectangle in mind. + // but if there is an /RD, then writing area get smaller. + // do this here because /RD is overwritten in a few lines + borderBox = applyRectDifferences(getRectangle(), annotation.getRectDifferences()); + + //TODO this segment was copied from square handler. Refactor? + CloudyBorder cloudyBorder = new CloudyBorder(cs, + borderEffect.getIntensity(), ab.width, getRectangle()); + cloudyBorder.createCloudyRectangle(annotation.getRectDifference()); + annotation.setRectangle(cloudyBorder.getRectangle()); + annotation.setRectDifference(cloudyBorder.getRectDifference()); + PDAppearanceStream appearanceStream = annotation.getNormalAppearanceStream(); + appearanceStream.setBBox(cloudyBorder.getBBox()); + appearanceStream.setMatrix(cloudyBorder.getMatrix()); + } + else + { + // handle the border box + // + // There are two options. The handling is not part of the PDF specification but + // implementation specific to Adobe Reader + // - if /RD is set the border box is the /Rect entry inset by the respective + // border difference. 
+ // - if /RD is not set then we don't touch /RD etc because Adobe doesn't either. + borderBox = applyRectDifferences(getRectangle(), annotation.getRectDifferences()); + annotation.getNormalAppearanceStream().setBBox(borderBox); + + // note that borderBox is not modified + PDRectangle paddedRectangle = getPaddedRectangle(borderBox, ab.width / 2); + cs.addRect(paddedRectangle.getLowerLeftX(), paddedRectangle.getLowerLeftY(), + paddedRectangle.getWidth(), paddedRectangle.getHeight()); + } + cs.drawShape(ab.width, hasStroke, hasBackground); + + // rotation is an undocumented feature, but Adobe uses it. Examples can be found + // in pdf_commenting_new.pdf file, page 3. + int rotation = annotation.getCOSObject().getInt(COSName.ROTATE, 0); + cs.transform(Matrix.getRotateInstance(Math.toRadians(rotation), 0, 0)); + float xOffset; + float yOffset; + float width = rotation == 90 || rotation == 270 ? borderBox.getHeight() : borderBox.getWidth(); + // strategy to write formatted text is somewhat inspired by + // AppearanceGeneratorHelper.insertGeneratedAppearance() + PDFont font = PDType1Font.HELVETICA; + float clipY; + float clipWidth = width - ab.width * 4; + float clipHeight = rotation == 90 || rotation == 270 ? + borderBox.getWidth() - ab.width * 4 : borderBox.getHeight() - ab.width * 4; + extractFontDetails(annotation); + if (document != null && document.getDocumentCatalog().getAcroForm() != null) + { + // Try to get font from AcroForm default resources + // Sample file: https://gitlab.freedesktop.org/poppler/poppler/issues/6 + PDResources defaultResources = document.getDocumentCatalog().getAcroForm().getDefaultResources(); + if (defaultResources != null) + { + PDFont defaultResourcesFont = defaultResources.getFont(fontName); + if (defaultResourcesFont != null) + { + font = defaultResourcesFont; + } + } + } + + // value used by Adobe, no idea where it comes from, actual font bbox max y is 0.931 + // gathered by creating an annotation with width 0. 
+ float yDelta = 0.7896f; + switch (rotation) + { + case 180: + xOffset = - borderBox.getUpperRightX() + ab.width * 2; + yOffset = - borderBox.getLowerLeftY() - ab.width * 2 - yDelta * fontSize; + clipY = - borderBox.getUpperRightY() + ab.width * 2; + break; + case 90: + xOffset = borderBox.getLowerLeftY() + ab.width * 2; + yOffset = - borderBox.getLowerLeftX() - ab.width * 2 - yDelta * fontSize; + clipY = - borderBox.getUpperRightX() + ab.width * 2; + break; + case 270: + xOffset = - borderBox.getUpperRightY() + ab.width * 2; + yOffset = borderBox.getUpperRightX() - ab.width * 2 - yDelta * fontSize; + clipY = borderBox.getLowerLeftX() + ab.width * 2; + break; + case 0: + default: + xOffset = borderBox.getLowerLeftX() + ab.width * 2; + yOffset = borderBox.getUpperRightY() - ab.width * 2 - yDelta * fontSize; + clipY = borderBox.getLowerLeftY() + ab.width * 2; + break; + } + + // clip writing area + cs.addRect(xOffset, clipY, clipWidth, clipHeight); + cs.clip(); + + if (annotation.getContents() != null) + { + cs.beginText(); + cs.setFont(font, fontSize); + cs.setNonStrokingColor(textColor.getComponents()); + AppearanceStyle appearanceStyle = new AppearanceStyle(); + appearanceStyle.setFont(font); + appearanceStyle.setFontSize(fontSize); + PlainTextFormatter formatter = new PlainTextFormatter.Builder(cs) + .style(appearanceStyle) + .text(new PlainText(annotation.getContents())) + .width(width - ab.width * 4) + .wrapLines(true) + .initialOffset(xOffset, yOffset) + // Adobe ignores the /Q + //.textAlign(annotation.getQ()) + .build(); + try + { + formatter.format(); + } + catch (IllegalArgumentException ex) + { + throw new IOException(ex); + } + finally + { + cs.endText(); + } + } + + if (pathsArray.length > 0) + { + PDRectangle rect = getRectangle(); + + // Adjust rectangle + // important to do this after the rectangle has been painted, because the + // final rectangle will be bigger due to callout + // CTAN-example-Annotations.pdf p1 + //TODO in a class structure this 
should be overridable + float minX = Float.MAX_VALUE; + float minY = Float.MAX_VALUE; + float maxX = Float.MIN_VALUE; + float maxY = Float.MIN_VALUE; + for (int i = 0; i < pathsArray.length / 2; ++i) + { + float x = pathsArray[i * 2]; + float y = pathsArray[i * 2 + 1]; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + // arrow length is 9 * width at about 30° => 10 * width seems to be enough + rect.setLowerLeftX(Math.min(minX - ab.width * 10, rect.getLowerLeftX())); + rect.setLowerLeftY(Math.min(minY - ab.width * 10, rect.getLowerLeftY())); + rect.setUpperRightX(Math.max(maxX + ab.width * 10, rect.getUpperRightX())); + rect.setUpperRightY(Math.max(maxY + ab.width * 10, rect.getUpperRightY())); + annotation.setRectangle(rect); + + // need to set the BBox too, because rectangle modification came later + annotation.getNormalAppearanceStream().setBBox(getRectangle()); + + //TODO when callout is used, /RD should be so that the result is the writable part + } + } + catch (IOException ex) + { + LOG.error(ex); + } + finally + { + IOUtils.closeQuietly(cs); + } + } + + // get the last non stroking color from the /DA entry + private PDColor extractNonStrokingColor(PDAnnotationMarkup annotation) + { + // It could also work with a regular expression, but that should be written so that + // "/LucidaConsole 13.94766 Tf .392 .585 .93 rg" does not produce "2 .585 .93 rg" as result + // Another alternative might be to create a PDDocument and a PDPage with /DA content as /Content, + // process the whole thing and then get the non stroking color. 
+ + PDColor strokingColor = new PDColor(new float[]{0}, PDDeviceGray.INSTANCE); + String defaultAppearance = annotation.getDefaultAppearance(); + if (defaultAppearance == null) + { + return strokingColor; + } + + try + { + // not sure if charset is correct, but we only need numbers and simple characters + PDFStreamParser parser = new PDFStreamParser(defaultAppearance.getBytes(Charsets.US_ASCII)); + COSArray arguments = new COSArray(); + COSArray colors = null; + Operator graphicOp = null; + for (Object token = parser.parseNextToken(); token != null; token = parser.parseNextToken()) + { + if (token instanceof Operator) + { + Operator op = (Operator) token; + String name = op.getName(); + if (OperatorName.NON_STROKING_GRAY.equals(name) || + OperatorName.NON_STROKING_RGB.equals(name) || + OperatorName.NON_STROKING_CMYK.equals(name)) + { + graphicOp = op; + colors = arguments; + } + arguments = new COSArray(); + } + else + { + arguments.add((COSBase) token); + } + } + if (graphicOp != null) + { + String graphicOpName = graphicOp.getName(); + if (OperatorName.NON_STROKING_GRAY.equals(graphicOpName)) + { + strokingColor = new PDColor(colors, PDDeviceGray.INSTANCE); + } + else if (OperatorName.NON_STROKING_RGB.equals(graphicOpName)) + { + strokingColor = new PDColor(colors, PDDeviceRGB.INSTANCE); + } + else if (OperatorName.NON_STROKING_CMYK.equals(graphicOpName)) + { + strokingColor = new PDColor(colors, PDDeviceCMYK.INSTANCE); + } + } + } + catch (IOException ex) + { + LOG.warn("Problem parsing /DA, will use default black", ex); + } + return strokingColor; + } + + //TODO extractNonStrokingColor and extractFontDetails + // might somehow be replaced with PDDefaultAppearanceString, which is quite similar. 
+ private void extractFontDetails(PDAnnotationMarkup annotation) + { + String defaultAppearance = annotation.getDefaultAppearance(); + if (defaultAppearance == null && document != null && + document.getDocumentCatalog().getAcroForm() != null) + { + defaultAppearance = document.getDocumentCatalog().getAcroForm().getDefaultAppearance(); + } + if (defaultAppearance == null) + { + return; + } + + try + { + // not sure if charset is correct, but we only need numbers and simple characters + PDFStreamParser parser = new PDFStreamParser(defaultAppearance.getBytes(Charsets.US_ASCII)); + COSArray arguments = new COSArray(); + COSArray fontArguments = new COSArray(); + for (Object token = parser.parseNextToken(); token != null; token = parser.parseNextToken()) + { + if (token instanceof Operator) + { + Operator op = (Operator) token; + String name = op.getName(); + if (OperatorName.SET_FONT_AND_SIZE.equals(name)) + { + fontArguments = arguments; + } + arguments = new COSArray(); + } + else + { + arguments.add((COSBase) token); + } + } + if (fontArguments.size() >= 2) + { + COSBase base = fontArguments.get(0); + if (base instanceof COSName) + { + fontName = (COSName) base; + } + base = fontArguments.get(1); + if (base instanceof COSNumber) + { + fontSize = ((COSNumber) base).floatValue(); + } + } + } + catch (IOException ex) + { + LOG.warn("Problem parsing /DA, will use default 'Helv 10'", ex); + } + } + + @Override + public void generateRolloverAppearance() + { + // TODO to be implemented + } + + @Override + public void generateDownAppearance() + { + // TODO to be implemented + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDHighlightAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDHighlightAppearanceHandler.java new file mode 100644 index 00000000000..c083da570ca --- /dev/null +++ 
b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDHighlightAppearanceHandler.java @@ -0,0 +1,257 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDFormContentStream; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.blend.BlendMode; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; +import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; + +/** + * + * @author Tilman Hausherr + */ +public class PDHighlightAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDHighlightAppearanceHandler.class); + + public 
PDHighlightAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDHighlightAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + PDAnnotationTextMarkup annotation = (PDAnnotationTextMarkup) getAnnotation(); + PDRectangle rect = annotation.getRectangle(); + float[] pathsArray = annotation.getQuadPoints(); + if (pathsArray == null) + { + return; + } + AnnotationBorder ab = AnnotationBorder.getAnnotationBorder(annotation, annotation.getBorderStyle()); + PDColor color = annotation.getColor(); + if (color == null || color.getComponents().length == 0) + { + return; + } + + // Adjust rectangle even if not empty, see PLPDF.com-MarkupAnnotations.pdf + //TODO in a class structure this should be overridable + // this is similar to polyline but different data type + //TODO padding should consider the curves too; needs to know in advance where the curve is + float minX = Float.MAX_VALUE; + float minY = Float.MAX_VALUE; + float maxX = Float.MIN_VALUE; + float maxY = Float.MIN_VALUE; + for (int i = 0; i < pathsArray.length / 2; ++i) + { + float x = pathsArray[i * 2]; + float y = pathsArray[i * 2 + 1]; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + + // get the delta used for curves and use it for padding + float maxDelta = 0; + for (int i = 0; i < pathsArray.length / 8; ++i) + { + // one of the two is 0, depending whether the rectangle is + // horizontal or vertical + // if it is diagonal then... uh... 
+ float delta = Math.max((pathsArray[i + 0] - pathsArray[i + 4]) / 4, + (pathsArray[i + 1] - pathsArray[i + 5]) / 4); + maxDelta = Math.max(delta, maxDelta); + } + + rect.setLowerLeftX(Math.min(minX - ab.width / 2 - maxDelta, rect.getLowerLeftX())); + rect.setLowerLeftY(Math.min(minY - ab.width / 2 - maxDelta, rect.getLowerLeftY())); + rect.setUpperRightX(Math.max(maxX + ab.width + maxDelta, rect.getUpperRightX())); + rect.setUpperRightY(Math.max(maxY + ab.width + maxDelta, rect.getUpperRightY())); + annotation.setRectangle(rect); + + PDAppearanceContentStream cs = null; + + try + { + + cs = getNormalAppearanceAsContentStream(); + + PDExtendedGraphicsState r0 = new PDExtendedGraphicsState(); + PDExtendedGraphicsState r1 = new PDExtendedGraphicsState(); + r0.setAlphaSourceFlag(false); + r0.setStrokingAlphaConstant(annotation.getConstantOpacity()); + r0.setNonStrokingAlphaConstant(annotation.getConstantOpacity()); + r1.setAlphaSourceFlag(false); + r1.setBlendMode(BlendMode.MULTIPLY); + cs.setGraphicsStateParameters(r0); + cs.setGraphicsStateParameters(r1); + PDFormXObject frm1 = new PDFormXObject(createCOSStream()); + PDFormXObject frm2 = new PDFormXObject(createCOSStream()); + frm1.setResources(new PDResources()); + + PDFormContentStream mwfofrmCS =null; + try + { + mwfofrmCS = new PDFormContentStream(frm1); + mwfofrmCS.drawForm(frm2); + } + finally + { + IOUtils.closeQuietly(mwfofrmCS); + } + frm1.setBBox(annotation.getRectangle()); + COSDictionary groupDict = new COSDictionary(); + groupDict.setItem(COSName.S, COSName.TRANSPARENCY); + //TODO PDFormXObject.setGroup() is missing + frm1.getCOSObject().setItem(COSName.GROUP, groupDict); + cs.drawForm(frm1); + frm2.setBBox(annotation.getRectangle()); + + PDFormContentStream frm2CS = null; + + try + { + frm2CS = new PDFormContentStream(frm2); + frm2CS.setNonStrokingColor(color); + int of = 0; + while (of + 7 < pathsArray.length) + { + // quadpoints spec sequence is incorrect, correct one is (4,5 0,1 2,3 6,7) + // 
https://stackoverflow.com/questions/9855814/pdf-spec-vs-acrobat-creation-quadpoints + + // for "curvy" highlighting, two Bézier control points are used that seem to have a + // distance of about 1/4 of the height. + // note that curves won't appear if outside of the rectangle + float delta = 0; + if (Float.compare(pathsArray[of + 0], pathsArray[of + 4]) == 0 && + Float.compare(pathsArray[of + 1], pathsArray[of + 3]) == 0 && + Float.compare(pathsArray[of + 2], pathsArray[of + 6]) == 0 && + Float.compare(pathsArray[of + 5], pathsArray[of + 7]) == 0) + { + // horizontal highlight + delta = (pathsArray[of + 1] - pathsArray[of + 5]) / 4; + } + else if (Float.compare(pathsArray[of + 1], pathsArray[of + 5]) == 0 && + Float.compare(pathsArray[of + 0], pathsArray[of + 2]) == 0 && + Float.compare(pathsArray[of + 3], pathsArray[of + 7]) == 0 && + Float.compare(pathsArray[of + 4], pathsArray[of + 6]) == 0) + { + // vertical highlight + delta = (pathsArray[of + 0] - pathsArray[of + 4]) / 4; + } + + frm2CS.moveTo(pathsArray[of + 4], pathsArray[of + 5]); + + if (Float.compare(pathsArray[of + 0], pathsArray[of + 4]) == 0) + { + // horizontal highlight + frm2CS.curveTo(pathsArray[of + 4] - delta, pathsArray[of + 5] + delta, + pathsArray[of + 0] - delta, pathsArray[of + 1] - delta, + pathsArray[of + 0], pathsArray[of + 1]); + } + else if (Float.compare(pathsArray[of + 5], pathsArray[of + 1]) == 0) + { + // vertical highlight + frm2CS.curveTo(pathsArray[of + 4] + delta, pathsArray[of + 5] + delta, + pathsArray[of + 0] - delta, pathsArray[of + 1] + delta, + pathsArray[of + 0], pathsArray[of + 1]); + } + else + { + frm2CS.lineTo(pathsArray[of + 0], pathsArray[of + 1]); + } + frm2CS.lineTo(pathsArray[of + 2], pathsArray[of + 3]); + if (Float.compare(pathsArray[of + 2], pathsArray[of + 6]) == 0) + { + // horizontal highlight + frm2CS.curveTo(pathsArray[of + 2] + delta, pathsArray[of + 3] - delta, + pathsArray[of + 6] + delta, pathsArray[of + 7] + delta, + pathsArray[of + 6], 
pathsArray[of + 7]); + } + else if (Float.compare(pathsArray[of + 3], pathsArray[of + 7]) == 0) + { + // vertical highlight + frm2CS.curveTo(pathsArray[of + 2] - delta, pathsArray[of + 3] - delta, + pathsArray[of + 6] + delta, pathsArray[of + 7] - delta, + pathsArray[of + 6], pathsArray[of + 7]); + } + else + { + frm2CS.lineTo(pathsArray[of + 6], pathsArray[of + 7]); + } + + frm2CS.fill(); + of += 8; + } + } + finally + { + IOUtils.closeQuietly(frm2CS); + } + } + catch (IOException ex) + { + LOG.error(ex); + } + finally + { + IOUtils.closeQuietly(cs); + } + } + + @Override + public void generateRolloverAppearance() + { + // No rollover appearance generated + } + + @Override + public void generateDownAppearance() + { + // No down appearance generated + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDInkAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDInkAppearanceHandler.java new file mode 100644 index 00000000000..d40933bcc56 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDInkAppearanceHandler.java @@ -0,0 +1,154 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.common.PDRectangle; + +/** + * Handler to generate the ink annotations appearance. + * + */ +public class PDInkAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDInkAppearanceHandler.class); + + public PDInkAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDInkAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + PDAnnotationMarkup ink = (PDAnnotationMarkup) getAnnotation(); + // PDF spec does not mention /Border for ink annotations, but it is used if /BS is not available + AnnotationBorder ab = AnnotationBorder.getAnnotationBorder(ink, ink.getBorderStyle()); + PDColor color = ink.getColor(); + if (color == null || color.getComponents().length == 0 || Float.compare(ab.width, 0) == 0) + { + return; + } + + // Adjust rectangle even if not empty + // file from PDF.js issue 13447 + //TODO in a class structure this should be overridable + float minX = Float.MAX_VALUE; + float minY = Float.MAX_VALUE; + float maxX = Float.MIN_VALUE; + float maxY = Float.MIN_VALUE; + for (float[] pathArray : ink.getInkList()) + { + int nPoints = pathArray.length / 2; + for 
(int i = 0; i < nPoints; ++i) + { + float x = pathArray[i * 2]; + float y = pathArray[i * 2 + 1]; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + } + PDRectangle rect = ink.getRectangle(); + rect.setLowerLeftX(Math.min(minX - ab.width * 2, rect.getLowerLeftX())); + rect.setLowerLeftY(Math.min(minY - ab.width * 2, rect.getLowerLeftY())); + rect.setUpperRightX(Math.max(maxX + ab.width * 2, rect.getUpperRightX())); + rect.setUpperRightY(Math.max(maxY + ab.width * 2, rect.getUpperRightY())); + ink.setRectangle(rect); + + PDAppearanceContentStream cs = null; + + try + { + cs = getNormalAppearanceAsContentStream(); + + setOpacity(cs, ink.getConstantOpacity()); + + cs.setStrokingColor(color); + if (ab.dashArray != null) + { + cs.setLineDashPattern(ab.dashArray, 0); + } + cs.setLineWidth(ab.width); + + for (float[] pathArray : ink.getInkList()) + { + int nPoints = pathArray.length / 2; + + // "When drawn, the points shall be connected by straight lines or curves + // in an implementation-dependent way" - we do lines. 
+ for (int i = 0; i < nPoints; ++i) + { + float x = pathArray[i * 2]; + float y = pathArray[i * 2 + 1]; + + if (i == 0) + { + cs.moveTo(x, y); + } + else + { + cs.lineTo(x, y); + } + } + cs.stroke(); + } + } + catch (IOException ex) + { + LOG.error(ex); + } + finally + { + IOUtils.closeQuietly(cs); + } + } + + @Override + public void generateRolloverAppearance() + { + // No rollover appearance generated + } + + @Override + public void generateDownAppearance() + { + // No down appearance generated + } +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDLineAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDLineAppearanceHandler.java new file mode 100644 index 00000000000..9373e62b233 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDLineAppearanceHandler.java @@ -0,0 +1,350 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLine; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; +import static org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLine.LE_NONE; +import org.apache.pdfbox.util.Matrix; + +/** + * Handler to generate the line annotations appearance. + */ +public class PDLineAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDLineAppearanceHandler.class); + + static final int FONT_SIZE = 9; + + public PDLineAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDLineAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + PDAnnotationLine annotation = (PDAnnotationLine) getAnnotation(); + PDRectangle rect = annotation.getRectangle(); + float[] pathsArray = annotation.getLine(); + if (pathsArray == null || pathsArray.length < 4) + { + return; + } + AnnotationBorder ab = AnnotationBorder.getAnnotationBorder(annotation, annotation.getBorderStyle()); + PDColor color = annotation.getColor(); + if (color == null || color.getComponents().length == 0) + { + return; + } + float ll = annotation.getLeaderLineLength(); + float lle = annotation.getLeaderLineExtensionLength(); + float llo = annotation.getLeaderLineOffsetLength(); + + 
// Adjust rectangle even if not empty, see PLPDF.com-MarkupAnnotations.pdf (max bounds start at -Float.MAX_VALUE because Float.MIN_VALUE is the smallest positive float) + float minX = Float.MAX_VALUE; + float minY = Float.MAX_VALUE; + float maxX = -Float.MAX_VALUE; + float maxY = -Float.MAX_VALUE; + for (int i = 0; i < pathsArray.length / 2; ++i) + { + float x = pathsArray[i * 2]; + float y = pathsArray[i * 2 + 1]; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + + // Leader lines + if (ll < 0) + { + // /LLO and /LLE go in the same direction as /LL + llo = -llo; + lle = -lle; + } + + // observed with diagonal line of AnnotationSample.Standard.pdf + // for line endings, very small widths must be treated as size 1. + // However the border of the line ending shapes is not drawn. + float lineEndingSize = (ab.width < 1e-5) ? 1 : ab.width; + + // add/subtract width, font height, and arrows + // arrow length is 9 * width at about 30° => 10 * width seems to be enough + // but need to consider /LL, /LLE and /LLO too + //TODO find better way to calculate padding + rect.setLowerLeftX(Math.min(minX - Math.max(lineEndingSize * 10, Math.abs(llo+ll+lle)), rect.getLowerLeftX())); + rect.setLowerLeftY(Math.min(minY - Math.max(lineEndingSize * 10, Math.abs(llo+ll+lle)), rect.getLowerLeftY())); + rect.setUpperRightX(Math.max(maxX + Math.max(lineEndingSize * 10, Math.abs(llo+ll+lle)), rect.getUpperRightX())); + rect.setUpperRightY(Math.max(maxY + Math.max(lineEndingSize * 10, Math.abs(llo+ll+lle)), rect.getUpperRightY())); + + annotation.setRectangle(rect); + + PDAppearanceContentStream cs = null; + + try + { + cs = getNormalAppearanceAsContentStream(); + + setOpacity(cs, annotation.getConstantOpacity()); + + // Tested with Adobe Reader: + // text is written first (TODO) + // width 0 is used by Adobe as such (but results in a visible line in rendering) + // empty color array results in an invisible line ("n" operator) but the rest is visible + // empty content is like no caption + + boolean hasStroke = 
cs.setStrokingColorOnDemand(color); + + if (ab.dashArray != null) + { + cs.setLineDashPattern(ab.dashArray, 0); + } + cs.setLineWidth(ab.width); + + float x1 = pathsArray[0]; + float y1 = pathsArray[1]; + float x2 = pathsArray[2]; + float y2 = pathsArray[3]; + + // if there are leader lines, then the /L coordinates represent + // the endpoints of the leader lines rather than the endpoints of the line itself. + // so for us, llo + ll is the vertical offset for the line. + float y = llo + ll; + + String contents = annotation.getContents(); + if (contents == null) + { + contents = ""; + } + + cs.saveGraphicsState(); + double angle = Math.atan2(y2 - y1, x2 - x1); + cs.transform(Matrix.getRotateInstance(angle, x1, y1)); + float lineLength = (float) Math.sqrt(((x2 - x1) * (x2 - x1)) + ((y2 - y1) * (y2 - y1))); + + // Leader lines + cs.moveTo(0, llo); + cs.lineTo(0, llo + ll + lle); + cs.moveTo(lineLength, llo); + cs.lineTo(lineLength, llo + ll + lle); + + if (annotation.getCaption() && !contents.isEmpty()) + { + // Note that Adobe places the text as a caption even if /CP is not set + // when the text is so long that it would cross arrows, but we ignore this for now + // and stick to the specification. + + PDType1Font font = PDType1Font.HELVETICA; + // TODO: support newlines!!!!! + // see https://www.pdfill.com/example/pdf_commenting_new.pdf + float contentLength = 0; + try + { + contentLength = font.getStringWidth(contents) / 1000 * FONT_SIZE; + + //TODO How to decide the size of the font? 
+ // 9 seems to be standard, but if the text doesn't fit, a scaling is done + // see AnnotationSample.Standard.pdf, diagonal line + } + catch (IllegalArgumentException ex) + { + // Adobe Reader displays placeholders instead + LOG.error("line text '" + contents + "' can't be shown", ex); + } + float xOffset = (lineLength - contentLength) / 2; + float yOffset; + + String captionPositioning = annotation.getCaptionPositioning(); + + // draw the line horizontally, using the rotation CTM to get to correct final position + // that's the easiest way to calculate the positions for the line before and after inline caption + if (SHORT_STYLES.contains(annotation.getStartPointEndingStyle())) + { + cs.moveTo(lineEndingSize, y); + } + else + { + cs.moveTo(0, y); + } + if ("Top".equals(captionPositioning)) + { + // this arbitrary number is from Adobe + yOffset = 1.908f; + } + else + { + // Inline + // this arbitrary number is from Adobe + yOffset = -2.6f; + + cs.lineTo(xOffset - lineEndingSize, y); + cs.moveTo(lineLength - xOffset + lineEndingSize, y); + } + if (SHORT_STYLES.contains(annotation.getEndPointEndingStyle())) + { + cs.lineTo(lineLength - lineEndingSize, y); + } + else + { + cs.lineTo(lineLength, y); + } + cs.drawShape(lineEndingSize, hasStroke, false); + + // /CO entry (caption offset) + float captionHorizontalOffset = annotation.getCaptionHorizontalOffset(); + float captionVerticalOffset = annotation.getCaptionVerticalOffset(); + + // check contentLength so we don't show if there was trouble before + if (contentLength > 0) + { + cs.beginText(); + cs.setFont(font, FONT_SIZE); + cs.newLineAtOffset(xOffset + captionHorizontalOffset, + y + yOffset + captionVerticalOffset); + cs.showText(contents); + cs.endText(); + } + + if (Float.compare(captionVerticalOffset, 0) != 0) + { + // Adobe paints vertical bar to the caption + cs.moveTo(0 + lineLength / 2, y); + cs.lineTo(0 + lineLength / 2, y + captionVerticalOffset); + 
cs.drawShape(lineEndingSize, hasStroke, false); + } + } + else + { + if (SHORT_STYLES.contains(annotation.getStartPointEndingStyle())) + { + cs.moveTo(lineEndingSize, y); + } + else + { + cs.moveTo(0, y); + } + if (SHORT_STYLES.contains(annotation.getEndPointEndingStyle())) + { + cs.lineTo(lineLength - lineEndingSize, y); + } + else + { + cs.lineTo(lineLength, y); + } + cs.drawShape(lineEndingSize, hasStroke, false); + } + cs.restoreGraphicsState(); + + // paint the styles here and not before showing the text, or the text would appear + // with the interior color + boolean hasBackground = cs.setNonStrokingColorOnDemand(annotation.getInteriorColor()); + + // observed with diagonal line of file AnnotationSample.Standard.pdf + // when width is very small, the border of the line ending shapes + // is not drawn. + if (ab.width < 1e-5) + { + hasStroke = false; + } + + // check for LE_NONE only needed to avoid q cm Q for that case + if (!LE_NONE.equals(annotation.getStartPointEndingStyle())) + { + cs.saveGraphicsState(); + if (ANGLED_STYLES.contains(annotation.getStartPointEndingStyle())) + { + cs.transform(Matrix.getRotateInstance(angle, x1, y1)); + drawStyle(annotation.getStartPointEndingStyle(), cs, 0, y, lineEndingSize, hasStroke, hasBackground, false); + } + else + { + // Support of non-angled styles is more difficult than in the other handlers + // because the lines do not always go from (x1,y1) to (x2,y2) due to the leader lines + // when the "y" value above is not 0. + // We use the angle we already know and the distance y to translate to the new coordinate. 
+ float xx1 = x1 - (float) (y * Math.sin(angle)); + float yy1 = y1 + (float) (y * Math.cos(angle)); + drawStyle(annotation.getStartPointEndingStyle(), cs, xx1, yy1, lineEndingSize, hasStroke, hasBackground, false); + } + cs.restoreGraphicsState(); + } + + // check for LE_NONE only needed to avoid q cm Q for that case + if (!LE_NONE.equals(annotation.getEndPointEndingStyle())) + { + // save / restore not needed because it's the last one + if (ANGLED_STYLES.contains(annotation.getEndPointEndingStyle())) + { + cs.transform(Matrix.getRotateInstance(angle, x2, y2)); + drawStyle(annotation.getEndPointEndingStyle(), cs, 0, y, lineEndingSize, hasStroke, hasBackground, true); + } + else + { + // Support of non-angled styles is more difficult than in the other handlers + // because the lines do not always go from (x1,y1) to (x2,y2) due to the leader lines + // when the "y" value above is not 0. + // We use the angle we already know and the distance y to translate to the new coordinate. + float xx2 = x2 - (float) (y * Math.sin(angle)); + float yy2 = y2 + (float) (y * Math.cos(angle)); + drawStyle(annotation.getEndPointEndingStyle(), cs, xx2, yy2, lineEndingSize, hasStroke, hasBackground, true); + } + } + } + catch (IOException ex) + { + LOG.error(ex); + } + finally{ + IOUtils.closeQuietly(cs); + } + } + + @Override + public void generateRolloverAppearance() + { + // No rollover appearance generated + } + + @Override + public void generateDownAppearance() + { + // No down appearance generated + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDLinkAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDLinkAppearanceHandler.java new file mode 100644 index 00000000000..1acbcafea9d --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDLinkAppearanceHandler.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; + +/** + * Handler to generate the link annotations appearance. 
+ * + */ +public class PDLinkAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDLinkAppearanceHandler.class); + + public PDLinkAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDLinkAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + PDAnnotationLink annotation = (PDAnnotationLink) getAnnotation(); + if (annotation.getRectangle() == null) + { + // 660402-p1-AnnotationEmptyRect.pdf has /Rect entry with 0 elements + return; + } + + // Adobe doesn't generate an appearance for a link annotation + float lineWidth = getLineWidth(); + + PDAppearanceContentStream contentStream = null; + + try + { + contentStream = getNormalAppearanceAsContentStream(); + + PDColor color = annotation.getColor(); + if (color == null) + { + // spec is unclear, but black is what Adobe does + color = new PDColor(new float[] { 0 }, PDDeviceGray.INSTANCE); + } + boolean hasStroke = contentStream.setStrokingColorOnDemand(color); + + contentStream.setBorderLine(lineWidth, annotation.getBorderStyle(), annotation.getBorder()); + + // Acrobat applies a padding to each side of the bbox so the line is completely within + // the bbox. + PDRectangle borderEdge = getPaddedRectangle(getRectangle(),lineWidth/2); + + float[] pathsArray = annotation.getQuadPoints(); + + if (pathsArray != null) + { + // QuadPoints shall be ignored if any coordinate in the array lies outside + // the region specified by Rect. 
+ PDRectangle rect = annotation.getRectangle(); + for (int i = 0; i < pathsArray.length / 2; ++i) + { + if (!rect.contains(pathsArray[i * 2], pathsArray[i * 2 + 1])) + { + LOG.warn("At least one /QuadPoints entry (" + + pathsArray[i * 2] + ";" + pathsArray[i * 2 + 1] + + ") is outside of rectangle, " + rect + + ", /QuadPoints are ignored and /Rect is used instead"); + pathsArray = null; + break; + } + } + } + + if (pathsArray == null) + { + // Convert rectangle coordinates as if it was a /QuadPoints entry + pathsArray = new float[8]; + pathsArray[0] = borderEdge.getLowerLeftX(); + pathsArray[1] = borderEdge.getLowerLeftY(); + pathsArray[2] = borderEdge.getUpperRightX(); + pathsArray[3] = borderEdge.getLowerLeftY(); + pathsArray[4] = borderEdge.getUpperRightX(); + pathsArray[5] = borderEdge.getUpperRightY(); + pathsArray[6] = borderEdge.getLowerLeftX(); + pathsArray[7] = borderEdge.getUpperRightY(); + } + + int of = 0; + while (of + 7 < pathsArray.length) + { + if (annotation.getBorderStyle() != null && + annotation.getBorderStyle().getStyle().equals(PDBorderStyleDictionary.STYLE_UNDERLINE)) + { + contentStream.moveTo(pathsArray[of], pathsArray[of + 1]); + contentStream.lineTo(pathsArray[of + 2], pathsArray[of + 3]); + } + else + { + contentStream.moveTo(pathsArray[of], pathsArray[of + 1]); + contentStream.lineTo(pathsArray[of + 2], pathsArray[of + 3]); + contentStream.lineTo(pathsArray[of + 4], pathsArray[of + 5]); + contentStream.lineTo(pathsArray[of + 6], pathsArray[of + 7]); + contentStream.closePath(); + } + of += 8; + } + + contentStream.drawShape(lineWidth, hasStroke, false); + } + catch (IOException e) + { + LOG.error(e); + } + finally{ + IOUtils.closeQuietly(contentStream); + } + } + + @Override + public void generateRolloverAppearance() + { + // No rollover appearance generated for a link annotation + } + + @Override + public void generateDownAppearance() + { + // No down appearance generated for a link annotation + } + + /** + * Get the line width of the 
border. + * + * Get the width of the line used to draw a border around the annotation. + * This may either be specified by the annotation dictionaries Border + * setting or by the W entry in the BS border style dictionary. If both are + * missing the default width is 1. + * + * @return the line width + */ + // TODO: according to the PDF spec the use of the BS entry is annotation + // specific + // so we will leave that to be implemented by individual handlers. + // If at the end all annotations support the BS entry this can be handled + // here and removed from the individual handlers. + float getLineWidth() + { + PDAnnotationLink annotation = (PDAnnotationLink) getAnnotation(); + + PDBorderStyleDictionary bs = annotation.getBorderStyle(); + + if (bs != null) + { + return bs.getWidth(); + } + + COSArray borderCharacteristics = annotation.getBorder(); + if (borderCharacteristics.size() >= 3) + { + COSBase base = borderCharacteristics.getObject(2); + if (base instanceof COSNumber) + { + return ((COSNumber) base).floatValue(); + } + } + + return 1; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDPolygonAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDPolygonAppearanceHandler.java new file mode 100644 index 00000000000..2def432c24d --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDPolygonAppearanceHandler.java @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderEffectDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; + +/** + * Handler to generate the polygon annotations appearance. 
+ * + */ +public class PDPolygonAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDPolygonAppearanceHandler.class); + + public PDPolygonAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDPolygonAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + PDAnnotationMarkup annotation = (PDAnnotationMarkup) getAnnotation(); + float lineWidth = getLineWidth(); + PDRectangle rect = annotation.getRectangle(); + + // Adjust rectangle even if not empty (max bounds start at -Float.MAX_VALUE because Float.MIN_VALUE is the smallest positive float) + // CTAN-example-Annotations.pdf p2 + float minX = Float.MAX_VALUE; + float minY = Float.MAX_VALUE; + float maxX = -Float.MAX_VALUE; + float maxY = -Float.MAX_VALUE; + + float[][] pathArray = getPathArray(annotation); + if (pathArray == null) + { + return; + } + for (int i = 0; i < pathArray.length; ++i) + { + for (int j = 0; j < pathArray[i].length / 2; ++j) + { + float x = pathArray[i][j * 2]; + float y = pathArray[i][j * 2 + 1]; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + } + + rect.setLowerLeftX(Math.min(minX - lineWidth, rect.getLowerLeftX())); + rect.setLowerLeftY(Math.min(minY - lineWidth, rect.getLowerLeftY())); + rect.setUpperRightX(Math.max(maxX + lineWidth, rect.getUpperRightX())); + rect.setUpperRightY(Math.max(maxY + lineWidth, rect.getUpperRightY())); + annotation.setRectangle(rect); + + PDAppearanceContentStream contentStream = null; + + try + { + contentStream = getNormalAppearanceAsContentStream(); + + boolean hasStroke = contentStream.setStrokingColorOnDemand(getColor()); + + boolean hasBackground = contentStream + .setNonStrokingColorOnDemand(annotation.getInteriorColor()); + + 
setOpacity(contentStream, annotation.getConstantOpacity()); + + contentStream.setBorderLine(lineWidth, annotation.getBorderStyle(), annotation.getBorder()); + + PDBorderEffectDictionary borderEffect = annotation.getBorderEffect(); + if (borderEffect != null && borderEffect.getStyle().equals(PDBorderEffectDictionary.STYLE_CLOUDY)) + { + CloudyBorder cloudyBorder = new CloudyBorder(contentStream, + borderEffect.getIntensity(), lineWidth, getRectangle()); + cloudyBorder.createCloudyPolygon(pathArray); + annotation.setRectangle(cloudyBorder.getRectangle()); + PDAppearanceStream appearanceStream = annotation.getNormalAppearanceStream(); + appearanceStream.setBBox(cloudyBorder.getBBox()); + appearanceStream.setMatrix(cloudyBorder.getMatrix()); + } + else + { + // Acrobat applies a padding to each side of the bbox so the line is + // completely within the bbox. + + for (int i = 0; i < pathArray.length; i++) + { + float[] pointsArray = pathArray[i]; + // first array shall be of size 2 and specify the moveto operator + if (i == 0 && pointsArray.length == 2) + { + contentStream.moveTo(pointsArray[0], pointsArray[1]); + } + else + { + // entries of length 2 shall be treated as lineto operator + if (pointsArray.length == 2) + { + contentStream.lineTo(pointsArray[0], pointsArray[1]); + } + else if (pointsArray.length == 6) + { + contentStream.curveTo(pointsArray[0], pointsArray[1], + pointsArray[2], pointsArray[3], + pointsArray[4], pointsArray[5]); + } + } + } + contentStream.closePath(); + } + contentStream.drawShape(lineWidth, hasStroke, hasBackground); + } + catch (IOException e) + { + LOG.error(e); + } + finally + { + IOUtils.closeQuietly(contentStream); + } + } + + private float[][] getPathArray(PDAnnotationMarkup annotation) + { + // PDF 2.0: Path takes priority over Vertices + float[][] pathArray = annotation.getPath(); + if (pathArray == null) + { + // convert PDF 1.* array to PDF 2.0 array + float[] verticesArray = annotation.getVertices(); + if (verticesArray == 
null) + { + return null; + } + int points = verticesArray.length / 2; + pathArray = new float[points][2]; + for (int i = 0; i < points; ++i) + { + pathArray[i][0] = verticesArray[i * 2]; + pathArray[i][1] = verticesArray[i * 2 + 1]; + } + } + return pathArray; + } + + @Override + public void generateRolloverAppearance() + { + // No rollover appearance generated for a polygon annotation + } + + @Override + public void generateDownAppearance() + { + // No down appearance generated for a polygon annotation + } + + /** + * Get the line width of the border. + * + * Get the width of the line used to draw a border around the annotation. + * This may either be specified by the annotation dictionaries Border + * setting or by the W entry in the BS border style dictionary. If both are + * missing the default width is 1. + * + * @return the line width + */ + // TODO: according to the PDF spec the use of the BS entry is annotation + // specific + // so we will leave that to be implemented by individual handlers. + // If at the end all annotations support the BS entry this can be handled + // here and removed from the individual handlers. 
+ float getLineWidth() + { + PDAnnotationMarkup annotation = (PDAnnotationMarkup) getAnnotation(); + + PDBorderStyleDictionary bs = annotation.getBorderStyle(); + + if (bs != null) + { + return bs.getWidth(); + } + + COSArray borderCharacteristics = annotation.getBorder(); + if (borderCharacteristics.size() >= 3) + { + COSBase base = borderCharacteristics.getObject(2); + if (base instanceof COSNumber) + { + return ((COSNumber) base).floatValue(); + } + } + + return 1; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDPolylineAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDPolylineAppearanceHandler.java new file mode 100644 index 00000000000..208357761b7 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDPolylineAppearanceHandler.java @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; +import static org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLine.LE_NONE; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; +import org.apache.pdfbox.util.Matrix; + +/** + * Handler to generate the polyline annotations appearance. + * + */ +public class PDPolylineAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDPolylineAppearanceHandler.class); + + public PDPolylineAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDPolylineAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + PDAnnotationMarkup annotation = (PDAnnotationMarkup) getAnnotation(); + PDRectangle rect = annotation.getRectangle(); + float[] pathsArray = annotation.getVertices(); + if (pathsArray == null || pathsArray.length < 4) + { + return; + } + AnnotationBorder ab = AnnotationBorder.getAnnotationBorder(annotation, annotation.getBorderStyle()); + PDColor color = annotation.getColor(); + if (color 
== null || color.getComponents().length == 0 || Float.compare(ab.width, 0) == 0) + { + return; + } + + // Adjust rectangle even if not empty (max bounds start at -Float.MAX_VALUE because Float.MIN_VALUE is the smallest positive float) + // CTAN-example-Annotations.pdf and pdf_commenting_new.pdf p11 + //TODO in a class structure this should be overridable + float minX = Float.MAX_VALUE; + float minY = Float.MAX_VALUE; + float maxX = -Float.MAX_VALUE; + float maxY = -Float.MAX_VALUE; + for (int i = 0; i < pathsArray.length / 2; ++i) + { + float x = pathsArray[i * 2]; + float y = pathsArray[i * 2 + 1]; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + // arrow length is 9 * width at about 30° => 10 * width seems to be enough + rect.setLowerLeftX(Math.min(minX - ab.width * 10, rect.getLowerLeftX())); + rect.setLowerLeftY(Math.min(minY - ab.width * 10, rect.getLowerLeftY())); + rect.setUpperRightX(Math.max(maxX + ab.width * 10, rect.getUpperRightX())); + rect.setUpperRightY(Math.max(maxY + ab.width * 10, rect.getUpperRightY())); + annotation.setRectangle(rect); + + PDAppearanceContentStream cs = null; + + try + { + cs = getNormalAppearanceAsContentStream(); + + boolean hasBackground = cs.setNonStrokingColorOnDemand(annotation.getInteriorColor()); + setOpacity(cs, annotation.getConstantOpacity()); + boolean hasStroke = cs.setStrokingColorOnDemand(color); + + if (ab.dashArray != null) + { + cs.setLineDashPattern(ab.dashArray, 0); + } + cs.setLineWidth(ab.width); + + for (int i = 0; i < pathsArray.length / 2; ++i) + { + float x = pathsArray[i * 2]; + float y = pathsArray[i * 2 + 1]; + if (i == 0) + { + if (SHORT_STYLES.contains(annotation.getStartPointEndingStyle())) + { + // modify coordinate to shorten the segment + // https://stackoverflow.com/questions/7740507/extend-a-line-segment-a-specific-distance + float x1 = pathsArray[2]; + float y1 = pathsArray[3]; + float len = (float) (Math.sqrt(Math.pow(x - x1, 2) + Math.pow(y - y1, 2))); + if (Float.compare(len, 0) != 0) + { + x += (x1 - x) / len * 
ab.width; + y += (y1 - y) / len * ab.width; + } + } + cs.moveTo(x, y); + } + else + { + if (i == pathsArray.length / 2 - 1 && + SHORT_STYLES.contains(annotation.getEndPointEndingStyle())) + { + // modify coordinate to shorten the segment + // https://stackoverflow.com/questions/7740507/extend-a-line-segment-a-specific-distance + float x0 = pathsArray[pathsArray.length - 4]; + float y0 = pathsArray[pathsArray.length - 3]; + float len = (float) (Math.sqrt(Math.pow(x0 - x, 2) + Math.pow(y0 - y, 2))); + if (Float.compare(len, 0) != 0) + { + x -= (x - x0) / len * ab.width; + y -= (y - y0) / len * ab.width; + } + } + cs.lineTo(x, y); + } + } + cs.stroke(); + + // do a transform so that first and last "arms" are imagined flat, like in line handler + // the alternative would be to apply the transform to the LE shapes directly, + // which would be more work and produce code difficult to understand + + // paint the styles here and after polyline draw, to avoid line crossing a filled shape + if (!LE_NONE.equals(annotation.getStartPointEndingStyle())) + { + // check only needed to avoid q cm Q if LE_NONE + float x2 = pathsArray[2]; + float y2 = pathsArray[3]; + float x1 = pathsArray[0]; + float y1 = pathsArray[1]; + cs.saveGraphicsState(); + if (ANGLED_STYLES.contains(annotation.getStartPointEndingStyle())) + { + double angle = Math.atan2(y2 - y1, x2 - x1); + cs.transform(Matrix.getRotateInstance(angle, x1, y1)); + } + else + { + cs.transform(Matrix.getTranslateInstance(x1, y1)); + } + drawStyle(annotation.getStartPointEndingStyle(), cs, 0, 0, ab.width, hasStroke, hasBackground, false); + cs.restoreGraphicsState(); + } + + if (!LE_NONE.equals(annotation.getEndPointEndingStyle())) + { + // check only needed to avoid q cm Q if LE_NONE + float x1 = pathsArray[pathsArray.length - 4]; + float y1 = pathsArray[pathsArray.length - 3]; + float x2 = pathsArray[pathsArray.length - 2]; + float y2 = pathsArray[pathsArray.length - 1]; + // save / restore not needed because it's the last one 
+ if (ANGLED_STYLES.contains(annotation.getEndPointEndingStyle())) + { + double angle = Math.atan2(y2 - y1, x2 - x1); + cs.transform(Matrix.getRotateInstance(angle, x2, y2)); + } + else + { + cs.transform(Matrix.getTranslateInstance(x2, y2)); + } + drawStyle(annotation.getEndPointEndingStyle(), cs, 0, 0, ab.width, hasStroke, hasBackground, true); + } + } + catch (IOException ex) + { + LOG.error(ex); + } + finally + { + IOUtils.closeQuietly(cs); + } + } + + @Override + public void generateRolloverAppearance() + { + // No rollover appearance generated for a polyline annotation + } + + @Override + public void generateDownAppearance() + { + // No down appearance generated for a polyline annotation + } + + //TODO DRY, this code is from polygonAppearanceHandler so it's double + + /** + * Get the line width of the border. + * + * Get the width of the line used to draw a border around the annotation. + * This may either be specified by the annotation dictionaries Border + * setting or by the W entry in the BS border style dictionary. If both are + * missing the default width is 1. + * + * @return the line width + */ + // TODO: according to the PDF spec the use of the BS entry is annotation + // specific + // so we will leave that to be implemented by individual handlers. + // If at the end all annotations support the BS entry this can be handled + // here and removed from the individual handlers. 
+ float getLineWidth() + { + PDAnnotationMarkup annotation = (PDAnnotationMarkup) getAnnotation(); + + PDBorderStyleDictionary bs = annotation.getBorderStyle(); + + if (bs != null) + { + return bs.getWidth(); + } + + COSArray borderCharacteristics = annotation.getBorder(); + if (borderCharacteristics.size() >= 3) + { + COSBase base = borderCharacteristics.getObject(2); + if (base instanceof COSNumber) + { + return ((COSNumber) base).floatValue(); + } + } + + return 1; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDSoundAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDSoundAppearanceHandler.java new file mode 100644 index 00000000000..09c0439a2b4 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDSoundAppearanceHandler.java @@ -0,0 +1,58 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; + +public class PDSoundAppearanceHandler extends PDAbstractAppearanceHandler +{ + public PDSoundAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDSoundAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + // TODO to be implemented + } + + @Override + public void generateRolloverAppearance() + { + // TODO to be implemented + } + + @Override + public void generateDownAppearance() + { + // TODO to be implemented + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDSquareAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDSquareAppearanceHandler.java new file mode 100644 index 00000000000..50690c83fc3 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDSquareAppearanceHandler.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationSquareCircle; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderEffectDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; + +/** + * Handler to generate the square annotations appearance. 
+ * + */ +public class PDSquareAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDSquareAppearanceHandler.class); + + public PDSquareAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDSquareAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + float lineWidth = getLineWidth(); + PDAnnotationSquareCircle annotation = (PDAnnotationSquareCircle) getAnnotation(); + PDAppearanceContentStream contentStream = null; + + try + { + contentStream = getNormalAppearanceAsContentStream(); + boolean hasStroke = contentStream.setStrokingColorOnDemand(getColor()); + boolean hasBackground = contentStream + .setNonStrokingColorOnDemand(annotation.getInteriorColor()); + + setOpacity(contentStream, annotation.getConstantOpacity()); + + contentStream.setBorderLine(lineWidth, annotation.getBorderStyle(), annotation.getBorder()); + PDBorderEffectDictionary borderEffect = annotation.getBorderEffect(); + + if (borderEffect != null && borderEffect.getStyle().equals(PDBorderEffectDictionary.STYLE_CLOUDY)) + { + CloudyBorder cloudyBorder = new CloudyBorder(contentStream, + borderEffect.getIntensity(), lineWidth, getRectangle()); + cloudyBorder.createCloudyRectangle(annotation.getRectDifference()); + annotation.setRectangle(cloudyBorder.getRectangle()); + annotation.setRectDifference(cloudyBorder.getRectDifference()); + PDAppearanceStream appearanceStream = annotation.getNormalAppearanceStream(); + appearanceStream.setBBox(cloudyBorder.getBBox()); + appearanceStream.setMatrix(cloudyBorder.getMatrix()); + } + else + { + PDRectangle borderBox = handleBorderBox(annotation, lineWidth); + + contentStream.addRect(borderBox.getLowerLeftX(), 
borderBox.getLowerLeftY(), + borderBox.getWidth(), borderBox.getHeight()); + } + + contentStream.drawShape(lineWidth, hasStroke, hasBackground); + } + catch (IOException e) + { + LOG.error(e); + } + finally{ + IOUtils.closeQuietly(contentStream); + } + } + + @Override + public void generateRolloverAppearance() + { + // TODO to be implemented + } + + @Override + public void generateDownAppearance() + { + // TODO to be implemented + } + + /** + * Get the line width of the border. + * + * Get the width of the line used to draw a border around the annotation. + * This may either be specified by the annotation dictionary's Border + * setting or by the W entry in the BS border style dictionary (BS is checked first). If both are + * missing the default width is 1. + * + * @return the line width + */ + // TODO: according to the PDF spec the use of the BS entry is annotation + // specific + // so we will leave that to be implemented by individual handlers. + // If at the end all annotations support the BS entry this can be handled + // here and removed from the individual handlers.
+ float getLineWidth() + { + PDAnnotationMarkup annotation = (PDAnnotationMarkup) getAnnotation(); + + PDBorderStyleDictionary bs = annotation.getBorderStyle(); + + if (bs != null) + { + return bs.getWidth(); + } + + COSArray borderCharacteristics = annotation.getBorder(); + if (borderCharacteristics.size() >= 3) + { + COSBase base = borderCharacteristics.getObject(2); + if (base instanceof COSNumber) + { + return ((COSNumber) base).floatValue(); + } + } + + return 1; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDSquigglyAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDSquigglyAppearanceHandler.java new file mode 100644 index 00000000000..c6b989157a1 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDSquigglyAppearanceHandler.java @@ -0,0 +1,212 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.awt.geom.AffineTransform; +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDFormContentStream; +import org.apache.pdfbox.pdmodel.PDPatternContentStream; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.graphics.color.PDPattern; +import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; +import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; +import org.apache.pdfbox.util.Matrix; + +/** + * + */ +public class PDSquigglyAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDSquigglyAppearanceHandler.class); + + public PDSquigglyAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDSquigglyAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + PDAnnotationTextMarkup annotation = (PDAnnotationTextMarkup) getAnnotation(); + PDRectangle rect = annotation.getRectangle(); + float[] pathsArray = annotation.getQuadPoints(); + if 
(pathsArray == null) + { + return; + } + AnnotationBorder ab = AnnotationBorder.getAnnotationBorder(annotation, annotation.getBorderStyle()); + PDColor color = annotation.getColor(); + if (color == null || color.getComponents().length == 0) + { + return; + } + if (Float.compare(ab.width, 0) == 0) + { + // value found in adobe reader + ab.width = 1.5f; + } + + // Adjust rectangle even if not empty, see PLPDF.com-MarkupAnnotations.pdf + //TODO in a class structure this should be overridable + // this is similar to polyline but different data type + // all coordinates (unlike painting) are used because I'm lazy + float minX = Float.MAX_VALUE; + float minY = Float.MAX_VALUE; + float maxX = -Float.MAX_VALUE; // not Float.MIN_VALUE: that is the smallest positive float, not the most negative + float maxY = -Float.MAX_VALUE; // same reason, so all-negative coordinates yield a correct maximum + for (int i = 0; i < pathsArray.length / 2; ++i) + { + float x = pathsArray[i * 2]; + float y = pathsArray[i * 2 + 1]; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + rect.setLowerLeftX(Math.min(minX - ab.width / 2, rect.getLowerLeftX())); + rect.setLowerLeftY(Math.min(minY - ab.width / 2, rect.getLowerLeftY())); + rect.setUpperRightX(Math.max(maxX + ab.width / 2, rect.getUpperRightX())); + rect.setUpperRightY(Math.max(maxY + ab.width / 2, rect.getUpperRightY())); + annotation.setRectangle(rect); + + PDAppearanceContentStream cs = null; + + try + { + cs = getNormalAppearanceAsContentStream(); + + setOpacity(cs, annotation.getConstantOpacity()); + + cs.setStrokingColor(color); + + //TODO we ignore dash pattern and line width for now. Do they have any effect? + + + // quadpoints spec is incorrect + // https://stackoverflow.com/questions/9855814/pdf-spec-vs-acrobat-creation-quadpoints + for (int i = 0; i < pathsArray.length / 8; ++i) + { + // Adobe uses a fixed pattern that assumes a height of 40, and it transforms to that height + // horizontally and the same / 1.8 vertically.
+ // translation apparently based on bottom left, but slightly different in Adobe + //TODO what if the annotation is not horizontal? + float height = pathsArray[i * 8 + 1] - pathsArray[i * 8 + 5]; + cs.transform(new Matrix(height / 40f, 0, 0, height / 40f / 1.8f, pathsArray[i * 8 + 4], pathsArray[i * 8 + 5])); + + // Create form, BBox is mostly fixed, except for the horizontal size which is + // horizontal size divided by the horizontal transform factor from above + // (almost) + PDFormXObject form = new PDFormXObject(createCOSStream()); + form.setBBox(new PDRectangle(-0.5f, -0.5f, (pathsArray[i * 8 + 2] - pathsArray[i * 8]) / height * 40f + 0.5f, 13)); + form.setResources(new PDResources()); + form.setMatrix(AffineTransform.getTranslateInstance(0.5f, 0.5f)); + cs.drawForm(form); + + PDFormContentStream formCS = null; + + try + { + formCS = new PDFormContentStream(form); + PDTilingPattern pattern = new PDTilingPattern(); + pattern.setBBox(new PDRectangle(0, 0, 10, 12)); + pattern.setXStep(10); + pattern.setYStep(13); + pattern.setTilingType(PDTilingPattern.TILING_CONSTANT_SPACING_FASTER_TILING); + pattern.setPaintType(PDTilingPattern.PAINT_UNCOLORED); + + PDPatternContentStream patternCS = null; + + try + { + patternCS = new PDPatternContentStream(pattern); + // from Adobe + patternCS.setLineCapStyle(1); + patternCS.setLineJoinStyle(1); + patternCS.setLineWidth(1); + patternCS.setMiterLimit(10); + patternCS.moveTo(0, 1); + patternCS.lineTo(5, 11); + patternCS.lineTo(10, 1); + patternCS.stroke(); + } + finally + { + IOUtils.closeQuietly(patternCS); + } + + COSName patternName = form.getResources().add(pattern); + PDColorSpace patternColorSpace = new PDPattern(null, PDDeviceRGB.INSTANCE); + PDColor patternColor = new PDColor(color.getComponents(), patternName, patternColorSpace); + formCS.setNonStrokingColor(patternColor); + + // With Adobe, the horizontal size is slightly different, don't know why + formCS.addRect(0, 0, (pathsArray[i * 8 + 2] - pathsArray[i * 8]) / 
height * 40f, 12); + formCS.fill(); + } + finally + { + IOUtils.closeQuietly(formCS); + } + } + } + catch (IOException ex) + { + LOG.error(ex); + } + finally + { + IOUtils.closeQuietly(cs); + } + } + + @Override + public void generateRolloverAppearance() + { + // No rollover appearance generated + } + + @Override + public void generateDownAppearance() + { + // No down appearance generated + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDStrikeoutAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDStrikeoutAppearanceHandler.java new file mode 100644 index 00000000000..8779f49d903 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDStrikeoutAppearanceHandler.java @@ -0,0 +1,169 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; + +/** + * + */ +public class PDStrikeoutAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDStrikeoutAppearanceHandler.class); + + public PDStrikeoutAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDStrikeoutAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + PDAnnotationTextMarkup annotation = (PDAnnotationTextMarkup) getAnnotation(); + PDRectangle rect = annotation.getRectangle(); + float[] pathsArray = annotation.getQuadPoints(); + if (pathsArray == null) + { + return; + } + AnnotationBorder ab = AnnotationBorder.getAnnotationBorder(annotation, annotation.getBorderStyle()); + PDColor color = annotation.getColor(); + if (color == null || color.getComponents().length == 0) + { + return; + } + if (Float.compare(ab.width, 0) == 0) + { + // value found in adobe reader + ab.width = 1.5f; + } + + // Adjust rectangle even if not empty, see PLPDF.com-MarkupAnnotations.pdf + //TODO in a class structure this should be overridable + // this is similar to polyline but different data type + float minX = Float.MAX_VALUE; + float minY 
= Float.MAX_VALUE; + float maxX = -Float.MAX_VALUE; // not Float.MIN_VALUE: that is the smallest positive float, not the most negative + float maxY = -Float.MAX_VALUE; // same reason, so all-negative coordinates yield a correct maximum + for (int i = 0; i < pathsArray.length / 2; ++i) + { + float x = pathsArray[i * 2]; + float y = pathsArray[i * 2 + 1]; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + rect.setLowerLeftX(Math.min(minX - ab.width / 2, rect.getLowerLeftX())); + rect.setLowerLeftY(Math.min(minY - ab.width / 2, rect.getLowerLeftY())); + rect.setUpperRightX(Math.max(maxX + ab.width / 2, rect.getUpperRightX())); + rect.setUpperRightY(Math.max(maxY + ab.width / 2, rect.getUpperRightY())); + annotation.setRectangle(rect); + + PDAppearanceContentStream cs = null; + + try + { + cs = getNormalAppearanceAsContentStream(); + + setOpacity(cs, annotation.getConstantOpacity()); + + cs.setStrokingColor(color); + if (ab.dashArray != null) + { + cs.setLineDashPattern(ab.dashArray, 0); + } + cs.setLineWidth(ab.width); + + // spec is incorrect + // https://stackoverflow.com/questions/9855814/pdf-spec-vs-acrobat-creation-quadpoints + for (int i = 0; i < pathsArray.length / 8; ++i) + { + // get mid point between bounds, subtract the line width to approximate what Adobe is doing + // See e.g.
CTAN-example-Annotations.pdf and PLPDF.com-MarkupAnnotations.pdf + // and https://bugs.ghostscript.com/show_bug.cgi?id=693664 + // do the math for diagonal annotations with this weird old trick: + // https://stackoverflow.com/questions/7740507/extend-a-line-segment-a-specific-distance + float len0 = (float) (Math.sqrt(Math.pow(pathsArray[i * 8] - pathsArray[i * 8 + 4], 2) + + Math.pow(pathsArray[i * 8 + 1] - pathsArray[i * 8 + 5], 2))); + float x0 = pathsArray[i * 8 + 4]; + float y0 = pathsArray[i * 8 + 5]; + if (Float.compare(len0, 0) != 0) + { + // only if both coordinates are not identical to avoid divide by zero + x0 += (pathsArray[i * 8] - pathsArray[i * 8 + 4]) / len0 * (len0 / 2 - ab.width); + y0 += (pathsArray[i * 8 + 1] - pathsArray[i * 8 + 5]) / len0 * (len0 / 2 - ab.width); + } + float len1 = (float) (Math.sqrt(Math.pow(pathsArray[i * 8 + 2] - pathsArray[i * 8 + 6], 2) + + Math.pow(pathsArray[i * 8 + 3] - pathsArray[i * 8 + 7], 2))); + float x1 = pathsArray[i * 8 + 6]; + float y1 = pathsArray[i * 8 + 7]; + if (Float.compare(len1, 0) != 0) + { + // only if both coordinates are not identical to avoid divide by zero + x1 += (pathsArray[i * 8 + 2] - pathsArray[i * 8 + 6]) / len1 * (len1 / 2 - ab.width); + y1 += (pathsArray[i * 8 + 3] - pathsArray[i * 8 + 7]) / len1 * (len1 / 2 - ab.width); + } + cs.moveTo(x0, y0); + cs.lineTo(x1, y1); + } + cs.stroke(); + } + catch (IOException ex) + { + LOG.error(ex); + } + finally + { + IOUtils.closeQuietly(cs); + } + } + + @Override + public void generateRolloverAppearance() + { + // No rollover appearance generated + } + + @Override + public void generateDownAppearance() + { + // No down appearance generated + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDTextAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDTextAppearanceHandler.java new file mode 100644 index 00000000000..174dd8b7535 --- /dev/null +++ 
b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDTextAppearanceHandler.java @@ -0,0 +1,774 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.awt.geom.AffineTransform; +import java.awt.geom.GeneralPath; +import java.awt.geom.PathIterator; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.graphics.blend.BlendMode; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationText; +import org.apache.pdfbox.util.Matrix; + +/** + * + * @author Tilman Hausherr + */ +public class PDTextAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDTextAppearanceHandler.class); + + private static final Set 
SUPPORTED_NAMES = new HashSet(); + + static + { + SUPPORTED_NAMES.add(PDAnnotationText.NAME_NOTE); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_INSERT); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_CROSS); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_HELP); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_CIRCLE); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_PARAGRAPH); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_NEW_PARAGRAPH); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_CHECK); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_STAR); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_RIGHT_ARROW); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_RIGHT_POINTER); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_CROSS_HAIRS); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_UP_ARROW); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_UP_LEFT_ARROW); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_COMMENT); + SUPPORTED_NAMES.add(PDAnnotationText.NAME_KEY); + } + + public PDTextAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDTextAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + PDAnnotationText annotation = (PDAnnotationText) getAnnotation(); + if (!SUPPORTED_NAMES.contains(annotation.getName())) + { + return; + } + + PDAppearanceContentStream contentStream = null; + + try + { + contentStream = getNormalAppearanceAsContentStream(); + + PDColor bgColor = getColor(); + if (bgColor == null) + { + // White is used by Adobe when /C entry is missing + contentStream.setNonStrokingColor(1f); + } + else + { + contentStream.setNonStrokingColor(bgColor); + } + // stroking color is always black which is the PDF default + + setOpacity(contentStream, annotation.getConstantOpacity()); + + String annotationTypeName = 
annotation.getName(); + + if (PDAnnotationText.NAME_NOTE.equals(annotationTypeName)) + { + drawNote(annotation, contentStream); + } + else if (PDAnnotationText.NAME_CROSS.equals(annotationTypeName)) + { + drawCross(annotation, contentStream); + } + else if (PDAnnotationText.NAME_CIRCLE.equals(annotationTypeName)) + { + drawCircles(annotation, contentStream); + } + else if (PDAnnotationText.NAME_INSERT.equals(annotationTypeName)) + { + drawInsert(annotation, contentStream); + } + else if (PDAnnotationText.NAME_HELP.equals(annotationTypeName)) + { + drawHelp(annotation, contentStream); + } + else if (PDAnnotationText.NAME_PARAGRAPH.equals(annotationTypeName)) + { + drawParagraph(annotation, contentStream); + } + else if (PDAnnotationText.NAME_NEW_PARAGRAPH.equals(annotationTypeName)) + { + drawNewParagraph(annotation, contentStream); + } + else if (PDAnnotationText.NAME_STAR.equals(annotationTypeName)) + { + drawStar(annotation, contentStream); + } + else if (PDAnnotationText.NAME_CHECK.equals(annotationTypeName)) + { + drawCheck(annotation, contentStream); + } + else if (PDAnnotationText.NAME_RIGHT_ARROW.equals(annotationTypeName)) + { + drawRightArrow(annotation, contentStream); + } + else if (PDAnnotationText.NAME_RIGHT_POINTER.equals(annotationTypeName)) + { + drawRightPointer(annotation, contentStream); + } + else if (PDAnnotationText.NAME_CROSS_HAIRS.equals(annotationTypeName)) + { + drawCrossHairs(annotation, contentStream); + } + else if (PDAnnotationText.NAME_UP_ARROW.equals(annotationTypeName)) + { + drawUpArrow(annotation, contentStream); + } + else if (PDAnnotationText.NAME_UP_LEFT_ARROW.equals(annotationTypeName)) + { + drawUpLeftArrow(annotation, contentStream); + } + else if (PDAnnotationText.NAME_COMMENT.equals(annotationTypeName)) + { + drawComment(annotation, contentStream); + } + else if (PDAnnotationText.NAME_KEY.equals(annotationTypeName)) + { + drawKey(annotation, contentStream); + } + } + catch (IOException e) + { + LOG.error(e); + } + finally 
+ { + IOUtils.closeQuietly(contentStream); + } + } + + private PDRectangle adjustRectAndBBox(PDAnnotationText annotation, float width, float height) + { + // For /Note (other types have different values): + // Adobe takes the left upper bound as anchor, and adjusts the rectangle to 18 x 20. + // Observed with files 007071.pdf, 038785.pdf, 038787.pdf, + // but not with 047745.pdf p133 and 084374.pdf p48, both have the NoZoom flag. + // there the BBox is also set to fixed values, but the rectangle is left untouched. + // When no flags are there, Adobe sets /F 24 = NoZoom NoRotate. + + PDRectangle rect = getRectangle(); + PDRectangle bbox; + if (!annotation.isNoZoom()) + { + rect.setUpperRightX(rect.getLowerLeftX() + width); + rect.setLowerLeftY(rect.getUpperRightY() - height); + annotation.setRectangle(rect); + } + if (!annotation.getCOSObject().containsKey(COSName.F)) + { + // We set these flags because Adobe does so, but PDFBox doesn't support them when rendering. + annotation.setNoRotate(true); + annotation.setNoZoom(true); + } + bbox = new PDRectangle(width, height); + annotation.getNormalAppearanceStream().setBBox(bbox); + return bbox; + } + + private void drawNote(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + PDRectangle bbox = adjustRectAndBBox(annotation, 18, 20); + contentStream.setMiterLimit(4); + + // get round edge the easy way. Adobe uses 4 lines with 4 arcs of radius 0.785 which is bigger. 
+ contentStream.setLineJoinStyle(1); + + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(0.61f); // value from Adobe + contentStream.addRect(1, 1, bbox.getWidth() - 2, bbox.getHeight() - 2); + contentStream.moveTo(bbox.getWidth() / 4, bbox.getHeight() / 7 * 2); + contentStream.lineTo(bbox.getWidth() * 3 / 4 - 1, bbox.getHeight() / 7 * 2); + contentStream.moveTo(bbox.getWidth() / 4, bbox.getHeight() / 7 * 3); + contentStream.lineTo(bbox.getWidth() * 3 / 4 - 1, bbox.getHeight() / 7 * 3); + contentStream.moveTo(bbox.getWidth() / 4, bbox.getHeight() / 7 * 4); + contentStream.lineTo(bbox.getWidth() * 3 / 4 - 1, bbox.getHeight() / 7 * 4); + contentStream.moveTo(bbox.getWidth() / 4, bbox.getHeight() / 7 * 5); + contentStream.lineTo(bbox.getWidth() * 3 / 4 - 1, bbox.getHeight() / 7 * 5); + contentStream.fillAndStroke(); + } + + private void drawCircles(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + PDRectangle bbox = adjustRectAndBBox(annotation, 20, 20); + + // strategy used by Adobe: + // 1) add small circle in white using /ca /CA 0.6 and width 1 + // 2) fill + // 3) add small circle in one direction + // 4) add large circle in other direction + // 5) stroke + fill + // with square width 20 small r = 6.36, large r = 9.756 + + float smallR = 6.36f; + float largeR = 9.756f; + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(1); + contentStream.setLineCapStyle(0); + contentStream.saveGraphicsState(); + contentStream.setLineWidth(1); + PDExtendedGraphicsState gs = new PDExtendedGraphicsState(); + gs.setAlphaSourceFlag(false); + gs.setStrokingAlphaConstant(0.6f); + gs.setNonStrokingAlphaConstant(0.6f); + gs.setBlendMode(BlendMode.NORMAL); + contentStream.setGraphicsStateParameters(gs); + contentStream.setNonStrokingColor(1f); + drawCircle(contentStream, bbox.getWidth() / 2, bbox.getHeight() / 2, smallR); + contentStream.fill(); + contentStream.restoreGraphicsState(); + + 
contentStream.setLineWidth(0.59f); // value from Adobe + drawCircle(contentStream, bbox.getWidth() / 2, bbox.getHeight() / 2, smallR); + drawCircle2(contentStream, bbox.getWidth() / 2, bbox.getHeight() / 2, largeR); + contentStream.fillAndStroke(); + } + + private void drawInsert(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + PDRectangle bbox = adjustRectAndBBox(annotation, 17, 20); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(0); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(0.59f); // value from Adobe + contentStream.moveTo(bbox.getWidth() / 2 - 1, bbox.getHeight() - 2); + contentStream.lineTo(1, 1); + contentStream.lineTo(bbox.getWidth() - 2, 1); + contentStream.closeAndFillAndStroke(); + } + + private void drawCross(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + PDRectangle bbox = adjustRectAndBBox(annotation, 19, 19); + + // should be a square, but who knows... 
+ float min = Math.min(bbox.getWidth(), bbox.getHeight()); + + // small = offset nearest bbox edge + // large = offset second nearest bbox edge + float small = min / 10; + float large = min / 5; + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(1); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(0.59f); // value from Adobe + + contentStream.moveTo(small, large); + contentStream.lineTo(large, small); + contentStream.lineTo(min / 2, min / 2 - small); + contentStream.lineTo(min - large, small); + contentStream.lineTo(min - small, large); + contentStream.lineTo(min / 2 + small, min / 2); + contentStream.lineTo(min - small, min - large); + contentStream.lineTo(min - large, min - small); + contentStream.lineTo(min / 2, min / 2 + small); + contentStream.lineTo(large, min - small); + contentStream.lineTo(small, min - large); + contentStream.lineTo(min / 2 - small, min / 2); + contentStream.closeAndFillAndStroke(); + + // alternatively, this could also be drawn with Zapf Dingbats "a21" + // see DrawStar() + } + + private void drawHelp(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + PDRectangle bbox = adjustRectAndBBox(annotation, 20, 20); + + float min = Math.min(bbox.getWidth(), bbox.getHeight()); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(1); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(0.59f); // value from Adobe + + // Adobe first fills a white circle with CA ca 0.6, so do we + contentStream.saveGraphicsState(); + contentStream.setLineWidth(1); + PDExtendedGraphicsState gs = new PDExtendedGraphicsState(); + gs.setAlphaSourceFlag(false); + gs.setStrokingAlphaConstant(0.6f); + gs.setNonStrokingAlphaConstant(0.6f); + gs.setBlendMode(BlendMode.NORMAL); + contentStream.setGraphicsStateParameters(gs); + contentStream.setNonStrokingColor(1f); + drawCircle2(contentStream, min / 2, min / 2, min / 2 - 1); + contentStream.fill(); + 
contentStream.restoreGraphicsState(); + + contentStream.saveGraphicsState(); + // rescale so that "?" fits into circle and move "?" to circle center + // values gathered by trial and error + contentStream.transform(Matrix.getScaleInstance(0.001f * min / 2.25f, 0.001f * min / 2.25f)); + contentStream.transform(Matrix.getTranslateInstance(500, 375)); + + // we get the shape of an Helvetica bold "?" and use that one. + // Adobe uses a different font (which one?), or created the shape from scratch. + GeneralPath path = PDType1Font.HELVETICA_BOLD.getPath("question"); + addPath(contentStream, path); + contentStream.restoreGraphicsState(); + // draw the outer circle counterclockwise to fill area between circle and "?" + drawCircle2(contentStream, min / 2, min / 2, min / 2 - 1); + contentStream.fillAndStroke(); + } + + private void drawParagraph(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + PDRectangle bbox = adjustRectAndBBox(annotation, 20, 20); + + float min = Math.min(bbox.getWidth(), bbox.getHeight()); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(1); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(0.59f); // value from Adobe + + // Adobe first fills a white circle with CA ca 0.6, so do we + contentStream.saveGraphicsState(); + contentStream.setLineWidth(1); + PDExtendedGraphicsState gs = new PDExtendedGraphicsState(); + gs.setAlphaSourceFlag(false); + gs.setStrokingAlphaConstant(0.6f); + gs.setNonStrokingAlphaConstant(0.6f); + gs.setBlendMode(BlendMode.NORMAL); + contentStream.setGraphicsStateParameters(gs); + contentStream.setNonStrokingColor(1f); + drawCircle2(contentStream, min / 2, min / 2, min / 2 - 1); + contentStream.fill(); + contentStream.restoreGraphicsState(); + + contentStream.saveGraphicsState(); + // rescale so that "?" fits into circle and move "?" 
to circle center + // values gathered by trial and error + contentStream.transform(Matrix.getScaleInstance(0.001f * min / 3, 0.001f * min / 3)); + contentStream.transform(Matrix.getTranslateInstance(850, 900)); + + // we get the shape of an Helvetica "?" and use that one. + // Adobe uses a different font (which one?), or created the shape from scratch. + GeneralPath path = PDType1Font.HELVETICA.getPath("paragraph"); + addPath(contentStream, path); + contentStream.restoreGraphicsState(); + contentStream.fillAndStroke(); + drawCircle(contentStream, min / 2, min / 2, min / 2 - 1); + contentStream.stroke(); + } + + private void drawNewParagraph(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + adjustRectAndBBox(annotation, 13, 20); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(0); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(0.59f); // value from Adobe + + // small triangle (values from Adobe) + contentStream.moveTo(6.4995f, 20); + contentStream.lineTo(0.295f, 7.287f); + contentStream.lineTo(12.705f, 7.287f); + contentStream.closeAndFillAndStroke(); + + // rescale and translate so that "NP" fits below the triangle + // values gathered by trial and error + contentStream.transform(Matrix.getScaleInstance(0.001f * 4, 0.001f * 4)); + contentStream.transform(Matrix.getTranslateInstance(200, 0)); + addPath(contentStream, PDType1Font.HELVETICA_BOLD.getPath("N")); + contentStream.transform(Matrix.getTranslateInstance(1300, 0)); + addPath(contentStream, PDType1Font.HELVETICA_BOLD.getPath("P")); + contentStream.fill(); + } + + private void drawStar(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + PDRectangle bbox = adjustRectAndBBox(annotation, 20, 19); + + float min = Math.min(bbox.getWidth(), bbox.getHeight()); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(1); + contentStream.setLineCapStyle(0); + 
contentStream.setLineWidth(0.59f); // value from Adobe + + contentStream.transform(Matrix.getScaleInstance(0.001f * min / 0.8f, 0.001f * min / 0.8f)); + + // we get the shape of a Zapf Dingbats star (0x2605) and use that one. + // Adobe uses a different font (which one?), or created the shape from scratch. + GeneralPath path = PDType1Font.ZAPF_DINGBATS.getPath("a35"); + addPath(contentStream, path); + contentStream.fillAndStroke(); + } + + //TODO this is mostly identical to drawStar, except for scale, translation and symbol + // maybe use a table with all values and draw from there + // this could also optionally use outer circle + private void drawCheck(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + PDRectangle bbox = adjustRectAndBBox(annotation, 20, 19); + + float min = Math.min(bbox.getWidth(), bbox.getHeight()); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(1); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(0.59f); // value from Adobe + + contentStream.transform(Matrix.getScaleInstance(0.001f * min / 0.8f, 0.001f * min / 0.8f)); + contentStream.transform(Matrix.getTranslateInstance(0, 50)); + + // we get the shape of a Zapf Dingbats check (0x2714) and use that one. + // Adobe uses a different font (which one?), or created the shape from scratch. 
+ GeneralPath path = PDType1Font.ZAPF_DINGBATS.getPath("a20"); + addPath(contentStream, path); + contentStream.fillAndStroke(); + } + + //TODO this is mostly identical to drawStar, except for scale, translation and symbol + private void drawRightPointer(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + PDRectangle bbox = adjustRectAndBBox(annotation, 20, 17); + + float min = Math.min(bbox.getWidth(), bbox.getHeight()); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(1); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(0.59f); // value from Adobe + + contentStream.transform(Matrix.getScaleInstance(0.001f * min / 0.8f, 0.001f * min / 0.8f)); + contentStream.transform(Matrix.getTranslateInstance(0, 50)); + + // we get the shape of a Zapf Dingbats right pointer (0x27A4) and use that one. + // Adobe uses a different font (which one?), or created the shape from scratch. + GeneralPath path = PDType1Font.ZAPF_DINGBATS.getPath("a174"); + addPath(contentStream, path); + contentStream.fillAndStroke(); + } + + private void drawCrossHairs(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + PDRectangle bbox = adjustRectAndBBox(annotation, 20, 20); + + float min = Math.min(bbox.getWidth(), bbox.getHeight()); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(0); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(0.61f); // value from Adobe + + contentStream.transform(Matrix.getScaleInstance(0.001f * min / 1.5f, 0.001f * min / 1.5f)); + contentStream.transform(Matrix.getTranslateInstance(0, 50)); + + // we get the shape of a Symbol crosshair (0x2295) and use that one. + // Adobe uses a different font (which one?), or created the shape from scratch. 
+ GeneralPath path = PDType1Font.SYMBOL.getPath("circleplus"); + addPath(contentStream, path); + contentStream.fillAndStroke(); + } + + private void drawUpArrow(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + adjustRectAndBBox(annotation, 17, 20); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(1); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(0.59f); // value from Adobe + + contentStream.moveTo(1, 7); + contentStream.lineTo(5, 7); + contentStream.lineTo(5, 1); + contentStream.lineTo(12, 1); + contentStream.lineTo(12, 7); + contentStream.lineTo(16, 7); + contentStream.lineTo(8.5f, 19); + contentStream.closeAndFillAndStroke(); + } + + private void drawUpLeftArrow(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + adjustRectAndBBox(annotation, 17, 17); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(1); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(0.59f); // value from Adobe + + contentStream.transform(Matrix.getRotateInstance(Math.toRadians(45), 8, -4)); + + contentStream.moveTo(1, 7); + contentStream.lineTo(5, 7); + contentStream.lineTo(5, 1); + contentStream.lineTo(12, 1); + contentStream.lineTo(12, 7); + contentStream.lineTo(16, 7); + contentStream.lineTo(8.5f, 19); + contentStream.closeAndFillAndStroke(); + } + + private void drawRightArrow(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + PDRectangle bbox = adjustRectAndBBox(annotation, 20, 20); + + float min = Math.min(bbox.getWidth(), bbox.getHeight()); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(1); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(0.59f); // value from Adobe + + // Adobe first fills a white circle with CA ca 0.6, so do we + contentStream.saveGraphicsState(); + contentStream.setLineWidth(1); + 
PDExtendedGraphicsState gs = new PDExtendedGraphicsState(); + gs.setAlphaSourceFlag(false); + gs.setStrokingAlphaConstant(0.6f); + gs.setNonStrokingAlphaConstant(0.6f); + gs.setBlendMode(BlendMode.NORMAL); + contentStream.setGraphicsStateParameters(gs); + contentStream.setNonStrokingColor(1f); + drawCircle2(contentStream, min / 2, min / 2, min / 2 - 1); + contentStream.fill(); + contentStream.restoreGraphicsState(); + + contentStream.saveGraphicsState(); + // rescale so that the glyph fits into circle and move it to circle center + // values gathered by trial and error + contentStream.transform(Matrix.getScaleInstance(0.001f * min / 1.3f, 0.001f * min / 1.3f)); + contentStream.transform(Matrix.getTranslateInstance(200, 300)); + + // we get the shape of a Zapf Dingbats right arrow (0x2794) and use that one. + // Adobe uses a different font (which one?), or created the shape from scratch. + GeneralPath path = PDType1Font.ZAPF_DINGBATS.getPath("a160"); + addPath(contentStream, path); + contentStream.restoreGraphicsState(); + // surprisingly, this one not counterclockwise. 
+ drawCircle(contentStream, min / 2, min / 2, min / 2 - 1); + contentStream.fillAndStroke(); + } + + private void drawComment(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + adjustRectAndBBox(annotation, 18, 18); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(1); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(200); + + // Adobe first fills a white rectangle with CA ca 0.6, so do we + contentStream.saveGraphicsState(); + contentStream.setLineWidth(1); + PDExtendedGraphicsState gs = new PDExtendedGraphicsState(); + gs.setAlphaSourceFlag(false); + gs.setStrokingAlphaConstant(0.6f); + gs.setNonStrokingAlphaConstant(0.6f); + gs.setBlendMode(BlendMode.NORMAL); + contentStream.setGraphicsStateParameters(gs); + contentStream.setNonStrokingColor(1f); + contentStream.addRect(0.3f, 0.3f, 18-0.6f, 18-0.6f); + contentStream.fill(); + contentStream.restoreGraphicsState(); + + contentStream.transform(Matrix.getScaleInstance(0.003f, 0.003f)); + contentStream.transform(Matrix.getTranslateInstance(500, -300)); + + // outer shape was gathered from Font Awesome by "printing" comment.svg + // into a PDF and looking at the content stream + contentStream.moveTo(2549, 5269); + contentStream.curveTo(1307, 5269, 300, 4451, 300, 3441); + contentStream.curveTo(300, 3023, 474, 2640, 764, 2331); + contentStream.curveTo(633, 1985, 361, 1691, 357, 1688); + contentStream.curveTo(299, 1626, 283, 1537, 316, 1459); + contentStream.curveTo(350, 1382, 426, 1332, 510, 1332); + contentStream.curveTo(1051, 1332, 1477, 1558, 1733, 1739); + contentStream.curveTo(1987, 1659, 2261, 1613, 2549, 1613); + contentStream.curveTo(3792, 1613, 4799, 2431, 4799, 3441); + contentStream.curveTo(4799, 4451, 3792, 5269, 2549, 5269); + contentStream.closePath(); + + // can't use addRect: if we did that, we wouldn't get the donut effect + contentStream.moveTo(0.3f / 0.003f - 500, 0.3f / 0.003f + 300); + contentStream.lineTo(0.3f / 
0.003f - 500, 0.3f / 0.003f + 300 + 17.4f / 0.003f); + contentStream.lineTo(0.3f / 0.003f - 500 + 17.4f / 0.003f, 0.3f / 0.003f + 300 + 17.4f / 0.003f); + contentStream.lineTo(0.3f / 0.003f - 500 + 17.4f / 0.003f, 0.3f / 0.003f + 300); + + contentStream.closeAndFillAndStroke(); + } + + private void drawKey(PDAnnotationText annotation, final PDAppearanceContentStream contentStream) + throws IOException + { + adjustRectAndBBox(annotation, 13, 18); + + contentStream.setMiterLimit(4); + contentStream.setLineJoinStyle(1); + contentStream.setLineCapStyle(0); + contentStream.setLineWidth(200); + + contentStream.transform(Matrix.getScaleInstance(0.003f, 0.003f)); + contentStream.transform(Matrix.getRotateInstance(Math.toRadians(45), 2500, -800)); + + // shape was gathered from Font Awesome by "printing" key.svg into a PDF + // and looking at the content stream + contentStream.moveTo(4799, 4004); + contentStream.curveTo(4799, 3149, 4107, 2457, 3253, 2457); + contentStream.curveTo(3154, 2457, 3058, 2466, 2964, 2484); + contentStream.lineTo(2753, 2246); + contentStream.curveTo(2713, 2201, 2656, 2175, 2595, 2175); + contentStream.lineTo(2268, 2175); + contentStream.lineTo(2268, 1824); + contentStream.curveTo(2268, 1707, 2174, 1613, 2057, 1613); + contentStream.lineTo(1706, 1613); + contentStream.lineTo(1706, 1261); + contentStream.curveTo(1706, 1145, 1611, 1050, 1495, 1050); + contentStream.lineTo(510, 1050); + contentStream.curveTo(394, 1050, 300, 1145, 300, 1261); + contentStream.lineTo(300, 1947); + contentStream.curveTo(300, 2003, 322, 2057, 361, 2097); + contentStream.lineTo(1783, 3519); + contentStream.curveTo(1733, 3671, 1706, 3834, 1706, 4004); + contentStream.curveTo(1706, 4858, 2398, 5550, 3253, 5550); + contentStream.curveTo(4109, 5550, 4799, 4860, 4799, 4004); + contentStream.closePath(); + contentStream.moveTo(3253, 4425); + contentStream.curveTo(3253, 4192, 3441, 4004, 3674, 4004); + contentStream.curveTo(3907, 4004, 4096, 4192, 4096, 4425); + 
contentStream.curveTo(4096, 4658, 3907, 4847, 3674, 4847); + contentStream.curveTo(3441, 4847, 3253, 4658, 3253, 4425); + contentStream.fillAndStroke(); + } + + private void addPath(final PDAppearanceContentStream contentStream, GeneralPath path) throws IOException + { + double curX = 0; + double curY = 0; + PathIterator it = path.getPathIterator(new AffineTransform()); + double[] coords = new double[6]; + while (!it.isDone()) + { + int type = it.currentSegment(coords); + switch (type) + { + case PathIterator.SEG_CLOSE: + contentStream.closePath(); + break; + case PathIterator.SEG_CUBICTO: + contentStream.curveTo((float) coords[0], (float) coords[1], (float) coords[2], + (float) coords[3], (float) coords[4], (float) coords[5]); + curX = coords[4]; + curY = coords[5]; + break; + case PathIterator.SEG_QUADTO: + // Convert quadratic Bézier curve to cubic + // https://fontforge.github.io/bezier.html + // CP1 = QP0 + 2/3 *(QP1-QP0) + // CP2 = QP2 + 2/3 *(QP1-QP2) + double cp1x = curX + 2d / 3d * (coords[0] - curX); + double cp1y = curY + 2d / 3d * (coords[1] - curY); + double cp2x = coords[2] + 2d / 3d * (coords[0] - coords[2]); + double cp2y = coords[3] + 2d / 3d * (coords[1] - coords[3]); + contentStream.curveTo((float) cp1x, (float) cp1y, + (float) cp2x, (float) cp2y, + (float) coords[2], (float) coords[3]); + curX = coords[2]; + curY = coords[3]; + break; + case PathIterator.SEG_LINETO: + contentStream.lineTo((float) coords[0], (float) coords[1]); + curX = coords[0]; + curY = coords[1]; + break; + case PathIterator.SEG_MOVETO: + contentStream.moveTo((float) coords[0], (float) coords[1]); + curX = coords[0]; + curY = coords[1]; + break; + default: + break; + } + it.next(); + } + } + + @Override + public void generateRolloverAppearance() + { + // No rollover appearance generated + } + + @Override + public void generateDownAppearance() + { + // No down appearance generated + } +} diff --git 
a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDUnderlineAppearanceHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDUnderlineAppearanceHandler.java new file mode 100644 index 00000000000..798ed76f8de --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/handlers/PDUnderlineAppearanceHandler.java @@ -0,0 +1,168 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdmodel.interactive.annotation.handlers; + +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.PDDocument; + +/** + * + */ +public class PDUnderlineAppearanceHandler extends PDAbstractAppearanceHandler +{ + private static final Log LOG = LogFactory.getLog(PDUnderlineAppearanceHandler.class); + + public PDUnderlineAppearanceHandler(PDAnnotation annotation) + { + super(annotation); + } + + public PDUnderlineAppearanceHandler(PDAnnotation annotation, PDDocument document) + { + super(annotation, document); + } + + @Override + public void generateAppearanceStreams() + { + generateNormalAppearance(); + generateRolloverAppearance(); + generateDownAppearance(); + } + + @Override + public void generateNormalAppearance() + { + PDAnnotationTextMarkup annotation = (PDAnnotationTextMarkup) getAnnotation(); + PDRectangle rect = annotation.getRectangle(); + float[] pathsArray = annotation.getQuadPoints(); + if (pathsArray == null) + { + return; + } + AnnotationBorder ab = AnnotationBorder.getAnnotationBorder(annotation, annotation.getBorderStyle()); + PDColor color = annotation.getColor(); + if (color == null || color.getComponents().length == 0) + { + return; + } + if (Float.compare(ab.width, 0) == 0) + { + // value found in adobe reader + ab.width = 1.5f; + } + + // Adjust rectangle even if not empty, see PLPDF.com-MarkupAnnotations.pdf + //TODO in a class structure this should be overridable + // this is similar to polyline but different data type + // all coordinates (unlike painting) are 
used because I'm lazy + float minX = Float.MAX_VALUE; + float minY = Float.MAX_VALUE; + float maxX = Float.MIN_VALUE; + float maxY = Float.MIN_VALUE; + for (int i = 0; i < pathsArray.length / 2; ++i) + { + float x = pathsArray[i * 2]; + float y = pathsArray[i * 2 + 1]; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + rect.setLowerLeftX(Math.min(minX - ab.width / 2, rect.getLowerLeftX())); + rect.setLowerLeftY(Math.min(minY - ab.width / 2, rect.getLowerLeftY())); + rect.setUpperRightX(Math.max(maxX + ab.width / 2, rect.getUpperRightX())); + rect.setUpperRightY(Math.max(maxY + ab.width / 2, rect.getUpperRightY())); + annotation.setRectangle(rect); + + PDAppearanceContentStream cs = null; + + try + { + cs = getNormalAppearanceAsContentStream(); + + setOpacity(cs, annotation.getConstantOpacity()); + + cs.setStrokingColor(color); + if (ab.dashArray != null) + { + cs.setLineDashPattern(ab.dashArray, 0); + } + cs.setLineWidth(ab.width); + + // spec is incorrect + // https://stackoverflow.com/questions/9855814/pdf-spec-vs-acrobat-creation-quadpoints + for (int i = 0; i < pathsArray.length / 8; ++i) + { + // Adobe doesn't use the lower coordinate for the line, it uses lower + delta / 7. 
+ // do the math for diagonal annotations with this weird old trick: + // https://stackoverflow.com/questions/7740507/extend-a-line-segment-a-specific-distance + float len0 = (float) (Math.sqrt(Math.pow(pathsArray[i * 8] - pathsArray[i * 8 + 4], 2) + + Math.pow(pathsArray[i * 8 + 1] - pathsArray[i * 8 + 5], 2))); + float x0 = pathsArray[i * 8 + 4]; + float y0 = pathsArray[i * 8 + 5]; + if (Float.compare(len0, 0) != 0) + { + // only if both coordinates are not identical to avoid divide by zero + x0 += (pathsArray[i * 8] - pathsArray[i * 8 + 4]) / len0 * len0 / 7; + y0 += (pathsArray[i * 8 + 1] - pathsArray[i * 8 + 5]) / len0 * (len0 / 7); + } + float len1 = (float) (Math.sqrt(Math.pow(pathsArray[i * 8 + 2] - pathsArray[i * 8 + 6], 2) + + Math.pow(pathsArray[i * 8 + 3] - pathsArray[i * 8 + 7], 2))); + float x1 = pathsArray[i * 8 + 6]; + float y1 = pathsArray[i * 8 + 7]; + if (Float.compare(len1, 0) != 0) + { + // only if both coordinates are not identical to avoid divide by zero + x1 += (pathsArray[i * 8 + 2] - pathsArray[i * 8 + 6]) / len1 * len1 / 7; + y1 += (pathsArray[i * 8 + 3] - pathsArray[i * 8 + 7]) / len1 * len1 / 7; + } + cs.moveTo(x0, y0); + cs.lineTo(x1, y1); + } + cs.stroke(); + } + catch (IOException ex) + { + LOG.error(ex); + } + finally + { + IOUtils.closeQuietly(cs); + } + } + + @Override + public void generateRolloverAppearance() + { + // No rollover appearance generated + } + + @Override + public void generateDownAppearance() + { + // No down appearance generated + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/layout/AppearanceStyle.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/layout/AppearanceStyle.java new file mode 100644 index 00000000000..b2b98181749 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/layout/AppearanceStyle.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.interactive.annotation.layout; + +import org.apache.pdfbox.pdmodel.font.PDFont; + +/** + * Define styling attributes to be used for text formatting. + * + */ +public class AppearanceStyle +{ + private PDFont font; + /** + * The font size to be used for text formatting. + * + * Defaulting to 12 to match Acrobats default. + */ + private float fontSize = 12.0f; + + /** + * The leading (distance between lines) to be used for text formatting. + * + * Defaulting to 1.2*fontSize to match Acrobats default. + */ + private float leading = 14.4f; + + /** + * Get the font used for text formatting. + * + * @return the font used for text formatting. + */ + PDFont getFont() + { + return font; + } + + /** + * Set the font to be used for text formatting. + * + * @param font the font to be used. + */ + public void setFont(PDFont font) + { + this.font = font; + } + + /** + * Get the fontSize used for text formatting. + * + * @return the fontSize used for text formatting. + */ + float getFontSize() + { + return fontSize; + } + + /** + * Set the font size to be used for formatting. + * + * @param fontSize the font size. 
+ */ + public void setFontSize(float fontSize) + { + this.fontSize = fontSize; + leading = fontSize * 1.2f; + } + + /** + * Get the leading used for text formatting. + * + * @return the leading used for text formatting. + */ + float getLeading() + { + return leading; + } + + /** + * Set the leading used for text formatting. + * + * @param leading the leading to be used. + */ + void setLeading(float leading) + { + this.leading = leading; + } +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/layout/PlainText.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/layout/PlainText.java new file mode 100644 index 00000000000..2425c607467 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/layout/PlainText.java @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.interactive.annotation.layout; + +import java.io.IOException; +import java.text.AttributedString; +import java.text.BreakIterator; +import java.text.AttributedCharacterIterator.Attribute; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.pdmodel.font.PDFont; + +/** + * A block of text. + *

+ * A block of text can contain multiple paragraphs which will + * be treated individually within the block placement. + *

+ * + */ +public class PlainText +{ + private static final float FONTSCALE = 1000f; + + private final List paragraphs; + + /** + * Construct the text block from a single value. + * + * Constructs the text block from a single value splitting + * into individual {@link Paragraph} when a new line character is + * encountered. + * + * @param textValue the text block string. + */ + public PlainText(String textValue) + { + String[] parts = textValue.replace('\t', ' ').split("\\r\\n|\\n|\\r|\\u2028|\\u2029"); + paragraphs = new ArrayList(parts.length); + for (String part : parts) + { + // Acrobat prints a space for an empty paragraph + if (part.length() == 0) + { + part = " "; + } + paragraphs.add(new Paragraph(part)); + } + } + + /** + * Construct the text block from a list of values. + * + * Constructs the text block from a list of values treating each + * entry as an individual {@link Paragraph}. + * + * @param listValue the text block string. + */ + public PlainText(List listValue) + { + paragraphs = new ArrayList(listValue.size()); + for (String part : listValue) + { + paragraphs.add(new Paragraph(part)); + } + } + + /** + * Get the list of paragraphs. + * + * @return the paragraphs. + */ + List getParagraphs() + { + return paragraphs; + } + + /** + * Attribute keys and attribute values used for text handling. + * + * This is similar to {@link java.awt.font.TextAttribute} but + * handled individually as to avoid a dependency on awt. + * + */ + static class TextAttribute extends Attribute + { + /** + * UID for serializing. + */ + private static final long serialVersionUID = -3138885145941283005L; + + /** + * Attribute width of the text. + */ + public static final Attribute WIDTH = new TextAttribute("width"); + + protected TextAttribute(String name) + { + super(name); + } + + + } + + /** + * A block of text to be formatted as a whole. + *

+ * A block of text can contain multiple paragraphs which will + * be treated individually within the block placement. + *

+ * + */ + static class Paragraph + { + private final String textContent; + + Paragraph(String text) + { + textContent = text; + } + + /** + * Get the paragraph text. + * + * @return the text. + */ + String getText() + { + return textContent; + } + + /** + * Break the paragraph into individual lines. + * + * @param font the font used for rendering the text. + * @param fontSize the fontSize used for rendering the text. + * @param width the width of the box holding the content. + * @return the individual lines. + * @throws IOException + */ + List getLines(PDFont font, float fontSize, float width) throws IOException + { + BreakIterator iterator = BreakIterator.getLineInstance(); + iterator.setText(textContent); + + final float scale = fontSize/FONTSCALE; + + int start = iterator.first(); + int end = iterator.next(); + float lineWidth = 0; + + List textLines = new ArrayList(); + Line textLine = new Line(); + + while (end != BreakIterator.DONE) + { + String word = textContent.substring(start,end); + float wordWidth = font.getStringWidth(word) * scale; + + lineWidth = lineWidth + wordWidth; + + // check if the last word would fit without the whitespace ending it + if (lineWidth >= width && Character.isWhitespace(word.charAt(word.length()-1))) + { + float whitespaceWidth = font.getStringWidth(word.substring(word.length()-1)) * scale; + lineWidth = lineWidth - whitespaceWidth; + } + + if (lineWidth >= width) + { + textLine.setWidth(textLine.calculateWidth(font, fontSize)); + textLines.add(textLine); + textLine = new Line(); + lineWidth = font.getStringWidth(word) * scale; + } + + AttributedString as = new AttributedString(word); + as.addAttribute(TextAttribute.WIDTH, wordWidth); + Word wordInstance = new Word(word); + wordInstance.setAttributes(as); + textLine.addWord(wordInstance); + start = end; + end = iterator.next(); + } + textLine.setWidth(textLine.calculateWidth(font, fontSize)); + textLines.add(textLine); + return textLines; + } + } + + /** + * An individual line 
of text. + */ + static class Line + { + private final List<Word> words = new ArrayList<Word>(); + private float lineWidth; + + /** + * Get the width stored for this line. + * + * @return the value last passed to {@link #setWidth(float)}. + */ + float getWidth() + { + return lineWidth; + } + + /** + * Store the width of this line. + * + * @param width the width of the line. + */ + void setWidth(float width) + { + lineWidth = width; + } + + /** + * Calculate the width of the line from the WIDTH attribute of its words. + * + * The width of a whitespace character ending the last word is left out. + * + * @param font the font used for measuring the trailing whitespace. + * @param fontSize the font size used for measuring the trailing whitespace. + * @return the sum of the word widths without the trailing whitespace of the last word. + * @throws IOException if the font fails to provide the width of the whitespace. + */ + float calculateWidth(PDFont font, float fontSize) throws IOException + { + final float scale = fontSize/FONTSCALE; + float calculatedWidth = 0f; + int indexOfWord = 0; + for (Word word : words) + { + calculatedWidth = calculatedWidth + + (Float) word.getAttributes().getIterator().getAttribute(TextAttribute.WIDTH); + String text = word.getText(); + if (indexOfWord == words.size() -1 && Character.isWhitespace(text.charAt(text.length()-1))) + { + float whitespaceWidth = font.getStringWidth(text.substring(text.length()-1)) * scale; + calculatedWidth = calculatedWidth - whitespaceWidth; + } + ++indexOfWord; + } + return calculatedWidth; + } + + /** + * Get the words of this line. + * + * @return the list backing this line; words added later show up in it. + */ + List<Word> getWords() + { + return words; + } + + /** + * Get the extra space to put between adjacent words so that the line fills the given width. + * + * A line with fewer than two words has no gap between words, so 0 is returned for it + * instead of the infinite or NaN value a division by zero would produce. + * + * @param width the width the line shall fill. + * @return the difference between the given width and the stored line width, divided + * evenly over the gaps between the words; 0 if there is no gap. + */ + float getInterWordSpacing(float width) + { + if (words.size() < 2) + { + return 0f; + } + return (width - lineWidth)/(words.size()-1); + } + + /** + * Append a word to this line. + * + * @param word the word to append. + */ + void addWord(Word word) + { + words.add(word); + } + } + + /** + * An individual word. + * + * A word is defined as a string which must be kept + * on the same line. 
+ */ + static class Word + { + private AttributedString attributedString; + private final String textContent; + + Word(String text) + { + textContent = text; + } + + String getText() + { + return textContent; + } + + AttributedString getAttributes() + { + return attributedString; + } + + void setAttributes(AttributedString as) + { + this.attributedString = as; + } + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/layout/PlainTextFormatter.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/layout/PlainTextFormatter.java new file mode 100644 index 00000000000..8c5effaf849 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/layout/PlainTextFormatter.java @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.interactive.annotation.layout; + +import java.io.IOException; +import java.util.List; + +import org.apache.pdfbox.pdmodel.PDAppearanceContentStream; +import org.apache.pdfbox.pdmodel.interactive.annotation.layout.PlainText.Line; +import org.apache.pdfbox.pdmodel.interactive.annotation.layout.PlainText.Paragraph; +import org.apache.pdfbox.pdmodel.interactive.annotation.layout.PlainText.TextAttribute; +import org.apache.pdfbox.pdmodel.interactive.annotation.layout.PlainText.Word; + +/** + * TextFormatter to handle plain text formatting for annotation rectangles. + * + * The text formatter will take a single value or an array of values which + * are treated as paragraphs. + */ + +public class PlainTextFormatter +{ + + enum TextAlign + { + LEFT(0), CENTER(1), RIGHT(2), JUSTIFY(4); + + private final int alignment; + + private TextAlign(int alignment) + { + this.alignment = alignment; + } + + int getTextAlign() + { + return alignment; + } + + public static TextAlign valueOf(int alignment) + { + for (TextAlign textAlignment : TextAlign.values()) + { + if (textAlignment.getTextAlign() == alignment) + { + return textAlignment; + } + } + return TextAlign.LEFT; + } + } + + /** + * The scaling factor for font units to PDF units + */ + private static final int FONTSCALE = 1000; + + private final AppearanceStyle appearanceStyle; + private final boolean wrapLines; + private final float width; + + private final PDAppearanceContentStream contents; + private final PlainText textContent; + private final TextAlign textAlignment; + + private float horizontalOffset; + private float verticalOffset; + + public static class Builder + { + + // required parameters + private PDAppearanceContentStream contents; + + // optional parameters + private AppearanceStyle appearanceStyle; + private boolean wrapLines = false; + private float width = 0f; + private PlainText textContent; + private TextAlign textAlignment = TextAlign.LEFT; + + + // initial offset 
from where to start the position of the first line + private float horizontalOffset = 0f; + private float verticalOffset = 0f; + + public Builder(PDAppearanceContentStream contents) + { + this.contents = contents; + } + + public Builder style(AppearanceStyle appearanceStyle) + { + this.appearanceStyle = appearanceStyle; + return this; + } + + public Builder wrapLines(boolean wrapLines) + { + this.wrapLines = wrapLines; + return this; + } + + public Builder width(float width) + { + this.width = width; + return this; + } + + public Builder textAlign(int alignment) + { + this.textAlignment = TextAlign.valueOf(alignment); + return this; + } + + public Builder textAlign(TextAlign alignment) + { + this.textAlignment = alignment; + return this; + } + + + public Builder text(PlainText textContent) + { + this.textContent = textContent; + return this; + } + + public Builder initialOffset(float horizontalOffset, float verticalOffset) + { + this.horizontalOffset = horizontalOffset; + this.verticalOffset = verticalOffset; + return this; + } + + public PlainTextFormatter build() + { + return new PlainTextFormatter(this); + } + } + + private PlainTextFormatter(Builder builder) + { + appearanceStyle = builder.appearanceStyle; + wrapLines = builder.wrapLines; + width = builder.width; + contents = builder.contents; + textContent = builder.textContent; + textAlignment = builder.textAlignment; + horizontalOffset = builder.horizontalOffset; + verticalOffset = builder.verticalOffset; + } + + /** + * Format the text block. + * + * @throws IOException if there is an error writing to the stream. 
+ */ + public void format() throws IOException + { + if (textContent != null && !textContent.getParagraphs().isEmpty()) + { + boolean isFirstParagraph = true; + for (Paragraph paragraph : textContent.getParagraphs()) + { + if (wrapLines) + { + List lines = paragraph.getLines( + appearanceStyle.getFont(), + appearanceStyle.getFontSize(), + width + ); + processLines(lines, isFirstParagraph); + isFirstParagraph = false; + } + else + { + float startOffset = 0f; + + + float lineWidth = appearanceStyle.getFont().getStringWidth(paragraph.getText()) * + appearanceStyle.getFontSize() / FONTSCALE; + + if (lineWidth < width) + { + switch (textAlignment) + { + case CENTER: + startOffset = (width - lineWidth)/2; + break; + case RIGHT: + startOffset = width - lineWidth; + break; + case JUSTIFY: + default: + startOffset = 0f; + } + } + + contents.newLineAtOffset(horizontalOffset + startOffset, verticalOffset); + contents.showText(paragraph.getText()); + } + } + } + } + + /** + * Process lines for output. + * + * Process lines for an individual paragraph and generate the + * commands for the content stream to show the text. + * + * @param lines the lines to process. + * @throws IOException if there is an error writing to the stream. 
+ */ + private void processLines(List<Line> lines, boolean isFirstParagraph) throws IOException + { + float wordWidth; + + float lastPos = 0f; + float startOffset = 0f; + float interWordSpacing = 0f; + + // track the positions with counters instead of searching the lists on every iteration + final int lastLineIndex = lines.size() - 1; + int lineIndex = 0; + + for (Line line : lines) + { + switch (textAlignment) + { + case CENTER: + startOffset = (width - line.getWidth())/2; + break; + case RIGHT: + startOffset = width - line.getWidth(); + break; + case JUSTIFY: + if (lineIndex != lastLineIndex) + { + interWordSpacing = line.getInterWordSpacing(width); + } + else + { + // the last line keeps its natural spacing rather than inheriting + // the spacing computed for the previous line + interWordSpacing = 0f; + } + break; + default: + startOffset = 0f; + } + + float offset = -lastPos + startOffset + horizontalOffset; + + if (lineIndex == 0 && isFirstParagraph) + { + contents.newLineAtOffset(offset, verticalOffset); + } + else + { + // keep the last position + verticalOffset = verticalOffset - appearanceStyle.getLeading(); + contents.newLineAtOffset(offset, - appearanceStyle.getLeading()); + } + + lastPos += offset; + + List<Word> words = line.getWords(); + final int lastWordIndex = words.size() - 1; + int wordIndex = 0; + for (Word word : words) + { + contents.showText(word.getText()); + wordWidth = (Float) word.getAttributes().getIterator().getAttribute(TextAttribute.WIDTH); + // move to the start of the next word; nothing to move to after the last word + if (wordIndex != lastWordIndex) + { + contents.newLineAtOffset(wordWidth + interWordSpacing, 0f); + lastPos = lastPos + wordWidth + interWordSpacing; + } + ++wordIndex; + } + ++lineIndex; + } + horizontalOffset = horizontalOffset - lastPos; + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/package.html index 58cdca4c9ea..e9240456913 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/annotation/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/COSFilterInputStream.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/COSFilterInputStream.java index df7ae7799b8..498e4da4b63 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/COSFilterInputStream.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/COSFilterInputStream.java @@ -17,115 +17,100 @@ package org.apache.pdfbox.pdmodel.interactive.digitalsignature; import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; +import org.apache.pdfbox.io.IOUtils; - +/** + * A filtered stream that includes the bytes that are in the (begin,length) intervals passed in the + * constructor. + * + * @author boix_jor + * + */ public class COSFilterInputStream extends FilterInputStream { - private final int[] byteRange; - private long position = 0; - - public COSFilterInputStream(InputStream in, int[] byteRange) - { - super(in); - this.byteRange = byteRange; - } + private int[][] ranges; + private int range; + private long position = 0; - public COSFilterInputStream(byte[] in, int[] byteRange) - { - super(new ByteArrayInputStream(in)); - this.byteRange = byteRange; - } + public COSFilterInputStream(InputStream in, int[] byteRange) + { + super(in); + calculateRanges(byteRange); + } - @Override - public int read() throws IOException - { - nextAvailable(); - int i = super.read(); - if (i>-1) + public COSFilterInputStream(byte[] in, int[] byteRange) { - ++position; + this(new ByteArrayInputStream(in), byteRange); } - return i; - } - - @Override - public int read(byte[] b) throws IOException - { - return read(b,0,b.length); - } - - @Override - public int read(byte[] b, int off, int len) throws IOException - { - if (len == 0) + + @Override + public int read() throws 
IOException { - return 0; + if ((this.range == -1 || getRemaining() <= 0) && !nextRange()) + { + return -1; // EOF + } + int result = super.read(); + this.position++; + return result; } - - int c = read(); - if (c == -1) + + @Override + public int read(byte[] b) throws IOException { - return -1; + return read(b, 0, b.length); } - b[off] = (byte)c; - - int i = 1; - try + + @Override + public int read(byte[] b, int off, int len) throws IOException { - for (; i < len; i++) + if ((this.range == -1 || getRemaining() <= 0) && !nextRange()) { - c = read(); - if (c == -1) - { - break; - } - b[off + i] = (byte)c; + return -1; // EOF } + int bytesRead = super.read(b, off, (int) Math.min(len, getRemaining())); + this.position += bytesRead; + return bytesRead; } - catch (IOException ee) + + public byte[] toByteArray() throws IOException { + return IOUtils.toByteArray(this); } - return i; - } - private boolean inRange() throws IOException - { - long pos = position; - for (int i = 0; ipos) - { - return true; - } + this.ranges = new int[byteRange.length / 2][]; + for (int i = 0; i < byteRange.length / 2; i++) + { + this.ranges[i] = new int[] { byteRange[i * 2], byteRange[i * 2] + byteRange[i * 2 + 1] }; + } + this.range = -1; } - return false; - } - private void nextAvailable() throws IOException - { - while (!inRange()) + private long getRemaining() { - ++position; - if(super.read()<0) - { - break; - } + return this.ranges[this.range][1] - this.position; } - } - - public byte[] toByteArray() throws IOException - { - ByteArrayOutputStream byteOS = new ByteArrayOutputStream(); - byte[] buffer = new byte[1024]; - int c; - while ((c = this.read(buffer)) != -1) + + private boolean nextRange() throws IOException { - byteOS.write(buffer, 0, c); + if (this.range + 1 < this.ranges.length) + { + this.range++; + while (this.position < this.ranges[this.range][0]) + { + long skipped = super.skip(this.ranges[this.range][0] - this.position); + this.position += skipped; + } + return true; + } + 
else + { + return false; + } + } - return byteOS.toByteArray(); - } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/ExternalSigningSupport.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/ExternalSigningSupport.java new file mode 100644 index 00000000000..f5a294ad562 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/ExternalSigningSupport.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.interactive.digitalsignature; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Interface for external signature creation scenarios. It contains methods for retrieving the + * PDF data to be signed and for setting the created CMS signature in the PDF. + * + */ +public interface ExternalSigningSupport +{ + /** + * Get PDF content to be signed. Obtained InputStream must be closed after use. + * + * @return content stream + * + * @throws java.io.IOException if something went wrong + */ + InputStream getContent() throws IOException; + + /** + * Set CMS signature bytes to PDF. 
+ * + * @param signature CMS signature as byte array + * + * @throws IOException if exception occurred during PDF writing + */ + void setSignature(byte[] signature) throws IOException; +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDPropBuild.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDPropBuild.java index 3fea732ffa0..7410bb98770 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDPropBuild.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDPropBuild.java @@ -75,7 +75,7 @@ public COSDictionary getCOSObject() public PDPropBuildDataDict getFilter() { PDPropBuildDataDict filter = null; - COSDictionary filterDic = (COSDictionary)dictionary.getDictionaryObject(COSName.FILTER); + COSDictionary filterDic = dictionary.getCOSDictionary(COSName.FILTER); if (filterDic != null) { filter = new PDPropBuildDataDict(filterDic); @@ -103,7 +103,7 @@ public void setPDPropBuildFilter(PDPropBuildDataDict filter) public PDPropBuildDataDict getPubSec() { PDPropBuildDataDict pubSec = null; - COSDictionary pubSecDic = (COSDictionary)dictionary.getDictionaryObject(COSName.PUB_SEC); + COSDictionary pubSecDic = dictionary.getCOSDictionary(COSName.PUB_SEC); if (pubSecDic != null) { pubSec = new PDPropBuildDataDict(pubSecDic); @@ -130,7 +130,7 @@ public void setPDPropBuildPubSec(PDPropBuildDataDict pubSec) public PDPropBuildDataDict getApp() { PDPropBuildDataDict app = null; - COSDictionary appDic = (COSDictionary)dictionary.getDictionaryObject(COSName.APP); + COSDictionary appDic = dictionary.getCOSDictionary(COSName.APP); if (appDic != null) { app = new PDPropBuildDataDict(appDic); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDPropBuildDataDict.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDPropBuildDataDict.java index cf709fbe945..89fa31f236f 
100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDPropBuildDataDict.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDPropBuildDataDict.java @@ -262,23 +262,34 @@ public void setOS(String os) * attribute will be set to true. * * @return true if NonEFontNoWarn is set to true + * @see #setNonEFontNoWarn(boolean) */ public boolean getNonEFontNoWarn() { return dictionary.getBoolean(COSName.NON_EFONT_NO_WARN, true); } - /* - * setNonEFontNoWarn missing. Maybe not needed or should be self - * implemented. + /** + * If true, the reader should not display a warning about fonts not being embedded. * - * Documentation says: - * (Optional; PDF 1.5) If there is a LegalPDF dictionary in the catalog - * of the PDF file and the NonEmbeddedFonts attribute in this dictionary - * has a non zero value, and the viewing application has a preference - * set to suppress the display of this warning then the value of this - * attribute will be set to true. + * @param noEmbedFontWarning true if there is a Legal dictionary in the catalog and the + * NonEmbeddedFonts attribute has a non-zero value + * + * Documentation says: (Optional; PDF 1.5) If there is a LegalPDF dictionary in the catalog of + * the PDF file and the NonEmbeddedFonts attribute in this dictionary has a non zero value, and + * the viewing application has a preference set to suppress the display of this warning then the + * value of this attribute will be set to true. + * + * @see + * Digital + * Signature Build Dictionary Specification + * @see #getNonEFontNoWarn() + * @see COSName#NON_EFONT_NO_WARN */ + public void setNonEFontNoWarn(boolean noEmbedFontWarning) + { + dictionary.setBoolean(COSName.NON_EFONT_NO_WARN, noEmbedFontWarning); + } /** * If true, the application was in trusted mode when signing took place. 
diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValue.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValue.java index 2ecab6860f3..a60d0561fdf 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValue.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValue.java @@ -20,6 +20,7 @@ import java.util.List; import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.COSArrayList; @@ -67,7 +68,7 @@ public class PDSeedValue implements COSObjectable */ public static final int FLAG_DIGEST_METHOD = 1 << 6; - private COSDictionary dictionary; + private final COSDictionary dictionary; /** * Default constructor. @@ -284,7 +285,7 @@ public List getSubFilter() actuals.add(element); } } - retval = new COSArrayList(actuals, fields); + retval = new COSArrayList(actuals, fields); } return retval; } @@ -324,7 +325,7 @@ public List getDigestMethod() actuals.add(element); } } - retval = new COSArrayList(actuals, fields); + retval = new COSArrayList(actuals, fields); } return retval; } @@ -413,7 +414,7 @@ public List getReasons() actuals.add(element); } } - retval = new COSArrayList(actuals, fields); + retval = new COSArrayList(actuals, fields); } return retval; } @@ -424,8 +425,23 @@ public List getReasons() * by conforming products. * * @param reasons is a list of possible text string that specifying possible reasons + * + * @deprecated use {@link #setReasons(java.util.List) } */ + @Deprecated public void setReasonsd(List reasons) + { + setReasons(reasons); + } + + /** + * (Optional) An array of text strings that specifying possible reasons for signing + * a document. If specified, the reasons supplied in this entry replace those used + * by conforming products. 
+ * + * @param reasons is a list of possible text string that specifying possible reasons + */ + public void setReasons(List reasons) { dictionary.setItem(COSName.REASONS, COSArrayList.converterToCOSArray(reasons)); } @@ -445,7 +461,7 @@ public void setReasonsd(List reasons) */ public PDSeedValueMDP getMDP() { - COSDictionary dict = (COSDictionary)dictionary.getDictionaryObject(COSName.MDP); + COSDictionary dict = dictionary.getCOSDictionary(COSName.MDP); PDSeedValueMDP mdp = null; if (dict != null) { @@ -475,6 +491,35 @@ public void setMPD(PDSeedValueMDP mdp) } } + /** + * (Optional) A certificate seed value dictionary containing information about the certificate + * to be used when signing. + * + * @return dictionary + */ + public PDSeedValueCertificate getSeedValueCertificate() + { + COSBase base = dictionary.getDictionaryObject(COSName.CERT); + PDSeedValueCertificate certificate = null; + if (base instanceof COSDictionary) + { + COSDictionary dict = (COSDictionary) base; + certificate = new PDSeedValueCertificate(dict); + } + return certificate; + } + + /** + * (Optional) A certificate seed value dictionary containing information about the certificate + * to be used when signing. + * + * @param certificate dictionary + */ + public void setSeedValueCertificate(PDSeedValueCertificate certificate) + { + dictionary.setItem(COSName.CERT, certificate); + } + /** *

(Optional; PDF 1.6) A time stamp dictionary containing two entries. URL which * is a ASCII string specifying the URL to a rfc3161 conform timestamp server and Ff @@ -484,7 +529,7 @@ public void setMPD(PDSeedValueMDP mdp) */ public PDSeedValueTimeStamp getTimeStamp() { - COSDictionary dict = (COSDictionary)dictionary.getDictionaryObject(COSName.TIME_STAMP); + COSDictionary dict = dictionary.getCOSDictionary(COSName.TIME_STAMP); PDSeedValueTimeStamp timestamp = null; if (dict != null) { @@ -530,7 +575,7 @@ public List getLegalAttestation() actuals.add(element); } } - retval = new COSArrayList(actuals, fields); + retval = new COSArrayList(actuals, fields); } return retval; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValueCertificate.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValueCertificate.java new file mode 100644 index 00000000000..af4cc4abd3c --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValueCertificate.java @@ -0,0 +1,646 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.interactive.digitalsignature; + +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.pdmodel.common.COSArrayList; +import org.apache.pdfbox.pdmodel.common.COSObjectable; + +/** + * This class represents a certificate seed value dictionary that is in the seed value which puts + * constraints on certificates when signing documents. + * + * @author Hossam Hazem + */ +public class PDSeedValueCertificate implements COSObjectable +{ + /** + * A Ff flag. + */ + public static final int FLAG_SUBJECT = 1; + + /** + * A Ff flag. + */ + public static final int FLAG_ISSUER = 1 << 1; + + /** + * A Ff flag. + */ + public static final int FLAG_OID = 1 << 2; + + /** + * A Ff flag. + */ + public static final int FLAG_SUBJECT_DN = 1 << 3; + + /** + * A Ff flag. + */ + public static final int FLAG_KEY_USAGE = 1 << 5; + + /** + * A Ff flag. + */ + public static final int FLAG_URL = 1 << 6; + private final COSDictionary dictionary; + + /** + * Default constructor. + */ + public PDSeedValueCertificate() + { + dictionary = new COSDictionary(); + dictionary.setItem(COSName.TYPE, COSName.SV_CERT); + dictionary.setDirect(true); + } + + /** + * Constructor. + * + * @param dict The certificate seed value dictionary. + */ + public PDSeedValueCertificate(COSDictionary dict) + { + dictionary = dict; + dictionary.setDirect(true); + } + + /** + * Convert this standard java object to a COS dictionary. + * + * @return The COS dictionary that matches this Java object. 
+ */ + @Override + public COSDictionary getCOSObject() + { + return dictionary; + } + + /** + * + * @return true if the Subject is required + */ + public boolean isSubjectRequired() + { + return dictionary.getFlag(COSName.FF, FLAG_SUBJECT); + } + + /** + * set true if subject shall be required as a constraint on signature. + * + * @param flag if true, the specified Subject shall be enforced as a constraint. + */ + public void setSubjectRequired(boolean flag) + { + dictionary.setFlag(COSName.FF, FLAG_SUBJECT, flag); + } + + /** + * + * @return true if the Issuer is required + */ + public boolean isIssuerRequired() + { + return dictionary.getFlag(COSName.FF, FLAG_ISSUER); + } + + /** + * set true if Issuer shall be required as a constraint on signature. + * + * @param flag if true, the specified Issuer shall be enforced as a constraint. + */ + public void setIssuerRequired(boolean flag) + { + dictionary.setFlag(COSName.FF, FLAG_ISSUER, flag); + } + + /** + * + * @return true if the OID is required + */ + public boolean isOIDRequired() + { + return dictionary.getFlag(COSName.FF, FLAG_OID); + } + + /** + * set true if OID shall be required as a constraint on signature. + * + * @param flag if true, the specified OID shall be enforced as a constraint. + */ + public void setOIDRequired(boolean flag) + { + dictionary.setFlag(COSName.FF, FLAG_OID, flag); + } + + /** + * + * @return true if the Subject DN is required + */ + public boolean isSubjectDNRequired() + { + return dictionary.getFlag(COSName.FF, FLAG_SUBJECT_DN); + } + + /** + * set true if subject DN shall be required as a constraint on signature. + * + * @param flag if true, the specified Subject DN shall be enforced as a constraint. 
+ */ + public void setSubjectDNRequired(boolean flag) + { + dictionary.setFlag(COSName.FF, FLAG_SUBJECT_DN, flag); + } + + /** + * + * @return true if the KeyUsage is required + */ + public boolean isKeyUsageRequired() + { + return dictionary.getFlag(COSName.FF, FLAG_KEY_USAGE); + } + + /** + * set true if KeyUsage shall be required as a constraint on signature. + * + * @param flag if true, the specified KeyUsage shall be enforced as a constraint. + */ + public void setKeyUsageRequired(boolean flag) + { + dictionary.setFlag(COSName.FF, FLAG_KEY_USAGE, flag); + } + + /** + * + * @return true if the URL is required + */ + public boolean isURLRequired() + { + return dictionary.getFlag(COSName.FF, FLAG_URL); + } + + /** + * set true if URL shall be required as a constraint on signature. + * + * @param flag if true, the specified URL shall be enforced as a constraint. + */ + public void setURLRequired(boolean flag) + { + dictionary.setFlag(COSName.FF, FLAG_URL, flag); + } + + /** + * Returns list of byte arrays that contains DER-encoded X.509v3 certificates + * + * @return a list of bytes arrays representing the subject. + */ + public List getSubject() + { + COSArray array = dictionary.getCOSArray(COSName.SUBJECT); + return array != null ? getListOfByteArraysFromCOSArray(array) : null; + } + + /** + * (Optional) A list of byte arrays containing DER-encoded X.509v3 certificates that are + * acceptable for signing. if + * Subject is not null and {@link #isSubjectRequired()} is true then the subject + * constraint is enforced on the subjects in this array subjects. + * + * @param subjects list of byte arrays containing DER-encoded X.509v3 certificates that are + * acceptable for signing. + */ + public void setSubject(List subjects) + { + dictionary.setItem(COSName.SUBJECT, convertListOfByteArraysToCOSArray(subjects)); + } + + /** + * (Optional) byte array containing DER-encoded X.509v3 certificate that is acceptable for + * signing. 
works like {@link #setSubject(List)} but one byte array + * + * @param subject byte array containing DER-encoded X.509v3 certificate + */ + public void addSubject(byte[] subject) + { + COSArray array = dictionary.getCOSArray(COSName.SUBJECT); + if (array == null) + { + array = new COSArray(); + } + array.add(new COSString(subject)); + dictionary.setItem(COSName.SUBJECT, array); + } + + /** + * removes a subject from the list + * + * @param subject byte array containing DER-encoded X.509v3 certificate + */ + public void removeSubject(byte[] subject) + { + COSArray array = dictionary.getCOSArray(COSName.SUBJECT); + if (array != null) + { + array.remove(new COSString(subject)); + } + } + + /** + * Returns list of maps that contains subject distinguished names like [(cn: John Doe, o: Doe), (cn: John Smith)] + * both keys are typically of the form 'cn', 'o', 'email', '2.5.4.43'; and values are text strings. + * + * @return a list of maps containing the subject distinguished names + */ + public List> getSubjectDN() + { + COSArray cosArray = dictionary.getCOSArray(COSName.SUBJECT_DN); + if (cosArray != null) + { + List subjectDNList = cosArray.toList(); + List> result = new LinkedList>(); + for (COSBase subjectDNItem : subjectDNList) + { + if (subjectDNItem instanceof COSDictionary) + { + COSDictionary subjectDNItemDict = (COSDictionary) subjectDNItem; + Map subjectDNMap = new HashMap(); + for (COSName key : subjectDNItemDict.keySet()) + { + subjectDNMap.put(key.getName(), subjectDNItemDict.getString(key)); + } + result.add(subjectDNMap); + } + } + return result; + } + return null; + } + + /** + * (Optional; PDF 1.7) A list of maps, where each map contains key value pairs, that specify the + * Subject Distinguished Name (DN) that must be present within the certificate for it to be + * acceptable for signing. The certificate must at a minimum contain all the attributes + * specified in one of the maps entered. 
+ * + * @param subjectDN list of maps that contains subject distinguished names + */ + public void setSubjectDN(List> subjectDN) + { + List subjectDNDict = new LinkedList(); + for (Map subjectDNItem : subjectDN) + { + COSDictionary dict = new COSDictionary(); + for (Map.Entry entry : subjectDNItem.entrySet()) + { + dict.setItem(entry.getKey(), new COSString(entry.getValue())); + } + subjectDNDict.add(dict); + } + dictionary.setItem(COSName.SUBJECT_DN, + COSArrayList.converterToCOSArray(subjectDNDict)); + } + + /** + * Returns list of key usages of certificate strings where each string is 9 characters long and each character is + * one of these values {0, 1, X} 0 for must not set, 1 for must set, X for don't care. each index in the string + * represents a key usage: + *

    + *
  1. digitalSignature
  2. + *
  3. non-Repudiation
  4. + *
  5. keyEncipherment
  6. + *
  7. dataEncipherment
  8. + *
  9. keyAgreement
  10. + *
  11. keyCertSign
  12. + *
  13. cRLSign
  14. + *
  15. encipherOnly
  16. + *
  17. decipherOnly
  18. + *
+ * + * @return list of key usages + */ + public List getKeyUsage() + { + COSArray array = dictionary.getCOSArray(COSName.KEY_USAGE); + if (array != null) + { + List keyUsageExtensions = new LinkedList(); + for (COSBase item : array) + { + if (item instanceof COSString) + { + keyUsageExtensions.add(((COSString) item).getString()); + } + } + return keyUsageExtensions; + } + return null; + } + + /** + * (Optional; PDF 1.7) A List of ASCII strings, where each string specifies an acceptable + * key-usage extension that must be present in the signing certificate. Multiple strings specify + * a range of acceptable key-usage extensions; where each string 9 characters long and each + * character is one of these values {0, 1, X} 0 for must not set, 1 for must set, X for don't + * care. each index in the string represents a key usage: + *
    + *
  1. digitalSignature
  2. + *
  3. non-Repudiation
  4. + *
  5. keyEncipherment
  6. + *
  7. dataEncipherment
  8. + *
  9. keyAgreement
  10. + *
  11. keyCertSign
  12. + *
  13. cRLSign
  14. + *
  15. encipherOnly
  16. + *
  17. decipherOnly
  18. + *
+ * + * @param keyUsageExtensions list of ASCII strings that consists only of {0, 1, X} + */ + public void setKeyUsage(List keyUsageExtensions) + { + dictionary.setItem(COSName.KEY_USAGE, + COSArrayList.converterToCOSArray(keyUsageExtensions)); + } + + /** + * (Optional; PDF 1.7) specifies an acceptable key-usage extension that must be present in the + * signing certificate; works like {@link #setKeyUsage(List)} but takes only one string + * + * @param keyUsageExtension String that consists only of {0, 1, X} + */ + public void addKeyUsage(String keyUsageExtension) + { + String allowedChars = "01X"; + for (int c = 0; c < keyUsageExtension.length(); c++) + { + if (allowedChars.indexOf(keyUsageExtension.charAt(c)) == -1) + { + throw new IllegalArgumentException("characters can only be 0, 1, X"); + } + } + COSArray array = dictionary.getCOSArray(COSName.KEY_USAGE); + if (array == null) + { + array = new COSArray(); + } + array.add(new COSString(keyUsageExtension)); + dictionary.setItem(COSName.KEY_USAGE, array); + } + + /** + * works like {@link #addKeyUsage(String)} but enters each character separately + * + * @param digitalSignature char that is one of {0, 1, X} + * @param nonRepudiation char that is one of {0, 1, X} + * @param keyEncipherment char that is one of {0, 1, X} + * @param dataEncipherment char that is one of {0, 1, X} + * @param keyAgreement char that is one of {0, 1, X} + * @param keyCertSign char that is one of {0, 1, X} + * @param cRLSign char that is one of {0, 1, X} + * @param encipherOnly char that is one of {0, 1, X} + * @param decipherOnly char that is one of {0, 1, X} + */ + public void addKeyUsage(char digitalSignature, char nonRepudiation, char keyEncipherment, + char dataEncipherment, char keyAgreement, char keyCertSign, char cRLSign, + char encipherOnly, char decipherOnly) + { + String string = "" + digitalSignature + nonRepudiation + keyEncipherment + dataEncipherment + + keyAgreement + keyCertSign + cRLSign + encipherOnly + decipherOnly; 
+ addKeyUsage(string); + } + + /** + * Removes a key usage extension + * + * @param keyUsageExtension ASCII string that consists of {0, 1, X} + */ + public void removeKeyUsage(String keyUsageExtension) + { + COSArray array = dictionary.getCOSArray(COSName.KEY_USAGE); + if (array != null) + { + array.remove(new COSString(keyUsageExtension)); + } + } + + /** + * Returns list of array of bytes of DER-encoded X.509v3 certificates + * + * @return a list of byte arrays representing the issuer + */ + public List getIssuer() + { + COSArray array = dictionary.getCOSArray(COSName.ISSUER); + return array != null ? getListOfByteArraysFromCOSArray(array) : null; + } + + /** + * (Optional) A list of array of bytes containing DER-encoded X.509v3 certificates of acceptable + * issuers. If the signer’s certificate chains up to any of the specified issuers (either + * directly or indirectly), the certificate is considered acceptable for signing. + * + * @param issuers A list of byte array containing DER-encoded X.509v3 certificates + */ + public void setIssuer(List issuers) + { + dictionary.setItem(COSName.ISSUER, convertListOfByteArraysToCOSArray(issuers)); + } + + /** + * array of bytes containing DER-encoded X.509v3 certificates of acceptable issuers. If the + * signer’s certificate chains up to any of the specified issuers (either directly or + * indirectly), the certificate is considered acceptable for signing. 
+ * + * @param issuer A byte array containing DER-encoded X.509v3 certificate + */ + public void addIssuer(byte[] issuer) + { + COSArray array = dictionary.getCOSArray(COSName.ISSUER); + if (array == null) + { + array = new COSArray(); + } + array.add(new COSString(issuer)); + dictionary.setItem(COSName.ISSUER, array); + } + + /** + * Removes an issuer from the issuers list + * + * @param issuer A byte array containing DER-encoded X.509v3 certificate + */ + public void removeIssuer(byte[] issuer) + { + COSArray array = dictionary.getCOSArray(COSName.ISSUER); + if (array != null) + { + array.remove(new COSString(issuer)); + } + } + + /** + * Returns A list of array of bytes that contain Object Identifiers (OIDs) of the certificate policies that must be + * present in the signing certificate + * + * @return an array of object identifiers. + */ + public List getOID() + { + COSArray array = dictionary.getCOSArray(COSName.OID); + return array != null ? getListOfByteArraysFromCOSArray(array) : null; + } + + /** + * (Optional) A list of byte arrays that contain Object Identifiers (OIDs) of the certificate + * policies that must be present in the signing certificate. This field is only applicable if + * the value of Issuer is not empty. + * + * @param oidByteStrings list of byte arrays that contain OIDs + */ + public void setOID(List oidByteStrings) + { + dictionary.setItem(COSName.OID, convertListOfByteArraysToCOSArray(oidByteStrings)); + } + + /** + * works like {@link #setOID(List)} but for one object + * + * @param oid the object identifier. + */ + public void addOID(byte[] oid) + { + COSArray array = dictionary.getCOSArray(COSName.OID); + if (array == null) + { + array = new COSArray(); + } + array.add(new COSString(oid)); + dictionary.setItem(COSName.OID, array); + } + + /** + * removes an OID from the list + * + * @param oid the object identifier to be removed. 
+ */ + public void removeOID(byte[] oid) + { + COSArray array = dictionary.getCOSArray(COSName.OID); + if (array != null) + { + array.remove(new COSString(oid)); + } + } + + /** + * returns String of the URL + * + * @return the URL + */ + public String getURL() + { + return dictionary.getString(COSName.URL); + } + + /** + * (Optional) A URL, the use for which is defined by the URLType entry. + * + * @param url String of the URL + */ + public void setURL(String url) + { + dictionary.setString(COSName.URL, url); + } + + /** + * A name indicating the usage of the URL entry. There are standard uses and there can be + * implementation-specific use for this URL. The following value specifies a valid standard + * usage: + *
    + *
  • Browser, The URL references content that should be displayed in a web browser to allow + * enrolling for a new credential if a matching credential is not found. The Ff attribute’s URL + * bit is ignored for this usage.
  • + *
  • ASSP, The URL references a signature web service that can be used for server-based + * signing. If the Ff attribute’s URL bit indicates that this is a required constraint, this + * implies that the credential used when signing must come from this server.
  • + *
+ * + * @return string of URL type + */ + public String getURLType() + { + return dictionary.getNameAsString(COSName.URL_TYPE); + } + + /** + * (Optional; PDF 1.7) A name indicating the usage of the URL entry. There are standard uses and + * there can be implementation-specific uses for this URL. The following value specifies a valid + * standard usage: + *
    + *
  • Browser, The URL references content that should be displayed in a web browser to allow + * enrolling for a new credential if a matching credential is not found. The Ff attribute’s URL + * bit is ignored for this usage.
  • + *
  • ASSP, The URL references a signature web service that can be used for server-based + * signing. If the Ff attribute’s URL bit indicates that this is a required constraint, this + * implies that the credential used when signing must come from this server.
  • + *
+ * Third parties can extend the use of this attribute with their own attribute values, which + * must conform to the guidelines specified in + * PDF + * Spec 1.7 Appendix E (PDF Name Registry) + * if urlType is not set the default is Browser for URL + * + * @param urlType String of the urlType + */ + public void setURLType(String urlType) + { + dictionary.setName(COSName.URL_TYPE, urlType); + } + + private static List getListOfByteArraysFromCOSArray(COSArray array) + { + List result = new LinkedList(); + for (COSBase item : array) + { + if (item instanceof COSString) + { + result.add(((COSString) item).getBytes()); + } + } + return result; + } + + private static COSArray convertListOfByteArraysToCOSArray(List strings) + { + COSArray array = new COSArray(); + for (byte[] string : strings) + { + array.add(new COSString(string)); + } + return array; + } + +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValueMDP.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValueMDP.java index 1f17f17a837..ff46f6a1fc6 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValueMDP.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValueMDP.java @@ -27,7 +27,7 @@ */ public class PDSeedValueMDP { - private COSDictionary dictionary; + private final COSDictionary dictionary; /** * Default constructor. 
diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValueTimeStamp.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValueTimeStamp.java index 4f9652ae06a..1db7e7ee4d8 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValueTimeStamp.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSeedValueTimeStamp.java @@ -27,7 +27,7 @@ */ public class PDSeedValueTimeStamp { - private COSDictionary dictionary; + private final COSDictionary dictionary; /** * Default constructor. diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSignature.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSignature.java index 0d62663eaff..361495c3001 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSignature.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSignature.java @@ -16,12 +16,14 @@ */ package org.apache.pdfbox.pdmodel.interactive.digitalsignature; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.Calendar; import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; @@ -29,7 +31,10 @@ import org.apache.pdfbox.pdmodel.common.COSObjectable; /** - * This represents a digital signature that can be attached to a document. + * This represents a digital signature that can be attached to a document. To learn more about + * digital signatures, read + * Digital + * Signatures in a PDF by Adobe. 
* * @author Ben Litchfield * @author Thomas Chojecki @@ -102,6 +107,7 @@ public PDSignature(COSDictionary dict) * * @return The COS dictionary that matches this Java object. */ + @Override public COSDictionary getCOSObject() { return dictionary; @@ -138,7 +144,10 @@ public void setSubFilter(COSName subfilter) } /** - * Sets the name. + * Sets the name of the person or authority signing the document. According to the PDF + * specification, this value should be used only when it is not possible to extract the name + * from the signature. + * * @param name the name to be used */ public void setName(String name) @@ -147,7 +156,8 @@ public void setName(String name) } /** - * Sets the location. + * Sets the CPU host name or physical location of the signing. + * * @param location the location to be used */ public void setLocation(String location) @@ -156,7 +166,7 @@ public void setLocation(String location) } /** - * Sets the reason. + * Sets the reason for the signing, such as (I agree...). * * @param reason the reason to be used */ @@ -166,7 +176,8 @@ public void setReason(String reason) } /** - * Sets the contact info. + * Sets the contact info provided by the signer to enable a recipient to contact the signer to + * verify the signature, e.g. a phone number. * * @param contactInfo the contact info to be used */ @@ -205,7 +216,9 @@ public String getSubFilter() } /** - * Returns the name. + * Returns the name of the person or authority signing the document. According to the PDF + * specification, this value should be used only when it is not possible to extract the name + * from the signature. * * @return the name */ @@ -215,7 +228,7 @@ public String getName() } /** - * Returns the location. + * Returns the CPU host name or physical location of the signing. * * @return the location */ @@ -225,7 +238,7 @@ public String getLocation() } /** - * Returns the reason. + * Returns the reason for the signing, such as (I agree...). 
* * @return the reason */ @@ -235,9 +248,10 @@ public String getReason() } /** - * Returns the contact info. + * Returns the contact info provided by the signer to enable a recipient to contact the signer + * to verify the signature, e.g. a phone number. * - * @return teh contact info + * @return the contact info */ public String getContactInfo() { @@ -272,16 +286,21 @@ public void setByteRange(int[] range) } dictionary.setItem(COSName.BYTERANGE, ary); + ary.setDirect(true); } /** * Read out the byterange from the file. * - * @return a integer array with the byterange + * @return an integer array with the byterange, or an empty array if there is none. */ public int[] getByteRange() { - COSArray byteRange = (COSArray)dictionary.getDictionaryObject(COSName.BYTERANGE); + COSArray byteRange = dictionary.getCOSArray(COSName.BYTERANGE); + if (byteRange == null) + { + return new int[0]; + } int[] ary = new int[byteRange.size()]; for (int i = 0; i and ) - else if(buffer[c-1]==0x3E || buffer[c-1]==0x29) - { - byteOS.write(buffer, 0, c-1); + ++start; + --writeLen; } - else + // Filter > and ) at the end + if(buffer[readLen-1]==0x3E || buffer[readLen-1]==0x29) { - byteOS.write(buffer, 0, c); + --writeLen; } + baos.write(buffer, start, writeLen); } - fis.close(); + is.close(); - return COSString.parseHex(byteOS.toString("ISO-8859-1")).getBytes(); + return COSString.parseHex(baos.toString("ISO-8859-1")).getBytes(); } /** @@ -362,9 +397,13 @@ public void setContents(byte[] bytes) } /** - * Will return the signed content of the document. + * Return the signed content of the document. This is not a PDF file, nor is it the PDF file + * before signing, it is the byte sequence made of the input minus the area where the signature + * bytes will be. See "The ByteRange and signature value" in the document + * Digital + * Signatures in a PDF. * - * @param pdfFile The signed pdf file as InputStream + * @param pdfFile The signed pdf file as InputStream. It will be closed in this method. 
* @return a byte array containing only the signed part of the content * @throws IOException if the pdfFile can't be read */ @@ -387,7 +426,11 @@ public byte[] getSignedContent(InputStream pdfFile) throws IOException } /** - * Will return the signed content of the document. + * Return the signed content of the document. This is not a PDF file, nor is it the PDF file + * before signing, it is the byte sequence made of the input minus the area where the signature + * bytes will be. See "The ByteRange and signature value" in the document + * Digital + * Signatures in a PDF. * * @param pdfFile The signed pdf file as byte array * @return a byte array containing only the signed part of the content @@ -411,14 +454,14 @@ public byte[] getSignedContent(byte[] pdfFile) throws IOException } /** - * PDF signature build dictionary. Provides informations about the signature handler. + * PDF signature build dictionary. Provides information about the signature handler. * * @return the pdf signature build dictionary. */ public PDPropBuild getPropBuild() { PDPropBuild propBuild = null; - COSDictionary propBuildDic = (COSDictionary)dictionary.getDictionaryObject(COSName.PROP_BUILD); + COSDictionary propBuildDic = dictionary.getCOSDictionary(COSName.PROP_BUILD); if (propBuildDic != null) { propBuild = new PDPropBuild(propBuildDic); @@ -427,7 +470,7 @@ public PDPropBuild getPropBuild() } /** - * PDF signature build dictionary. Provides informations about the signature handler. + * PDF signature build dictionary. Provides information about the signature handler. 
* * @param propBuild the prop build */ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureInterface.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureInterface.java index 4746acd6959..d0e4db37525 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureInterface.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureInterface.java @@ -31,6 +31,7 @@ public interface SignatureInterface * * @param content is the content as a (Filter)InputStream * @return signature as a byte array + * @throws IOException if something went wrong */ byte[] sign(InputStream content) throws IOException; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureOptions.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureOptions.java index a009eb9046f..d7671c68ddd 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureOptions.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SignatureOptions.java @@ -22,19 +22,25 @@ import java.io.InputStream; import org.apache.pdfbox.cos.COSDocument; +import org.apache.pdfbox.io.RandomAccessBuffer; import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; +import org.apache.pdfbox.io.RandomAccessRead; import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.visible.PDVisibleSigProperties; /** - * TODO description needed + * This contains the visual signature as a COSDocument, its preferred size and the page. 
*/ public class SignatureOptions implements Closeable { private COSDocument visualSignature; private int preferredSignatureSize; private int pageNo; - + + // the pdf to be read + // this is done analog to PDDocument + private RandomAccessRead pdfSource = null; + public static final int DEFAULT_SIGNATURE_SIZE = 0x2500; /** @@ -47,85 +53,90 @@ public SignatureOptions() /** * Set the 0-based page number. - * + * * @param pageNo the page number */ public void setPage(int pageNo) { this.pageNo = pageNo; } - + /** * Get the 0-based page number. - * + * * @return the page number */ - public int getPage() + public int getPage() { return pageNo; } - + /** * Reads the visual signature from the given file. - * + * * @param file the file containing the visual signature - * @throws IOException when something went wrong during parsing + * @throws IOException when something went wrong during parsing */ public void setVisualSignature(File file) throws IOException - { - PDFParser parser = new PDFParser(new RandomAccessBufferedFileInputStream(file)); - parser.parse(); - visualSignature = parser.getDocument(); + { + initFromRandomAccessRead(new RandomAccessBufferedFileInputStream(file)); } - + /** * Reads the visual signature from the given input stream. 
- * + * * @param is the input stream containing the visual signature - * @throws IOException when something went wrong during parsing + * @throws IOException when something went wrong during parsing */ public void setVisualSignature(InputStream is) throws IOException - { - PDFParser parser = new PDFParser(new RandomAccessBufferedFileInputStream(is)); + { + initFromRandomAccessRead(new RandomAccessBuffer(is)); + } + + private void initFromRandomAccessRead(RandomAccessRead rar) throws IOException + { + pdfSource = rar; + PDFParser parser = new PDFParser(pdfSource); parser.parse(); visualSignature = parser.getDocument(); } - + /** * Reads the visual signature from the given visual signature properties - * - * @param visSignatureProperties the PDVisibleSigProperties object containing the visual signature - * + * + * @param visSignatureProperties the PDVisibleSigProperties object containing the + * visual signature + * * @throws IOException when something went wrong during parsing */ public void setVisualSignature(PDVisibleSigProperties visSignatureProperties) throws IOException - { + { setVisualSignature(visSignatureProperties.getVisibleSignature()); } /** * Get the visual signature. - * + * * @return the visual signature */ public COSDocument getVisualSignature() { return visualSignature; } - + /** * Get the preferred size of the signature. - * + * * @return the preferred size of the signature in bytes. */ public int getPreferredSignatureSize() { return preferredSignatureSize; } - + /** * Set the preferred size of the signature. - * + * * @param size the size of the signature in bytes. Only values above 0 will be considered. */ public void setPreferredSignatureSize(int size) @@ -137,7 +148,9 @@ public void setPreferredSignatureSize(int size) } /** - * Closes the visual signature COSDocument, if any. + * Closes the visual signature COSDocument, if any. 
Do not call this before you've saved your + * signed PDF document, or saving will fail because COSStream objects held both by the + * COSDocument and by the signed document would no longer be available. * * @throws IOException if the document could not be closed */ @@ -148,5 +161,9 @@ public void close() throws IOException { visualSignature.close(); } + if (pdfSource != null) + { + pdfSource.close(); + } } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SigningSupport.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SigningSupport.java new file mode 100644 index 00000000000..6b35025d535 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/SigningSupport.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.interactive.digitalsignature; + +import org.apache.pdfbox.pdfwriter.COSWriter; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; + +/** + * Class to be used when creating PDF signatures externally. COSWriter is used to obtain data to be + * signed and set the resulting CMS signature. 
+ * + */ +public class SigningSupport implements ExternalSigningSupport, Closeable +{ + private COSWriter cosWriter; + + public SigningSupport(COSWriter cosWriter) + { + this.cosWriter = cosWriter; + } + + @Override + public InputStream getContent() throws IOException + { + return cosWriter.getDataToSign(); + } + + @Override + public void setSignature(byte[] signature) throws IOException + { + cosWriter.writeExternalSignature(signature); + } + + @Override + public void close() throws IOException + { + if (cosWriter != null) + { + try + { + cosWriter.close(); + } + finally + { + cosWriter = null; + } + } + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/package.html index 0f75d8db14d..78d5ecc0ba8 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/package.html @@ -15,11 +15,11 @@ ! limitations under the License. !--> - - + + -The digitial signature library will manage signatures that are stored in the PDF document. +The digital signature library will manage signatures that are stored in the PDF document. diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDFTemplateBuilder.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDFTemplateBuilder.java index 4bee05f6ab5..c89ebd30183 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDFTemplateBuilder.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDFTemplateBuilder.java @@ -40,57 +40,69 @@ public interface PDFTemplateBuilder { /** * In order to create Affine Transform, using parameters. 
- * @param params + * + * @param params parameter values + * @deprecated use {@link #createAffineTransform(java.awt.geom.AffineTransform) } */ + @Deprecated void createAffineTransform(byte[] params); + /** + * In order to create Affine Transform, using parameters. + * + * @param affineTransform the transformation + */ + void createAffineTransform(AffineTransform affineTransform); + /** * Creates specified size page. * - * @param properties + * @param properties property value */ void createPage(PDVisibleSignDesigner properties); /** * Creates template using page. * - * @param page - * @throws IOException + * @param page the given page + * @throws IOException if something went wrong */ void createTemplate(PDPage page) throws IOException; /** * Creates Acro forms in the template. * - * @param template + * @param template the template document */ void createAcroForm(PDDocument template); /** * Creates signature fields. * - * @param acroForm - * @throws IOException + * @param acroForm the acroform + * @throws IOException if something went wrong */ void createSignatureField(PDAcroForm acroForm) throws IOException; /** - * Creates PDSignatureField. - * - * @param pdSignatureField - * @param page - * @param signatureName - * @throws IOException + * Creates the signature with the given name and assigns it to the signature field parameter and assigns the page + * parameter to the widget. + * + * @param pdSignatureField signature field + * @param page the given page + * @param signerName the name of the person or authority signing the document. According to the PDF specification, + * this value should be used only when it is not possible to extract the name from the signature. + * @throws IOException if something went wrong */ - void createSignature(PDSignatureField pdSignatureField, PDPage page, String signatureName) + void createSignature(PDSignatureField pdSignatureField, PDPage page, String signerName) throws IOException; /** * Create AcroForm Dictionary. 
* - * @param acroForm - * @param signatureField - * @throws IOException + * @param acroForm the acroform + * @param signatureField the signature field + * @throws IOException if something went wrong */ void createAcroFormDictionary(PDAcroForm acroForm, PDSignatureField signatureField) throws IOException; @@ -98,9 +110,9 @@ void createAcroFormDictionary(PDAcroForm acroForm, PDSignatureField signatureFie /** * Creates SignatureRectangle. * - * @param signatureField - * @param properties - * @throws IOException + * @param signatureField the signature field + * @param properties properties + * @throws IOException if something went wrong */ void createSignatureRectangle(PDSignatureField signatureField, PDVisibleSignDesigner properties) throws IOException; @@ -112,21 +124,37 @@ void createSignatureRectangle(PDSignatureField signatureField, /** * Creates signature image. - * @param template - * @param image - * @throws IOException + * + * @param template template document + * @param image signature image + * @throws IOException if something went wrong */ void createSignatureImage(PDDocument template, BufferedImage image) throws IOException; /** + * An array of four numbers in the form coordinate system, giving the coordinates of the left, bottom, right, and + * top edges, respectively, of the form XObject’s bounding box. These boundaries shall be used to clip the form + * XObject and to determine its size for caching. + * + * @param params parameters * - * @param params + * @deprecated use {@link #createFormatterRectangle(int[]) createFormatterRectangle(int[])} */ + @Deprecated void createFormatterRectangle(byte[] params); + /** + * An array of four numbers in the form coordinate system, giving the coordinates of the left, bottom, right, and + * top edges, respectively, of the form XObject’s bounding box. These boundaries shall be used to clip the form + * XObject and to determine its size for caching. 
+ * + * @param params parameters + */ + void createFormatterRectangle(int[] params); + /** * - * @param template + * @param template template document */ void createHolderFormStream(PDDocument template); @@ -138,26 +166,26 @@ void createSignatureRectangle(PDSignatureField signatureField, /** * Creates Form * - * @param holderFormResources - * @param holderFormStream - * @param formrect + * @param holderFormResources holder form resources + * @param holderFormStream holder stream + * @param bbox bounding box */ void createHolderForm(PDResources holderFormResources, PDStream holderFormStream, - PDRectangle formrect); + PDRectangle bbox); /** * Creates appearance dictionary * - * @param holderForml - * @param signatureField - * @throws IOException + * @param holderForml holder XObject + * @param signatureField the signature field + * @throws IOException if something went wrong */ void createAppearanceDictionary(PDFormXObject holderForml, PDSignatureField signatureField) throws IOException; /** * - * @param template + * @param template template document */ void createInnerFormStream(PDDocument template); @@ -168,24 +196,23 @@ void createAppearanceDictionary(PDFormXObject holderForml, /** * - * @param innerFormResources - * @param innerFormStream - * @param formrect + * @param innerFormResources inner form resources + * @param innerFormStream inner form stream + * @param bbox bounding box */ - void createInnerForm(PDResources innerFormResources, PDStream innerFormStream, - PDRectangle formrect); + void createInnerForm(PDResources innerFormResources, PDStream innerFormStream, PDRectangle bbox); /** * - * @param innerForm - * @param holderFormResources + * @param innerForm inner form XObject + * @param holderFormResources holder form resources */ void insertInnerFormToHolderResources(PDFormXObject innerForm, PDResources holderFormResources); /** * - * @param template + * @param template template document */ void createImageFormStream(PDDocument template); @@ -197,27 
+224,37 @@ void insertInnerFormToHolderResources(PDFormXObject innerForm, /** * Creates Image form * - * @param imageFormResources - * @param innerFormResource - * @param imageFormStream - * @param formrect - * @param affineTransform - * @param img - * @throws IOException + * @param imageFormResources image form resources + * @param innerFormResource inner form resources + * @param imageFormStream image from stream + * @param bbox bounding box + * @param affineTransform transformation + * @param img ImageXObject + * @throws IOException if something went wrong */ void createImageForm(PDResources imageFormResources, PDResources innerFormResource, - PDStream imageFormStream, PDRectangle formrect, AffineTransform affineTransform, + PDStream imageFormStream, PDRectangle bbox, AffineTransform affineTransform, PDImageXObject img) throws IOException; + /** + * Creates the background layer form (n0). + * + * @param innerFormResource inner acroform resources + * @param formatter rectangle of the formatter + * @throws IOException if something went wrong + */ + void createBackgroundLayerForm(PDResources innerFormResource, PDRectangle formatter) + throws IOException; + /** * Inject procSetArray * - * @param innerForm - * @param page - * @param innerFormResources - * @param imageFormResources - * @param holderFormResources - * @param procSet + * @param innerForm inner form + * @param page the given page + * @param innerFormResources inner form resources + * @param imageFormResources image form resources + * @param holderFormResources holder form resources + * @param procSet procset values */ void injectProcSetArray(PDFormXObject innerForm, PDPage page, PDResources innerFormResources, PDResources imageFormResources, @@ -226,32 +263,32 @@ void injectProcSetArray(PDFormXObject innerForm, PDPage page, /** * injects appearance streams * - * @param holderFormStream - * @param innterFormStream - * @param imageFormStream - * @param imageObjectName - * @param imageName - * @param 
innerFormName - * @param properties - * @throws IOException + * @param holderFormStream holder form stream + * @param innerFormStream inner form stream + * @param imageFormStream image form stream + * @param imageFormName image form name + * @param imageName image name + * @param innerFormName inner form name + * @param properties property values + * @throws IOException if something went wrong */ - void injectAppearanceStreams(PDStream holderFormStream, PDStream innterFormStream, - PDStream imageFormStream, COSName imageObjectName, COSName imageName, + void injectAppearanceStreams(PDStream holderFormStream, PDStream innerFormStream, + PDStream imageFormStream, COSName imageFormName, COSName imageName, COSName innerFormName, PDVisibleSignDesigner properties) throws IOException; /** * just to create visible signature * - * @param template + * @param template template document */ void createVisualSignature(PDDocument template); /** * adds Widget Dictionary * - * @param signatureField - * @param holderFormResources - * @throws IOException + * @param signatureField the signature field + * @param holderFormResources holder form resources + * @throws IOException if something went wrong */ void createWidgetDictionary(PDSignatureField signatureField, PDResources holderFormResources) throws IOException; @@ -265,8 +302,8 @@ void createWidgetDictionary(PDSignatureField signatureField, /** * Closes template * - * @param template - * @throws IOException + * @param template template document + * @throws IOException if something went wrong */ void closeTemplate(PDDocument template) throws IOException; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDFTemplateCreator.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDFTemplateCreator.java index 1d4ae5b82d0..d0630c01c2e 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDFTemplateCreator.java 
+++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDFTemplateCreator.java @@ -18,12 +18,15 @@ import java.awt.geom.AffineTransform; import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdfwriter.COSWriter; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; @@ -34,28 +37,29 @@ import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField; /** - * Using that class, we build pdf template. + * Class to build PDF template. + * * @author Vakhtang Koroghlishvili */ public class PDFTemplateCreator { - PDFTemplateBuilder pdfBuilder; - private static final Log logger = LogFactory.getLog(PDFTemplateCreator.class); + private final PDFTemplateBuilder pdfBuilder; + private static final Log LOG = LogFactory.getLog(PDFTemplateCreator.class); /** - * sets PDFBuilder + * Constructor. * - * @param bookBuilder + * @param templateBuilder */ - public PDFTemplateCreator(PDFTemplateBuilder bookBuilder) + public PDFTemplateCreator(PDFTemplateBuilder templateBuilder) { - pdfBuilder = bookBuilder; + pdfBuilder = templateBuilder; } /** - * that method returns object of PDFStructure + * Returns the PDFTemplateStructure object. * - * @return PDFStructure + * @return the PDFTemplateStructure object. */ public PDFTemplateStructure getPdfStructure() { @@ -63,14 +67,15 @@ public PDFTemplateStructure getPdfStructure() } /** - * this method builds pdf step by step, and finally it returns stream of visible signature + * Build a PDF with a visible signature step by step, and return it as a stream. 
+ * * @param properties * @return InputStream * @throws IOException */ public InputStream buildPDF(PDVisibleSignDesigner properties) throws IOException { - logger.info("pdf building has been started"); + LOG.info("pdf building has been started"); PDFTemplateStructure pdfStructure = pdfBuilder.getStructure(); // we create array of [Text, ImageB, ImageC, ImageI] @@ -93,19 +98,23 @@ public InputStream buildPDF(PDVisibleSignDesigner properties) throws IOException PDSignatureField pdSignatureField = pdfStructure.getSignatureField(); // create signature - pdfBuilder.createSignature(pdSignatureField, page, properties.getSignatureFieldName()); + //TODO + // The line below has no effect with the CreateVisibleSignature example. + // The signature field is needed as a "holder" for the /AP tree, + // but the /P and /V PDSignatureField entries are ignored by PDDocument.addSignature + pdfBuilder.createSignature(pdSignatureField, page, ""); // that is /AcroForm/DR entry pdfBuilder.createAcroFormDictionary(acroForm, pdSignatureField); // create AffineTransform - pdfBuilder.createAffineTransform(properties.getAffineTransformParams()); + pdfBuilder.createAffineTransform(properties.getTransform()); AffineTransform transform = pdfStructure.getAffineTransform(); // rectangle, formatter, image. /AcroForm/DR/XObject contains that form pdfBuilder.createSignatureRectangle(pdSignatureField, properties); - pdfBuilder.createFormatterRectangle(properties.getFormatterRectangleParams()); - PDRectangle formatter = pdfStructure.getFormatterRectangle(); + pdfBuilder.createFormatterRectangle(properties.getFormatterRectangleParameters()); + PDRectangle bbox = pdfStructure.getFormatterRectangle(); pdfBuilder.createSignatureImage(template, properties.getImage()); // create form stream, form and resource. 
@@ -113,45 +122,47 @@ public InputStream buildPDF(PDVisibleSignDesigner properties) throws IOException PDStream holderFormStream = pdfStructure.getHolderFormStream(); pdfBuilder.createHolderFormResources(); PDResources holderFormResources = pdfStructure.getHolderFormResources(); - pdfBuilder.createHolderForm(holderFormResources, holderFormStream, formatter); + pdfBuilder.createHolderForm(holderFormResources, holderFormStream, bbox); // that is /AP entry the appearance dictionary. pdfBuilder.createAppearanceDictionary(pdfStructure.getHolderForm(), pdSignatureField); - // inner form stream, form and resource (hlder form containts inner form) + // inner form stream, form and resource (holder form contains inner form) pdfBuilder.createInnerFormStream(template); pdfBuilder.createInnerFormResource(); PDResources innerFormResource = pdfStructure.getInnerFormResources(); - pdfBuilder.createInnerForm(innerFormResource, pdfStructure.getInnerFormStream(), formatter); + pdfBuilder.createInnerForm(innerFormResource, pdfStructure.getInnerFormStream(), bbox); PDFormXObject innerForm = pdfStructure.getInnerForm(); // inner form must be in the holder form as we wrote pdfBuilder.insertInnerFormToHolderResources(innerForm, holderFormResources); - // Image form is in this structure: /AcroForm/DR/FRM0/Resources/XObject/n0 + // Image form is in this structure: /AcroForm/DR/FRM/Resources/XObject/n2 pdfBuilder.createImageFormStream(template); PDStream imageFormStream = pdfStructure.getImageFormStream(); pdfBuilder.createImageFormResources(); PDResources imageFormResources = pdfStructure.getImageFormResources(); - pdfBuilder.createImageForm(imageFormResources, innerFormResource, imageFormStream, formatter, + pdfBuilder.createImageForm(imageFormResources, innerFormResource, imageFormStream, bbox, transform, pdfStructure.getImage()); + + pdfBuilder.createBackgroundLayerForm(innerFormResource, bbox); // now inject procSetArray pdfBuilder.injectProcSetArray(innerForm, page, innerFormResource, 
imageFormResources, holderFormResources, pdfStructure.getProcSet()); - COSName imgFormName = pdfStructure.getImageFormName(); - COSName imgName = pdfStructure.getImageName(); + COSName imageFormName = pdfStructure.getImageFormName(); + COSName imageName = pdfStructure.getImageName(); COSName innerFormName = pdfStructure.getInnerFormName(); // now create Streams of AP pdfBuilder.injectAppearanceStreams(holderFormStream, imageFormStream, imageFormStream, - imgFormName, imgName, innerFormName, properties); + imageFormName, imageName, innerFormName, properties); pdfBuilder.createVisualSignature(template); pdfBuilder.createWidgetDictionary(pdSignatureField, holderFormResources); - ByteArrayInputStream in = pdfStructure.getTemplateAppearanceStream(); - logger.info("stream returning started, size= " + in.available()); + InputStream in = getVisualSignatureAsStream(pdfStructure.getVisualSignature()); + LOG.info("stream returning started, size= " + in.available()); // we must close the document template.close(); @@ -159,4 +170,13 @@ public InputStream buildPDF(PDVisibleSignDesigner properties) throws IOException // return result of the stream return in; } + + private InputStream getVisualSignatureAsStream(COSDocument visualSignature) throws IOException + { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + COSWriter writer = new COSWriter(baos); + writer.write(visualSignature); + writer.close(); + return new ByteArrayInputStream(baos.toByteArray()); + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDFTemplateStructure.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDFTemplateStructure.java index 1a54b233148..07cba701335 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDFTemplateStructure.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDFTemplateStructure.java @@ -61,7 
+61,7 @@ public class PDFTemplateStructure private PDResources holderFormResources; private PDFormXObject holderForm; private PDAppearanceDictionary appearanceDictionary; - private PDStream innterFormStream; + private PDStream innerFormStream; private PDResources innerFormResources; private PDFormXObject innerForm; private PDStream imageFormStream; @@ -169,7 +169,7 @@ public void setPdSignature(PDSignature pdSignature) } /** - * Gets Dictionary of AcroForm. Thats /DR + * Gets Dictionary of AcroForm. That's /DR * entry in the AcroForm * @return the AcroForm's dictionary */ @@ -181,7 +181,7 @@ public COSDictionary getAcroFormDictionary() /** * Acroform have its Dictionary, so we here set * the Dictionary which is in this location: - * AcroForm/DR + * AcroForm/DR * @param acroFormDictionary */ public void setAcroFormDictionary(COSDictionary acroFormDictionary) @@ -362,16 +362,16 @@ public void setAppearanceDictionary(PDAppearanceDictionary appearanceDictionary) */ public PDStream getInnerFormStream() { - return innterFormStream; + return innerFormStream; } /** * Sets inner form stream - * @param innterFormStream + * @param innerFormStream */ - public void setInnterFormStream(PDStream innterFormStream) + public void setInnterFormStream(PDStream innerFormStream) { - this.innterFormStream = innterFormStream; + this.innerFormStream = innerFormStream; } /** @@ -567,12 +567,27 @@ public void setAcroFormFields(List acroFormFields) { this.acroFormFields = acroFormFields; } - - /** - * Gets AP of the created template - * @return the templates Appearance Stream - * @throws IOException - */ + + /** + * Returns the visual signature COSDocument as a stream and closes the template field + * PDDocument. + * + * @return the visual signature COSDocument as a stream + * @throws IOException + * @deprecated This will be removed in 2.1 because the method name is misleading and confusing, + * and the work done rather belongs into the calling class: + *
+     * {@code
+     *  COSDocument visualSignature = structure.getVisualSignature();
+     *  ByteArrayOutputStream baos = new ByteArrayOutputStream();
+     *  COSWriter writer = new COSWriter(baos);
+     *  writer.write(visualSignature);
+     *  writer.close();
+     *  structure.getTemplate().close();
+     *  ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+     * } 
+ */ + @Deprecated public ByteArrayInputStream getTemplateAppearanceStream() throws IOException { COSDocument visualSignature = getVisualSignature(); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDVisibleSigBuilder.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDVisibleSigBuilder.java index c279190b76d..ef838de0287 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDVisibleSigBuilder.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDVisibleSigBuilder.java @@ -44,14 +44,24 @@ import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField; /** - * Implementation of PDFTemplateBuilder. - * @see PDFTemplateBuilder + * Implementation of {@link PDFTemplateBuilder}. This builds the signature PDF but doesn't keep the + * elements, these are kept in its PDF template structure. + * * @author Vakhtang Koroghlishvili */ public class PDVisibleSigBuilder implements PDFTemplateBuilder { private final PDFTemplateStructure pdfStructure; - private static final Log log = LogFactory.getLog(PDVisibleSigBuilder.class); + private static final Log LOG = LogFactory.getLog(PDVisibleSigBuilder.class); + + /** + * Constructor, creates PDF template structure. + */ + public PDVisibleSigBuilder() + { + pdfStructure = new PDFTemplateStructure(); + LOG.info("PDF Structure has been created"); + } @Override public void createPage(PDVisibleSignDesigner properties) @@ -59,9 +69,16 @@ public void createPage(PDVisibleSignDesigner properties) PDPage page = new PDPage(new PDRectangle(properties.getPageWidth(), properties.getPageHeight())); pdfStructure.setPage(page); - log.info("PDF page has been created"); + LOG.info("PDF page has been created"); } + /** + * Creates a PDDocument and adds the page parameter to it and keeps this as a template in the + * PDF template Structure. 
+ * + * @param page + * @throws IOException + */ @Override public void createTemplate(PDPage page) throws IOException { @@ -70,19 +87,13 @@ public void createTemplate(PDPage page) throws IOException pdfStructure.setTemplate(template); } - public PDVisibleSigBuilder() - { - pdfStructure = new PDFTemplateStructure(); - log.info("PDF Structure has been created"); - } - @Override public void createAcroForm(PDDocument template) { PDAcroForm theAcroForm = new PDAcroForm(template); template.getDocumentCatalog().setAcroForm(theAcroForm); pdfStructure.setAcroForm(theAcroForm); - log.info("AcroForm has been created"); + LOG.info("AcroForm has been created"); } @Override @@ -96,23 +107,24 @@ public void createSignatureField(PDAcroForm acroForm) throws IOException { PDSignatureField sf = new PDSignatureField(acroForm); pdfStructure.setSignatureField(sf); - log.info("Signature field has been created"); + LOG.info("Signature field has been created"); } @Override - public void createSignature(PDSignatureField pdSignatureField, PDPage page, - String signatureName) throws IOException + public void createSignature(PDSignatureField pdSignatureField, PDPage page, String signerName) + throws IOException { PDSignature pdSignature = new PDSignature(); PDAnnotationWidget widget = pdSignatureField.getWidgets().get(0); pdSignatureField.setValue(pdSignature); widget.setPage(page); page.getAnnotations().add(widget); - pdSignature.setName(signatureName); - pdSignature.setByteRange(new int[] { 0, 0, 0, 0 }); - pdSignature.setContents(new byte[4096]); + if (!signerName.isEmpty()) + { + pdSignature.setName(signerName); + } pdfStructure.setPdSignature(pdSignature); - log.info("PDSignature has been created"); + LOG.info("PDSignature has been created"); } @Override @@ -129,7 +141,7 @@ public void createAcroFormDictionary(PDAcroForm acroForm, PDSignatureField signa acroForm.setDefaultAppearance("/sylfaen 0 Tf 0 g"); pdfStructure.setAcroFormFields(acroFormFields); 
pdfStructure.setAcroFormDictionary(acroFormDict); - log.info("AcroForm dictionary has been created"); + LOG.info("AcroForm dictionary has been created"); } @Override @@ -145,16 +157,29 @@ public void createSignatureRectangle(PDSignatureField signatureField, rect.setLowerLeftX(properties.getxAxis()); signatureField.getWidgets().get(0).setRectangle(rect); pdfStructure.setSignatureRectangle(rect); - log.info("Signature rectangle has been created"); + LOG.info("Signature rectangle has been created"); } + /** + * {@inheritDoc } + * + * @deprecated use {@link #createAffineTransform(java.awt.geom.AffineTransform) } + */ @Override + @Deprecated public void createAffineTransform(byte[] params) { AffineTransform transform = new AffineTransform(params[0], params[1], params[2], params[3], params[4], params[5]); pdfStructure.setAffineTransform(transform); - log.info("Matrix has been added"); + LOG.info("Matrix has been added"); + } + + @Override + public void createAffineTransform(AffineTransform affineTransform) + { + pdfStructure.setAffineTransform(affineTransform); + LOG.info("Matrix has been added"); } @Override @@ -167,27 +192,46 @@ public void createProcSetArray() procSetArr.add(COSName.getPDFName("ImageC")); procSetArr.add(COSName.getPDFName("ImageI")); pdfStructure.setProcSet(procSetArr); - log.info("ProcSet array has been created"); + LOG.info("ProcSet array has been created"); } @Override public void createSignatureImage(PDDocument template, BufferedImage image) throws IOException { pdfStructure.setImage(LosslessFactory.createFromImage(template, image)); - log.info("Visible Signature Image has been created"); + LOG.info("Visible Signature Image has been created"); } + /** + * {@inheritDoc } + * + * @deprecated use {@link #createFormatterRectangle(int[]) createFormatterRectangle(int[])} + */ @Override + @Deprecated public void createFormatterRectangle(byte[] params) { PDRectangle formatterRectangle = new PDRectangle(); - formatterRectangle.setUpperRightX(params[0]); - 
formatterRectangle.setUpperRightY(params[1]); - formatterRectangle.setLowerLeftX(params[2]); - formatterRectangle.setLowerLeftY(params[3]); + formatterRectangle.setLowerLeftX(Math.min(params[0],params[2])); + formatterRectangle.setLowerLeftY(Math.min(params[1],params[3])); + formatterRectangle.setUpperRightX(Math.max(params[0],params[2])); + formatterRectangle.setUpperRightY(Math.max(params[1],params[3])); + + pdfStructure.setFormatterRectangle(formatterRectangle); + LOG.info("Formatter rectangle has been created"); + } + + @Override + public void createFormatterRectangle(int[] params) + { + PDRectangle formatterRectangle = new PDRectangle(); + formatterRectangle.setLowerLeftX(Math.min(params[0],params[2])); + formatterRectangle.setLowerLeftY(Math.min(params[1],params[3])); + formatterRectangle.setUpperRightX(Math.max(params[0],params[2])); + formatterRectangle.setUpperRightY(Math.max(params[1],params[3])); pdfStructure.setFormatterRectangle(formatterRectangle); - log.info("Formatter rectangle has been created"); + LOG.info("Formatter rectangle has been created"); } @Override @@ -195,7 +239,7 @@ public void createHolderFormStream(PDDocument template) { PDStream holderForm = new PDStream(template); pdfStructure.setHolderFormStream(holderForm); - log.info("Holder form stream has been created"); + LOG.info("Holder form stream has been created"); } @Override @@ -203,20 +247,20 @@ public void createHolderFormResources() { PDResources holderFormResources = new PDResources(); pdfStructure.setHolderFormResources(holderFormResources); - log.info("Holder form resources have been created"); + LOG.info("Holder form resources have been created"); } @Override public void createHolderForm(PDResources holderFormResources, PDStream holderFormStream, - PDRectangle formrect) + PDRectangle bbox) { PDFormXObject holderForm = new PDFormXObject(holderFormStream); holderForm.setResources(holderFormResources); - holderForm.setBBox(formrect); + holderForm.setBBox(bbox); 
holderForm.setFormType(1); pdfStructure.setHolderForm(holderForm); - log.info("Holder form has been created"); + LOG.info("Holder form has been created"); } @@ -233,15 +277,15 @@ public void createAppearanceDictionary(PDFormXObject holderForml, signatureField.getWidgets().get(0).setAppearance(appearance); pdfStructure.setAppearanceDictionary(appearance); - log.info("PDF appearance dictionary has been created"); + LOG.info("PDF appearance dictionary has been created"); } @Override public void createInnerFormStream(PDDocument template) { - PDStream innterFormStream = new PDStream(template); - pdfStructure.setInnterFormStream(innterFormStream); - log.info("Stream of another form (inner form - it will be inside holder form) " + + PDStream innerFormStream = new PDStream(template); + pdfStructure.setInnterFormStream(innerFormStream); + LOG.info("Stream of another form (inner form - it will be inside holder form) " + "has been created"); } @@ -250,29 +294,30 @@ public void createInnerFormResource() { PDResources innerFormResources = new PDResources(); pdfStructure.setInnerFormResources(innerFormResources); - log.info("Resources of another form (inner form - it will be inside holder form)" + + LOG.info("Resources of another form (inner form - it will be inside holder form)" + "have been created"); } @Override - public void createInnerForm(PDResources innerFormResources, PDStream innerFormStream, - PDRectangle formrect) + public void createInnerForm(PDResources innerFormResources, + PDStream innerFormStream, + PDRectangle bbox) { PDFormXObject innerForm = new PDFormXObject(innerFormStream); innerForm.setResources(innerFormResources); - innerForm.setBBox(formrect); + innerForm.setBBox(bbox); innerForm.setFormType(1); pdfStructure.setInnerForm(innerForm); - log.info("Another form (inner form - it will be inside holder form) has been created"); + LOG.info("Another form (inner form - it will be inside holder form) has been created"); } @Override public void 
insertInnerFormToHolderResources(PDFormXObject innerForm, - PDResources holderFormResources) + PDResources holderFormResources) { - COSName innerFormName = holderFormResources.add(innerForm, "FRM"); - pdfStructure.setInnerFormName(innerFormName); - log.info("Now inserted inner form inside holder form"); + holderFormResources.put(COSName.FRM, innerForm); + pdfStructure.setInnerFormName(COSName.FRM); + LOG.info("Now inserted inner form inside holder form"); } @Override @@ -280,7 +325,7 @@ public void createImageFormStream(PDDocument template) { PDStream imageFormStream = new PDStream(template); pdfStructure.setImageFormStream(imageFormStream); - log.info("Created image form stream"); + LOG.info("Created image form stream"); } @Override @@ -288,28 +333,42 @@ public void createImageFormResources() { PDResources imageFormResources = new PDResources(); pdfStructure.setImageFormResources(imageFormResources); - log.info("Created image form resources"); + LOG.info("Created image form resources"); } @Override public void createImageForm(PDResources imageFormResources, PDResources innerFormResource, - PDStream imageFormStream, PDRectangle formrect, AffineTransform at, + PDStream imageFormStream, PDRectangle bbox, AffineTransform at, PDImageXObject img) throws IOException { PDFormXObject imageForm = new PDFormXObject(imageFormStream); - imageForm.setBBox(formrect); + imageForm.setBBox(bbox); imageForm.setMatrix(at); imageForm.setResources(imageFormResources); imageForm.setFormType(1); imageFormResources.getCOSObject().setDirect(true); - COSName imageFormName = innerFormResource.add(imageForm, "n"); + COSName imageFormName = COSName.getPDFName("n2"); + innerFormResource.put(imageFormName, imageForm); COSName imageName = imageFormResources.add(img, "img"); pdfStructure.setImageForm(imageForm); pdfStructure.setImageFormName(imageFormName); pdfStructure.setImageName(imageName); - log.info("Created image form"); + LOG.info("Created image form"); + } + + @Override + public void 
createBackgroundLayerForm(PDResources innerFormResource, PDRectangle bbox) + throws IOException + { + // create blank n0 background layer form + PDFormXObject n0Form = new PDFormXObject(pdfStructure.getTemplate().getDocument().createCOSStream()); + n0Form.setBBox(bbox); + n0Form.setResources(new PDResources()); + n0Form.setFormType(1); + innerFormResource.put(COSName.getPDFName("n0"), n0Form); + LOG.info("Created background layer form"); } @Override @@ -322,30 +381,27 @@ public void injectProcSetArray(PDFormXObject innerForm, PDPage page, innerFormResources.getCOSObject().setItem(COSName.PROC_SET, procSet); imageFormResources.getCOSObject().setItem(COSName.PROC_SET, procSet); holderFormResources.getCOSObject().setItem(COSName.PROC_SET, procSet); - log.info("Inserted ProcSet to PDF"); + LOG.info("Inserted ProcSet to PDF"); } @Override - public void injectAppearanceStreams(PDStream holderFormStream, PDStream innterFormStream, - PDStream imageFormStream, COSName imageObjectName, + public void injectAppearanceStreams(PDStream holderFormStream, PDStream innerFormStream, + PDStream imageFormStream, COSName imageFormName, COSName imageName, COSName innerFormName, PDVisibleSignDesigner properties) throws IOException { - // 100 means that document width is 100% via the rectangle. if rectangle - // is 500px, images 100% is 500px. 
- // String imgFormComment = "q "+imageWidthSize+ " 0 0 50 0 0 cm /" + - // imageName + " Do Q\n" + builder.toString(); - String imgFormComment = "q " + 100 + " 0 0 50 0 0 cm /" + imageName.getName() + " Do Q\n"; - String holderFormComment = "q 1 0 0 1 0 0 cm /" + innerFormName.getName() + " Do Q \n"; - String innerFormComment = "q 1 0 0 1 0 0 cm /" + imageObjectName.getName() + " Do Q\n"; - - appendRawCommands(pdfStructure.getHolderFormStream().createOutputStream(), - holderFormComment); - appendRawCommands(pdfStructure.getInnerFormStream().createOutputStream(), - innerFormComment); - appendRawCommands(pdfStructure.getImageFormStream().createOutputStream(), - imgFormComment); - log.info("Injected appearance stream to pdf"); + // Use width and height of BBox as values for transformation matrix. + int width = (int) this.getStructure().getFormatterRectangle().getWidth(); + int height = (int) this.getStructure().getFormatterRectangle().getHeight(); + + String imgFormContent = "q " + width + " 0 0 " + height + " 0 0 cm /" + imageName.getName() + " Do Q\n"; + String holderFormContent = "q 1 0 0 1 0 0 cm /" + innerFormName.getName() + " Do Q\n"; + String innerFormContent = "q 1 0 0 1 0 0 cm /n0 Do Q q 1 0 0 1 0 0 cm /" + imageFormName.getName() + " Do Q\n"; + + appendRawCommands(pdfStructure.getHolderFormStream().createOutputStream(), holderFormContent); + appendRawCommands(pdfStructure.getInnerFormStream().createOutputStream(), innerFormContent); + appendRawCommands(pdfStructure.getImageFormStream().createOutputStream(), imgFormContent); + LOG.info("Injected appearance stream to pdf"); } public void appendRawCommands(OutputStream os, String commands) throws IOException @@ -358,7 +414,7 @@ public void appendRawCommands(OutputStream os, String commands) throws IOExcepti public void createVisualSignature(PDDocument template) { pdfStructure.setVisualSignature(template.getDocument()); - log.info("Visible signature has been created"); + LOG.info("Visible signature has been 
created"); } @Override @@ -370,7 +426,7 @@ public void createWidgetDictionary(PDSignatureField signatureField, widgetDict.setItem(COSName.DR, holderFormResources.getCOSObject()); pdfStructure.setWidgetDictionary(widgetDict); - log.info("WidgetDictionary has been created"); + LOG.info("WidgetDictionary has been created"); } @Override diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDVisibleSignDesigner.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDVisibleSignDesigner.java index 42e520dad9f..fffe07e349d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDVisibleSignDesigner.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/PDVisibleSignDesigner.java @@ -16,20 +16,25 @@ */ package org.apache.pdfbox.pdmodel.interactive.digitalsignature.visible; +import java.awt.geom.AffineTransform; import java.awt.image.BufferedImage; + +import java.io.BufferedInputStream; +import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import javax.imageio.ImageIO; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; /** - * Builder for visible signature design. - * Uses use param() instead of setParam() + * Class for visible signature design properties. Setters use param() instead of setParam() to allow + * chaining. 
* * @author Vakhtang Koroghlishvili */ @@ -44,8 +49,10 @@ public class PDVisibleSignDesigner private BufferedImage image; private String signatureFieldName = "sig"; private byte[] formatterRectangleParams = { 0, 0, 100, 50 }; - private byte[] AffineTransformParams = { 1, 0, 0, 1, 0, 0 }; + private int[] formatterRectangleParameters = { 0, 0, 100, 50 }; + private AffineTransform affineTransform = new AffineTransform(); private float imageSizeInPercents; + private int rotation = 0; /** * Constructor. @@ -58,7 +65,11 @@ public class PDVisibleSignDesigner public PDVisibleSignDesigner(String filename, InputStream imageStream, int page) throws IOException { - this(new FileInputStream(filename), imageStream, page); + // set visible signature image Input stream + readImageStream(imageStream); + + // calculate height and width of document page + calculatePageSizeFromFile(filename, page); } /** @@ -75,13 +86,8 @@ public PDVisibleSignDesigner(InputStream documentStream, InputStream imageStream // set visible signature image Input stream readImageStream(imageStream); - // create PD document - PDDocument document = PDDocument.load(documentStream); - // calculate height and width of document page - calculatePageSize(document, page); - - document.close(); + calculatePageSizeFromStream(documentStream, page); } /** @@ -109,7 +115,11 @@ public PDVisibleSignDesigner(PDDocument document, InputStream imageStream, int p public PDVisibleSignDesigner(String filename, BufferedImage image, int page) throws IOException { - this(new FileInputStream(filename), image, page); + // set visible signature image + setImage(image); + + // calculate height and width of document page + calculatePageSizeFromFile(filename, page); } /** @@ -126,13 +136,8 @@ public PDVisibleSignDesigner(InputStream documentStream, BufferedImage image, in // set visible signature image setImage(image); - // create PD document - PDDocument document = PDDocument.load(documentStream); - // calculate height and width of 
document page - calculatePageSize(document, page); - - document.close(); + calculatePageSizeFromStream(documentStream, page); } /** @@ -148,6 +153,40 @@ public PDVisibleSignDesigner(PDDocument document, BufferedImage image, int page) calculatePageSize(document, page); } + /** + * Constructor usable for signing existing signature fields. + * + * @param imageStream image as a stream + * @throws IOException + */ + public PDVisibleSignDesigner(InputStream imageStream) throws IOException + { + // set visible signature image Input stream + readImageStream(imageStream); + } + + private void calculatePageSizeFromFile(String filename, int page) throws IOException + { + // create PD document + PDDocument document = PDDocument.load(new File(filename)); + + // calculate height and width of document page + calculatePageSize(document, page); + + document.close(); + } + + private void calculatePageSizeFromStream(InputStream documentStream, int page) throws IOException + { + // create PD document + PDDocument document = PDDocument.load(documentStream); + + // calculate height and width of document page + calculatePageSize(document, page); + + document.close(); + } + /** * Each page of document can be different sizes. This method calculates the page size based on * the page media box. @@ -163,43 +202,101 @@ private void calculatePageSize(PDDocument document, int page) throw new IllegalArgumentException("First page of pdf is 1, not " + page); } - PDPage firstPage = document.getPage(page - 1); PDRectangle mediaBox = firstPage.getMediaBox(); pageHeight(mediaBox.getHeight()); pageWidth = mediaBox.getWidth(); + imageSizeInPercents = 100; + rotation = firstPage.getRotation() % 360; + } - float x = pageWidth; - float y = 0; - pageWidth += y; - float tPercent = (100 * y / (x + y)); - imageSizeInPercents = 100 - tPercent; + /** + * Adjust signature for page rotation. 
This is optional, call this after all x and y coordinates + * have been set if you want the signature to be positioned regardless of page orientation. + * + * @return Visible Signature Configuration Object + */ + public PDVisibleSignDesigner adjustForRotation() + { + switch (rotation) + { + case 90: + // https://stackoverflow.com/a/34359956/535646 + float temp = yAxis; + yAxis = pageHeight - xAxis - imageWidth; + xAxis = temp; + + affineTransform = new AffineTransform( + 0, imageHeight / imageWidth, -imageWidth / imageHeight, 0, imageWidth, 0); + + temp = imageHeight; + imageHeight = imageWidth; + imageWidth = temp; + break; + + case 180: + float newX = pageWidth - xAxis - imageWidth; + float newY = pageHeight - yAxis - imageHeight; + xAxis = newX; + yAxis = newY; + + affineTransform = new AffineTransform(-1, 0, 0, -1, imageWidth, imageHeight); + break; + + case 270: + temp = xAxis; + xAxis = pageWidth - yAxis - imageHeight; + yAxis = temp; + + affineTransform = new AffineTransform( + 0, -imageHeight / imageWidth, imageWidth / imageHeight, 0, 0, imageHeight); + + temp = imageHeight; + imageHeight = imageWidth; + imageWidth = temp; + break; + + case 0: + default: + break; + } + return this; } /** * Set the image for the signature. - * - * @param path of image location - * @return image Stream + * + * @param path Path of the image file. + * @return Visible Signature Configuration Object * @throws IOException */ public PDVisibleSignDesigner signatureImage(String path) throws IOException { - InputStream fin = new FileInputStream(path); - readImageStream(fin); + InputStream in = null; + try + { + in = new BufferedInputStream(new FileInputStream(path)); + readImageStream(in); + } + finally + { + IOUtils.closeQuietly(in); + } return this; } /** * Zoom signature image with some percent. * - * @param percent increase image with x percent. + * @param percent increase (positive value) or decrease (negative value) image with x percent. 
* @return Visible Signature Configuration Object */ public PDVisibleSignDesigner zoom(float percent) { imageHeight += (imageHeight * percent) / 100; imageWidth += (imageWidth * percent) / 100; + formatterRectangleParameters[2] = (int) imageWidth.floatValue(); + formatterRectangleParameters[3] = (int) imageHeight.floatValue(); return this; } @@ -273,6 +370,7 @@ public float getWidth() public PDVisibleSignDesigner width(float width) { this.imageWidth = width; + this.formatterRectangleParameters[2] = (int) width; return this; } @@ -293,6 +391,7 @@ public float getHeight() public PDVisibleSignDesigner height(float height) { this.imageHeight = height; + this.formatterRectangleParameters[3] = (int) height; return this; } @@ -367,49 +466,108 @@ private void setImage(BufferedImage image) this.image = image; imageHeight = (float) image.getHeight(); imageWidth = (float) image.getWidth(); + formatterRectangleParameters[2] = image.getWidth(); + formatterRectangleParameters[3] = image.getHeight(); } /** + * @return Affine Transform parameters for PDF Matrix * - * @return Affine Transform parameters of for PDF Matrix + * @deprecated use {@link #getTransform() }. */ + @Deprecated public byte[] getAffineTransformParams() { - return AffineTransformParams; + return new byte[] + { + (byte) affineTransform.getScaleX(), + (byte) affineTransform.getShearY(), + (byte) affineTransform.getShearX(), + (byte) affineTransform.getScaleY(), + (byte) affineTransform.getTranslateX(), + (byte) affineTransform.getTranslateY() + }; + } + + /** + * @return Affine Transform parameters for PDF Matrix + */ + public AffineTransform getTransform() + { + return affineTransform; } /** * * @param affineTransformParams * @return Visible Signature Configuration Object + * @deprecated use {@link #transform}. 
*/ + @Deprecated public PDVisibleSignDesigner affineTransformParams(byte[] affineTransformParams) { - AffineTransformParams = affineTransformParams; + affineTransform = new AffineTransform(affineTransformParams[0], affineTransformParams[1], + affineTransformParams[2], affineTransformParams[3], + affineTransformParams[4], affineTransformParams[5]); return this; } /** * - * @return formatter PDRectanle parameters + * @param affineTransform + * @return Visible Signature Configuration Object + */ + public PDVisibleSignDesigner transform(AffineTransform affineTransform) + { + this.affineTransform = new AffineTransform(affineTransform); + return this; + } + + /** + * @return formatter PDRectangle parameters + * @deprecated use {@link #getFormatterRectangleParameters() getFormatterRectangleParameters()} */ + @Deprecated public byte[] getFormatterRectangleParams() { return formatterRectangleParams; } + /** + * + * @return formatter PDRectangle parameters + */ + public int[] getFormatterRectangleParameters() + { + return formatterRectangleParameters; + } + /** * Sets formatter PDRectangle * * @param formatterRectangleParams * @return Visible Signature Configuration Object + * @deprecated use {@link #formatterRectangleParameters(int[]) formatterRectangleParameters(int[])} */ + @Deprecated public PDVisibleSignDesigner formatterRectangleParams(byte[] formatterRectangleParams) { this.formatterRectangleParams = formatterRectangleParams; return this; } + /** + * Sets formatter PDRectangle + * + * @param formatterRectangleParameters + * @return Visible Signature Configuration Object + */ + public PDVisibleSignDesigner formatterRectangleParameters(int[] formatterRectangleParameters) + { + this.formatterRectangleParameters = formatterRectangleParameters; + return this; + } + /** * * @return page width diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/package.html 
b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/package.html index cabf72c6a9a..59292521dd0 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/visible/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/destination/PDPageDestination.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/destination/PDPageDestination.java index e84e00f3e1b..1684379ed13 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/destination/PDPageDestination.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/destination/PDPageDestination.java @@ -78,9 +78,9 @@ public PDPage getPage() } /** - * Set the page for this destination. + * Set the page for a local destination. For an external destination, call {@link #setPageNumber(int) setPageNumber(int pageNumber)}. * - * @param page The page for the destination. + * @param page The page for a local destination. */ public void setPage( PDPage page ) { @@ -165,24 +165,34 @@ public int retrievePageNumber() } else if (page instanceof COSDictionary) { - //TODO make this a static utility method of PDPageTree? 
- COSBase parent = page; - while (((COSDictionary) parent).getDictionaryObject(COSName.PARENT, COSName.P) != null) - { - parent = ((COSDictionary) parent).getDictionaryObject(COSName.PARENT, COSName.P); - } - // now parent is the pages node - PDPageTree pages = new PDPageTree((COSDictionary) parent); - return pages.indexOf(new PDPage((COSDictionary) page)); + return indexOfPageTree((COSDictionary) page); } } return retval; } + // climb up the page tree up to the top to be able to call PageTree.indexOf for a page dictionary + private int indexOfPageTree(COSDictionary pageDict) + { + COSDictionary parent = pageDict; + while (parent.getDictionaryObject(COSName.PARENT, COSName.P) instanceof COSDictionary) + { + parent = (COSDictionary) parent.getDictionaryObject(COSName.PARENT, COSName.P); + } + if (parent.containsKey(COSName.KIDS) && COSName.PAGES.equals(parent.getItem(COSName.TYPE))) + { + // now parent is the highest pages node + PDPageTree pages = new PDPageTree(parent); + return pages.indexOf(new PDPage(pageDict)); + } + return -1; + } + /** - * Set the page number for this destination. + * Set the page number for a remote destination. For an internal destination, call + * {@link #setPage(PDPage) setPage(PDPage page)}. * - * @param pageNumber The page for the destination. + * @param pageNumber The page for a remote destination. */ public void setPageNumber( int pageNumber ) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/destination/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/destination/package.html index 4de0c188500..4dcdef78549 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/destination/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/destination/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineItemIterator.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineItemIterator.java index 5c7675af5ea..984c09ca556 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineItemIterator.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineItemIterator.java @@ -17,6 +17,7 @@ package org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline; import java.util.Iterator; +import java.util.NoSuchElementException; /** * Iterator over the linked list of {@link PDOutlineItem} siblings. @@ -45,6 +46,10 @@ public boolean hasNext() @Override public PDOutlineItem next() { + if (!hasNext()) + { + throw new NoSuchElementException(); + } if (currentItem == null) { currentItem = startingItem; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineNode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineNode.java index 57ef0cf18fa..e810d34863b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineNode.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/PDOutlineNode.java @@ -18,6 +18,7 @@ import java.util.Iterator; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.PDDictionaryWrapper; @@ -51,14 +52,15 @@ public PDOutlineNode(COSDictionary dict) */ PDOutlineNode getParent() { - COSDictionary item = (COSDictionary) getCOSObject().getDictionaryObject(COSName.PARENT); - if (item != null) + COSBase base = getCOSObject().getDictionaryObject(COSName.PARENT); + if (base instanceof COSDictionary) { - if 
(COSName.OUTLINES.equals(item.getCOSName(COSName.TYPE))) + COSDictionary parent = (COSDictionary) base; + if (COSName.OUTLINES.equals(parent.getCOSName(COSName.TYPE))) { - return new PDDocumentOutline(item); + return new PDDocumentOutline(parent); } - return new PDOutlineItem(item); + return new PDOutlineItem(parent); } return null; } @@ -173,10 +175,10 @@ public boolean hasChildren() PDOutlineItem getOutlineItem(COSName name) { - COSDictionary item = (COSDictionary) getCOSObject().getDictionaryObject(name); - if (item != null) + COSBase base = getCOSObject().getDictionaryObject(name); + if (base instanceof COSDictionary) { - return new PDOutlineItem(item); + return new PDOutlineItem((COSDictionary) base); } return null; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/package.html index 4948b2d8922..c64641c48b4 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/outline/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/package.html index 973a29a22b6..8cef9489628 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/AppearanceGeneratorHelper.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/AppearanceGeneratorHelper.java index f5a9f067bb8..9f7452ae6c7 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/AppearanceGeneratorHelper.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/AppearanceGeneratorHelper.java @@ -17,18 +17,31 @@ package org.apache.pdfbox.pdmodel.interactive.form; import java.awt.geom.AffineTransform; +import java.awt.geom.GeneralPath; +import java.awt.geom.Point2D; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.util.ArrayList; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.contentstream.operator.Operator; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdfwriter.ContentStreamWriter; +import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDSimpleFont; +import org.apache.pdfbox.pdmodel.font.PDType3CharProc; +import org.apache.pdfbox.pdmodel.font.PDType3Font; +import org.apache.pdfbox.pdmodel.font.PDVectorFont; import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript; import org.apache.pdfbox.pdmodel.interactive.action.PDFormFieldAdditionalActions; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceCharacteristicsDictionary; @@ -36,6 +49,7 @@ import 
org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceEntry; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; +import org.apache.pdfbox.util.Matrix; /** * Create the AcroForms field appearance helper. @@ -43,162 +57,319 @@ * @author Stephan Gerhard * @author Ben Litchfield */ -class AppearanceGeneratorHelper -{ +class AppearanceGeneratorHelper { + private static final Log LOG = LogFactory.getLog(AppearanceGeneratorHelper.class); + private static final Operator BMC = Operator.getOperator("BMC"); private static final Operator EMC = Operator.getOperator("EMC"); - + private final PDVariableText field; - private final PDDefaultAppearanceString defaultAppearance; + + private PDDefaultAppearanceString defaultAppearance; private String value; - + /** * The highlight color * - * The color setting is used by Adobe to display the highlight box for selected entries in a list box. + * The color setting is used by Adobe to display the highlight box for selected + * entries in a list box. * - * Regardless of other settings in an existing appearance stream Adobe will always use this value. + * Regardless of other settings in an existing appearance stream Adobe will + * always use this value. */ - private static final int[] HIGHLIGHT_COLOR = {153,193,215}; - + private static final float[] HIGHLIGHT_COLOR = { 153 / 255f, 193 / 255f, 215 / 255f }; + /** * The scaling factor for font units to PDF units */ private static final int FONTSCALE = 1000; - + /** * The default font size used for multiline text */ - private static final float DEFAULT_FONT_SIZE = 12; - + private static final float DEFAULT_FONT_SIZE = 12; + + /** + * The minimum/maximum font sizes used for multiline text auto sizing + */ + private static final float MINIMUM_FONT_SIZE = 4; + private static final float MAXIMUM_FONT_SIZE = 300; + /** * The default padding applied by Acrobat to the fields bbox. 
*/ private static final float DEFAULT_PADDING = 0.5f; - + /** * Constructs a COSAppearance from the given field. * * @param field the field which you wish to control the appearance of - * @throws IOException + * @throws IOException */ - AppearanceGeneratorHelper(PDVariableText field) throws IOException - { + AppearanceGeneratorHelper(PDVariableText field) throws IOException { this.field = field; - this.defaultAppearance = field.getDefaultAppearanceString(); + validateAndEnsureAcroFormResources(); + + try { + this.defaultAppearance = field.getDefaultAppearanceString(); + } catch (IOException ex) { + throw new IOException("Could not process default appearance string '" + field.getDefaultAppearance() + + "' for field '" + field.getFullyQualifiedName() + "'", ex); + } } - + + /* + * Adobe Reader/Acrobat are adding resources which are at the field/widget level + * to the AcroForm level. + */ + private void validateAndEnsureAcroFormResources() { + // add font resources which might be available at the field + // level but are not at the AcroForm level to the AcroForm + // to match Adobe Reader/Acrobat behavior + if (field.getAcroForm().getDefaultResources() == null) { + return; + } + + PDResources acroFormResources = field.getAcroForm().getDefaultResources(); + + for (PDAnnotationWidget widget : field.getWidgets()) + { + PDAppearanceStream stream = widget.getNormalAppearanceStream(); + if (stream == null) + { + continue; + } + PDResources widgetResources = stream.getResources(); + if (widgetResources == null) + { + continue; + } + for (COSName fontResourceName : widgetResources.getFontNames()) + { + try + { + if (acroFormResources.getFont(fontResourceName) == null) + { + LOG.debug("Adding font resource " + fontResourceName + " from widget to AcroForm"); + acroFormResources.put(fontResourceName, widgetResources.getFont(fontResourceName)); + } + } + catch (IOException e) + { + LOG.warn("Unable to match field level font with AcroForm font"); + } + } + } + } + /** * This is 
the public method for setting the appearance stream. * * @param apValue the String value which the appearance should represent * @throws IOException If there is an error creating the stream. */ - public void setAppearanceValue(String apValue) throws IOException - { - value = apValue; + public void setAppearanceValue(String apValue) throws IOException { + value = getFormattedValue(apValue); - for (PDAnnotationWidget widget : field.getWidgets()) - { - PDFormFieldAdditionalActions actions = field.getActions(); + // Treat multiline field values in single lines as single line values. + // This is in line with how Adobe Reader behaves when entering text + // interactively but NOT how it behaves when the field value has been + // set programmatically and Reader is forced to generate the appearance + // using PDAcroForm.setNeedAppearances + // see PDFBOX-3911 + if (field instanceof PDTextField && !((PDTextField) field).isMultiline()) { + value = value.replaceAll("\\u000D\\u000A|[\\u000A\\u000B\\u000C\\u000D\\u0085\\u2028\\u2029]", " "); + } - // in case all tests fail the field will be formatted by acrobat - // when it is opened. See FreedomExpressions.pdf for an example of this. - if (actions == null || actions.getF() == null || - widget.getCOSObject().getDictionaryObject(COSName.AP) != null) + for (PDAnnotationWidget widget : field.getWidgets()) { + + if (widget.getCOSObject().containsKey("PMD")) { - PDAppearanceDictionary appearanceDict = widget.getAppearance(); - if (appearanceDict == null) - { - appearanceDict = new PDAppearanceDictionary(); - widget.setAppearance(appearanceDict); - } + LOG.warn("widget of field " + field.getFullyQualifiedName() + " is a PaperMetaData widget, no appearance stream created"); + continue; + } + + // some fields have the /Da at the widget level if the + // widgets differ in layout.
+ PDDefaultAppearanceString acroFormAppearance = defaultAppearance; + + if (widget.getCOSObject().getDictionaryObject(COSName.DA) != null) { + defaultAppearance = getWidgetDefaultAppearanceString(widget); + } + + PDRectangle rect = widget.getRectangle(); + if (rect == null) { + widget.getCOSObject().removeItem(COSName.AP); + LOG.warn("widget of field " + field.getFullyQualifiedName() + + " has no rectangle, no appearance stream created"); + continue; + } - PDAppearanceEntry appearance = appearanceDict.getNormalAppearance(); + PDAppearanceDictionary appearanceDict = widget.getAppearance(); + if (appearanceDict == null) { + appearanceDict = new PDAppearanceDictionary(); + widget.setAppearance(appearanceDict); + } + + PDAppearanceEntry appearance = appearanceDict.getNormalAppearance(); + // TODO support appearances other than "normal" + + PDAppearanceStream appearanceStream; + if (isValidAppearanceStream(appearance)) { + appearanceStream = appearance.getAppearanceStream(); + } else { + appearanceStream = prepareNormalAppearanceStream(widget); + appearanceDict.setNormalAppearance(appearanceStream); // TODO support appearances other than "normal" - - PDAppearanceStream appearanceStream; - if (appearance.isStream()) - { - appearanceStream = appearance.getAppearanceStream(); - } - else - { - appearanceStream = new PDAppearanceStream(field.getAcroForm().getDocument()); - appearanceStream.setBBox(widget.getRectangle().createRetranslatedRectangle()); - appearanceDict.setNormalAppearance(appearanceStream); - // TODO support appearances other than "normal" - } - - /* - * Adobe Acrobat always recreates the complete appearance stream if there is an appearance characteristics - * entry (the widget dictionaries MK entry). In addition if there is no content yet also create the apperance - * stream from the entries. 
- * - */ - if (widget.getAppearanceCharacteristics() != null || appearanceStream.getContentStream().getLength() == 0) - { - initializeAppearanceContent(widget, appearanceStream); - } - - setAppearanceContent(widget, appearanceStream); } + + /* + * Adobe Acrobat always recreates the complete appearance stream if there is an + * appearance characteristics entry (the widget dictionaries MK entry). In + * addition if there is no content yet also create the appearance stream from + * the entries. + * + */ + if (widget.getAppearanceCharacteristics() != null || appearanceStream.getContentStream().getLength() == 0) { + initializeAppearanceContent(widget, appearanceStream); + } + + setAppearanceContent(widget, appearanceStream); + + // restore the field level appearance + defaultAppearance = acroFormAppearance; + } + } + + private String getFormattedValue(String apValue) { + // format the field value for the appearance if there is scripting support and + // the field + // has a format event + PDFormFieldAdditionalActions actions = field.getActions(); + + if (actions != null && actions.getF() != null) { + if (field.getAcroForm().getScriptingHandler() != null) { + ScriptingHandler scriptingHandler = field.getAcroForm().getScriptingHandler(); + return scriptingHandler.format((PDActionJavaScript) field.getActions().getF(), apValue); + } else { + LOG.info( + "Field contains a formatting action but no ScriptingHandler has been supplied - formatted value might be incorrect"); + return apValue; + } + } + return apValue; + } + + private static boolean isValidAppearanceStream(PDAppearanceEntry appearance) { + if (appearance == null) { + return false; + } + if (!appearance.isStream()) { + return false; + } + PDRectangle bbox = appearance.getAppearanceStream().getBBox(); + if (bbox == null) { + return false; + } + return Math.abs(bbox.getWidth()) > 0 && Math.abs(bbox.getHeight()) > 0; + } + + private PDAppearanceStream prepareNormalAppearanceStream(PDAnnotationWidget widget) { + 
PDAppearanceStream appearanceStream = new PDAppearanceStream(field.getAcroForm().getDocument()); + + // Calculate the entries for the bounding box and the transformation matrix + // settings for the appearance stream + int rotation = resolveRotation(widget); + PDRectangle rect = widget.getRectangle(); + Matrix matrix = Matrix.getRotateInstance(Math.toRadians(rotation), 0, 0); + Point2D.Float point2D = matrix.transformPoint(rect.getWidth(), rect.getHeight()); + + PDRectangle bbox = new PDRectangle(Math.abs((float) point2D.getX()), Math.abs((float) point2D.getY())); + appearanceStream.setBBox(bbox); + + AffineTransform at = calculateMatrix(bbox, rotation); + if (!at.isIdentity()) { + appearanceStream.setMatrix(at); + } + appearanceStream.setFormType(1); + appearanceStream.setResources(new PDResources()); + return appearanceStream; + } + + private PDDefaultAppearanceString getWidgetDefaultAppearanceString(PDAnnotationWidget widget) throws IOException { + COSString da = (COSString) widget.getCOSObject().getDictionaryObject(COSName.DA); + PDResources dr = field.getAcroForm().getDefaultResources(); + return new PDDefaultAppearanceString(da, dr); + } + + private int resolveRotation(PDAnnotationWidget widget) { + PDAppearanceCharacteristicsDictionary characteristicsDictionary = widget.getAppearanceCharacteristics(); + if (characteristicsDictionary != null) { + // 0 is the default value if the R key doesn't exist + return characteristicsDictionary.getRotation(); } + return 0; } - + /** * Initialize the content of the appearance stream. 
* - * Get settings like border style, border width and colors to be used to draw a rectangle and background color - * around the widget + * Get settings like border style, border width and colors to be used to draw a + * rectangle and background color around the widget * - * @param widget the field widget + * @param widget the field widget * @param appearanceStream the appearance stream to be used * @throws IOException in case we can't write to the appearance stream */ - private void initializeAppearanceContent(PDAnnotationWidget widget, PDAppearanceStream appearanceStream) throws IOException - { + private void initializeAppearanceContent(PDAnnotationWidget widget, PDAppearanceStream appearanceStream) + throws IOException { ByteArrayOutputStream output = new ByteArrayOutputStream(); - PDPageContentStream contents = new PDPageContentStream(field.getAcroForm().getDocument(), - appearanceStream, output); + PDPageContentStream contents = new PDPageContentStream(field.getAcroForm().getDocument(), appearanceStream, + output); PDAppearanceCharacteristicsDictionary appearanceCharacteristics = widget.getAppearanceCharacteristics(); - - // TODO: support more entries like patterns, background color etc. - if (appearanceCharacteristics != null) - { + + // TODO: support more entries like patterns, etc. 
+ if (appearanceCharacteristics != null) { + PDColor backgroundColour = appearanceCharacteristics.getBackground(); + if (backgroundColour != null) { + contents.setNonStrokingColor(backgroundColour); + PDRectangle bbox = resolveBoundingBox(widget, appearanceStream); + contents.addRect(bbox.getLowerLeftX(), bbox.getLowerLeftY(), bbox.getWidth(), bbox.getHeight()); + contents.fill(); + } + float lineWidth = 0f; PDColor borderColour = appearanceCharacteristics.getBorderColour(); - if (borderColour != null) - { - contents.setNonStrokingColor(borderColour); + if (borderColour != null) { + contents.setStrokingColor(borderColour); lineWidth = 1f; } PDBorderStyleDictionary borderStyle = widget.getBorderStyle(); - if (borderStyle != null && borderStyle.getWidth() > 0) - { + if (borderStyle != null && borderStyle.getWidth() > 0) { lineWidth = borderStyle.getWidth(); } - if (lineWidth > 0) - { - contents.setLineWidth(lineWidth); + if (lineWidth > 0 && borderColour != null) { + if (lineWidth != 1) { + contents.setLineWidth(lineWidth); + } PDRectangle bbox = resolveBoundingBox(widget, appearanceStream); - PDRectangle clipRect = applyPadding(bbox, Math.max(DEFAULT_PADDING, lineWidth/2)); - contents.addRect(clipRect.getLowerLeftX(),clipRect.getLowerLeftY(),clipRect.getWidth(), clipRect.getHeight()); + PDRectangle clipRect = applyPadding(bbox, Math.max(DEFAULT_PADDING, lineWidth / 2)); + contents.addRect(clipRect.getLowerLeftX(), clipRect.getLowerLeftY(), clipRect.getWidth(), + clipRect.getHeight()); contents.closeAndStroke(); } } - + contents.close(); output.close(); writeToStream(output.toByteArray(), appearanceStream); } - + /** * Parses an appearance stream into tokens. 
*/ - private List tokenize(PDAppearanceStream appearanceStream) throws IOException - { + private List tokenize(PDAppearanceStream appearanceStream) throws IOException { PDFStreamParser parser = new PDFStreamParser(appearanceStream); parser.parse(); return parser.getTokens(); @@ -207,43 +378,36 @@ private List tokenize(PDAppearanceStream appearanceStream) throws IOExce /** * Constructs and sets new contents for given appearance stream. */ - private void setAppearanceContent(PDAnnotationWidget widget, - PDAppearanceStream appearanceStream) throws IOException - { + private void setAppearanceContent(PDAnnotationWidget widget, PDAppearanceStream appearanceStream) + throws IOException { // first copy any needed resources from the document’s DR dictionary into // the stream’s Resources dictionary defaultAppearance.copyNeededResourcesTo(appearanceStream); - + // then replace the existing contents of the appearance stream from /Tx BMC // to the matching EMC ByteArrayOutputStream output = new ByteArrayOutputStream(); ContentStreamWriter writer = new ContentStreamWriter(output); - + List tokens = tokenize(appearanceStream); int bmcIndex = tokens.indexOf(BMC); - if (bmcIndex == -1) - { + if (bmcIndex == -1) { // append to existing stream writer.writeTokens(tokens); writer.writeTokens(COSName.TX, BMC); - } - else - { + } else { // prepend content before BMC writer.writeTokens(tokens.subList(0, bmcIndex + 1)); } - + // insert field contents insertGeneratedAppearance(widget, appearanceStream, output); - + int emcIndex = tokens.indexOf(EMC); - if (emcIndex == -1) - { + if (emcIndex == -1) { // append EMC writer.writeTokens(EMC); - } - else - { + } else { // append contents after EMC writer.writeTokens(tokens.subList(emcIndex, tokens.size())); } @@ -251,274 +415,296 @@ private void setAppearanceContent(PDAnnotationWidget widget, output.close(); writeToStream(output.toByteArray(), appearanceStream); } - + /** - * Generate and insert text content and clipping around it. 
+ * Generate and insert text content and clipping around it. */ - private void insertGeneratedAppearance(PDAnnotationWidget widget, - PDAppearanceStream appearanceStream, - OutputStream output) throws IOException - { - PDPageContentStream contents = new PDPageContentStream(field.getAcroForm().getDocument(), - appearanceStream, output); - - appearanceStream.setMatrix(new AffineTransform()); - appearanceStream.setFormType(1); - - // Acrobat calculates the left and right padding dependent on the offset of the border edge + private void insertGeneratedAppearance(PDAnnotationWidget widget, PDAppearanceStream appearanceStream, + OutputStream output) throws IOException { + PDPageContentStream contents = new PDPageContentStream(field.getAcroForm().getDocument(), appearanceStream, + output); + + PDRectangle bbox = resolveBoundingBox(widget, appearanceStream); + + // Acrobat calculates the left and right padding dependent on the offset of the + // border edge // This calculation works for forms having been generated by Acrobat. - // The minimum distance is always 1f even if there is no rectangle being drawn around. + // The minimum distance is always 1f even if there is no rectangle being drawn + // around. 
float borderWidth = 0; - if (widget.getBorderStyle() != null) - { + if (widget.getBorderStyle() != null) { borderWidth = widget.getBorderStyle().getWidth(); } - PDRectangle bbox = resolveBoundingBox(widget, appearanceStream); PDRectangle clipRect = applyPadding(bbox, Math.max(1f, borderWidth)); PDRectangle contentRect = applyPadding(clipRect, Math.max(1f, borderWidth)); - + contents.saveGraphicsState(); - + // Acrobat always adds a clipping path - contents.addRect(clipRect.getLowerLeftX(), clipRect.getLowerLeftY(), - clipRect.getWidth(), clipRect.getHeight()); + contents.addRect(clipRect.getLowerLeftX(), clipRect.getLowerLeftY(), clipRect.getWidth(), clipRect.getHeight()); contents.clip(); - + // get the font - PDFont font = field.getDefaultAppearanceString().getFont(); - + PDFont font = defaultAppearance.getFont(); + if (font == null) { + throw new IllegalArgumentException("font is null, check whether /DA entry is incomplete or incorrect"); + } + if (font.getName().contains("+")) { + LOG.warn("Font '" + defaultAppearance.getFontName().getName() + "' of field '" + + field.getFullyQualifiedName() + "' contains subsetted font '" + font.getName() + "'"); + LOG.warn("This may bring trouble with PDField.setValue(), PDAcroForm.flatten() or " + + "PDAcroForm.refreshAppearances()"); + LOG.warn("You should replace this font with a non-subsetted font:"); + LOG.warn("PDFont font = PDType0Font.load(doc, new FileInputStream(fontfile), false);"); + LOG.warn("acroForm.getDefaultResources().put(COSName.getPDFName(\"" + + defaultAppearance.getFontName().getName() + "\", font);"); + } + // calculate the fontSize (because 0 = autosize) - float fontSize = calculateFontSize(font, contentRect); - + float fontSize = defaultAppearance.getFontSize(); + + if (fontSize == 0) { + fontSize = calculateFontSize(font, contentRect); + } + // for a listbox generate the highlight rectangle for the selected // options - if (field instanceof PDListBox) - { - insertGeneratedSelectionHighlight(contents, 
appearanceStream, font, fontSize); + if (field instanceof PDListBox) { + insertGeneratedListboxSelectionHighlight(contents, appearanceStream, font, fontSize); } - + // start the text output contents.beginText(); - // write the /DA string - field.getDefaultAppearanceString().writeTo(contents, fontSize); - + // write font and color from the /DA string, with the calculated font size + defaultAppearance.writeTo(contents, fontSize); + // calculate the y-position of the baseline float y; - + // calculate font metrics at font size float fontScaleY = fontSize / FONTSCALE; float fontBoundingBoxAtSize = font.getBoundingBox().getHeight() * fontScaleY; - float fontCapAtSize = font.getFontDescriptor().getCapHeight() * fontScaleY; - float fontDescentAtSize = font.getFontDescriptor().getDescent() * fontScaleY; - - if (field instanceof PDTextField && ((PDTextField) field).isMultiline()) - { - y = contentRect.getUpperRightY() - fontBoundingBoxAtSize; + + float fontCapAtSize = 0; + float fontDescentAtSize = 0; + + if (font.getFontDescriptor() != null) { + fontCapAtSize = font.getFontDescriptor().getCapHeight() * fontScaleY; + fontDescentAtSize = font.getFontDescriptor().getDescent() * fontScaleY; + } else { + float fontCapHeight = resolveCapHeight(font); + float fontDescent = resolveDescent(font); + LOG.debug("missing font descriptor - resolved Cap/Descent to " + fontCapHeight + "/" + fontDescent); + fontCapAtSize = fontCapHeight * fontScaleY; + fontDescentAtSize = fontDescent * fontScaleY; } - else - { - // Adobe shows the text 'shiftet up' in case the caps don't fit into the clipping area - if (fontCapAtSize > clipRect.getHeight()) - { + + if (field instanceof PDTextField && ((PDTextField) field).isMultiline()) { + y = contentRect.getUpperRightY() - fontBoundingBoxAtSize; + } else { + // Adobe shows the text 'shiftet up' in case the caps don't fit into the + // clipping area + if (fontCapAtSize > clipRect.getHeight()) { y = clipRect.getLowerLeftY() + -fontDescentAtSize; - } - else 
- { + } else { // calculate the position based on the content rectangle y = clipRect.getLowerLeftY() + (clipRect.getHeight() - fontCapAtSize) / 2; - + // check to ensure that ascents and descents fit if (y - clipRect.getLowerLeftY() < -fontDescentAtSize) { - + float fontDescentBased = -fontDescentAtSize + contentRect.getLowerLeftY(); float fontCapBased = contentRect.getHeight() - contentRect.getLowerLeftY() - fontCapAtSize; - + y = Math.min(fontDescentBased, Math.max(y, fontCapBased)); } } } - + // show the text float x = contentRect.getLowerLeftX(); - + // special handling for comb boxes as these are like table cells with individual // chars if (shallComb()) { insertGeneratedCombAppearance(contents, appearanceStream, font, fontSize); - } - else if (field instanceof PDListBox) - { + } else if (field instanceof PDListBox) { insertGeneratedListboxAppearance(contents, appearanceStream, contentRect, font, fontSize); - } - else - { + } else { PlainText textContent = new PlainText(value); AppearanceStyle appearanceStyle = new AppearanceStyle(); appearanceStyle.setFont(font); appearanceStyle.setFontSize(fontSize); - + // Adobe Acrobat uses the font's bounding box for the leading between the lines appearanceStyle.setLeading(font.getBoundingBox().getHeight() * fontScaleY); - - PlainTextFormatter formatter = new PlainTextFormatter - .Builder(contents) - .style(appearanceStyle) - .text(textContent) - .width(contentRect.getWidth()) - .wrapLines(isMultiLine()) - .initialOffset(x, y) - .textAlign(field.getQ()) - .build(); + + PlainTextFormatter formatter = new PlainTextFormatter.Builder(contents).style(appearanceStyle) + .text(textContent).width(contentRect.getWidth()).wrapLines(isMultiLine()).initialOffset(x, y) + .textAlign(getTextAlign(widget)).build(); formatter.format(); } - + contents.endText(); contents.restoreGraphicsState(); contents.close(); } - - private boolean isMultiLine() - { + + /* + * PDFBox handles a widget with a joined in field dictionary and without an + * 
individual name as a widget only. As a result - as a widget can't have a + * quadding /Q entry we need to do a low level access to the dictionary and + * otherwise get the quadding from the field. + */ + private int getTextAlign(PDAnnotationWidget widget) { + // Use quadding value from joined field/widget if set, else use from field. + return widget.getCOSObject().getInt(COSName.Q, field.getQ()); + } + + private AffineTransform calculateMatrix(PDRectangle bbox, int rotation) { + if (rotation == 0) { + return new AffineTransform(); + } + float tx = 0, ty = 0; + switch (rotation) { + case 90: + tx = bbox.getUpperRightY(); + break; + case 180: + tx = bbox.getUpperRightY(); + ty = bbox.getUpperRightX(); + break; + case 270: + ty = bbox.getUpperRightX(); + break; + default: + break; + } + Matrix matrix = Matrix.getRotateInstance(Math.toRadians(rotation), tx, ty); + return matrix.createAffineTransform(); + + } + + private boolean isMultiLine() { return field instanceof PDTextField && ((PDTextField) field).isMultiline(); } - + /** * Determine if the appearance shall provide a comb output. * *

* May be set only if the MaxLen entry is present in the text field dictionary - * and if the Multiline, Password, and FileSelect flags are clear. - * If set, the field shall be automatically divided into as many equally spaced positions, + * and if the Multiline, Password, and FileSelect flags are clear. If set, the + * field shall be automatically divided into as many equally spaced positions, * or combs, as the value of MaxLen, and the text is laid out into those combs. *

* * @return the comb state */ - private boolean shallComb() - { - return field instanceof PDTextField && - ((PDTextField) field).isComb() && - !((PDTextField) field).isMultiline() && - !((PDTextField) field).isPassword() && - !((PDTextField) field).isFileSelect(); + private boolean shallComb() { + return field instanceof PDTextField && ((PDTextField) field).isComb() && !((PDTextField) field).isMultiline() + && !((PDTextField) field).isPassword() && !((PDTextField) field).isFileSelect(); } - + /** * Generate the appearance for comb fields. * - * @param contents the content stream to write to + * @param contents the content stream to write to * @param appearanceStream the appearance stream used - * @param font the font to be used - * @param fontSize the font size to be used + * @param font the font to be used + * @param fontSize the font size to be used * @throws IOException */ private void insertGeneratedCombAppearance(PDPageContentStream contents, PDAppearanceStream appearanceStream, - PDFont font, float fontSize) throws IOException - { - - // TODO: Currently the quadding is not taken into account - // so the comb is always filled from left to right. - + PDFont font, float fontSize) throws IOException { + + // TODO: Currently the quadding is not taken into account + // so the comb is always filled from left to right. 
+ int maxLen = ((PDTextField) field).getMaxLen(); int numChars = Math.min(value.length(), maxLen); - + PDRectangle paddingEdge = applyPadding(appearanceStream.getBBox(), 1); - + float combWidth = appearanceStream.getBBox().getWidth() / maxLen; float ascentAtFontSize = font.getFontDescriptor().getAscent() / FONTSCALE * fontSize; - float baselineOffset = paddingEdge.getLowerLeftY() + - (appearanceStream.getBBox().getHeight() - ascentAtFontSize)/2; - + float baselineOffset = paddingEdge.getLowerLeftY() + + (appearanceStream.getBBox().getHeight() - ascentAtFontSize) / 2; + float prevCharWidth = 0f; - float currCharWidth = 0f; - - float xOffset = combWidth/2; - String combString = ""; - - for (int i = 0; i < numChars; i++) - { - combString = value.substring(i, i+1); - currCharWidth = font.getStringWidth(combString) / FONTSCALE * fontSize/2; - - xOffset = xOffset + prevCharWidth/2 - currCharWidth/2; - + float xOffset = combWidth / 2; + + for (int i = 0; i < numChars; i++) { + String combString = value.substring(i, i + 1); + float currCharWidth = font.getStringWidth(combString) / FONTSCALE * fontSize / 2; + + xOffset = xOffset + prevCharWidth / 2 - currCharWidth / 2; + contents.newLineAtOffset(xOffset, baselineOffset); contents.showText(combString); - + baselineOffset = 0; prevCharWidth = currCharWidth; xOffset = combWidth; } } - - private void insertGeneratedSelectionHighlight(PDPageContentStream contents, PDAppearanceStream appearanceStream, - PDFont font, float fontSize) throws IOException - { + + private void insertGeneratedListboxSelectionHighlight(PDPageContentStream contents, + PDAppearanceStream appearanceStream, PDFont font, float fontSize) throws IOException { List indexEntries = ((PDListBox) field).getSelectedOptionsIndex(); List values = ((PDListBox) field).getValue(); List options = ((PDListBox) field).getOptionsExportValues(); - - // TODO: support highlighting multiple items if multiselect is set - - int selectedIndex = 0; - - if (!values.isEmpty() && 
!options.isEmpty()) - { - if (!indexEntries.isEmpty()) - { - selectedIndex = indexEntries.get(0); - } - else - { - selectedIndex = options.indexOf(values.get(0)); + + if (!values.isEmpty() && !options.isEmpty() && indexEntries.isEmpty()) { + // create indexEntries from options + indexEntries = new ArrayList(); + for (String v : values) { + indexEntries.add(options.indexOf(v)); } } - - // The first entry which shall be presented might be adjusted by the optional TI key - // If this entry is present the first entry to be displayed is the keys value otherwise + + // The first entry which shall be presented might be adjusted by the optional TI + // key + // If this entry is present the first entry to be displayed is the keys value + // otherwise // display starts with the first entry in Opt. int topIndex = ((PDListBox) field).getTopIndex(); - - float highlightBoxHeight = font.getBoundingBox().getHeight() * fontSize / FONTSCALE - 2f; - - // the padding area + + float highlightBoxHeight = font.getBoundingBox().getHeight() * fontSize / FONTSCALE; + + // the padding area PDRectangle paddingEdge = applyPadding(appearanceStream.getBBox(), 1); - - contents.setNonStrokingColor(HIGHLIGHT_COLOR[0],HIGHLIGHT_COLOR[1],HIGHLIGHT_COLOR[2]); - - contents.addRect(paddingEdge.getLowerLeftX(), - paddingEdge.getUpperRightY() - highlightBoxHeight * (selectedIndex - topIndex + 1), - paddingEdge.getWidth(), - highlightBoxHeight); - contents.fill(); - contents.setNonStrokingColor(0); + + for (int selectedIndex : indexEntries) { + contents.setNonStrokingColor(HIGHLIGHT_COLOR[0], HIGHLIGHT_COLOR[1], HIGHLIGHT_COLOR[2]); + + contents.addRect(paddingEdge.getLowerLeftX(), + paddingEdge.getUpperRightY() - highlightBoxHeight * (selectedIndex - topIndex + 1) + 2, + paddingEdge.getWidth(), highlightBoxHeight); + contents.fill(); + } + contents.setNonStrokingColor(0f); } - - + private void insertGeneratedListboxAppearance(PDPageContentStream contents, PDAppearanceStream appearanceStream, - PDRectangle 
contentRect, PDFont font, float fontSize) throws IOException - { - contents.setNonStrokingColor(0); - + PDRectangle contentRect, PDFont font, float fontSize) throws IOException { + contents.setNonStrokingColor(0f); + int q = field.getQ(); - if (q == PDVariableText.QUADDING_CENTERED || q == PDVariableText.QUADDING_RIGHT) - { + if (q == PDVariableText.QUADDING_CENTERED || q == PDVariableText.QUADDING_RIGHT) { float fieldWidth = appearanceStream.getBBox().getWidth(); float stringWidth = (font.getStringWidth(value) / FONTSCALE) * fontSize; float adjustAmount = fieldWidth - stringWidth - 4; - if (q == PDVariableText.QUADDING_CENTERED) - { + if (q == PDVariableText.QUADDING_CENTERED) { adjustAmount = adjustAmount / 2.0f; } contents.newLineAtOffset(adjustAmount, 0); - } - else if (q != PDVariableText.QUADDING_LEFT) - { + } else if (q != PDVariableText.QUADDING_LEFT) { throw new IOException("Error: Unknown justification value:" + q); } @@ -528,16 +714,12 @@ else if (q != PDVariableText.QUADDING_LEFT) float yTextPos = contentRect.getUpperRightY(); int topIndex = ((PDListBox) field).getTopIndex(); - - for (int i = topIndex; i < numOptions; i++) - { - - if (i == topIndex) - { + + for (int i = topIndex; i < numOptions; i++) { + + if (i == topIndex) { yTextPos = yTextPos - font.getFontDescriptor().getAscent() / FONTSCALE * fontSize; - } - else - { + } else { yTextPos = yTextPos - font.getBoundingBox().getHeight() / FONTSCALE * fontSize; contents.beginText(); } @@ -545,98 +727,169 @@ else if (q != PDVariableText.QUADDING_LEFT) contents.newLineAtOffset(contentRect.getLowerLeftX(), yTextPos); contents.showText(options.get(i)); - if (i - topIndex != (numOptions - 1)) - { + if (i != (numOptions - 1)) { contents.endText(); } } } - + /** * Writes the stream to the actual stream in the COSStream. 
* * @throws IOException If there is an error writing to the stream */ - private void writeToStream(byte[] data, PDAppearanceStream appearanceStream) throws IOException - { + private void writeToStream(byte[] data, PDAppearanceStream appearanceStream) throws IOException { OutputStream out = appearanceStream.getCOSObject().createOutputStream(); out.write(data); out.close(); } /** - * My "not so great" method for calculating the fontsize. It does not work superb, but it - * handles ok. + * My "not so great" method for calculating the fontsize. It does not work + * superb, but it handles ok. * * @return the calculated font-size * @throws IOException If there is an error getting the font information. */ - private float calculateFontSize(PDFont font, PDRectangle contentRect) throws IOException - { + private float calculateFontSize(PDFont font, PDRectangle contentRect) throws IOException { float fontSize = defaultAppearance.getFontSize(); - + // zero is special, it means the text is auto-sized - if (fontSize == 0) - { - if (isMultiLine()) - { + if (fontSize == 0) { + if (isMultiLine()) { + PlainText textContent = new PlainText(value); + if (textContent.getParagraphs() != null) { + float width = contentRect.getWidth() - contentRect.getLowerLeftX(); + float fs = MINIMUM_FONT_SIZE; + while (fs <= DEFAULT_FONT_SIZE) { + // determine the number of lines needed for this font and contentRect + int numLines = 0; + for (PlainText.Paragraph paragraph : textContent.getParagraphs()) { + numLines += paragraph.getLines(font, fs, width).size(); + } + // calculate the height required for this font size + float fontScaleY = fs / FONTSCALE; + float leading = font.getBoundingBox().getHeight() * fontScaleY; + float height = leading * numLines; + + // if this font size didn't fit, use the prior size that did fit + if (height > contentRect.getHeight()) { + return Math.max(fs - 1, MINIMUM_FONT_SIZE); + } + fs++; + } + return Math.min(fs, DEFAULT_FONT_SIZE); + } + // Acrobat defaults to 12 for 
multiline text with size 0 return DEFAULT_FONT_SIZE; - } - else - { + } else { float yScalingFactor = FONTSCALE * font.getFontMatrix().getScaleY(); float xScalingFactor = FONTSCALE * font.getFontMatrix().getScaleX(); - + // fit width float width = font.getStringWidth(value) * font.getFontMatrix().getScaleX(); float widthBasedFontSize = contentRect.getWidth() / width * xScalingFactor; // fit height - float height = (font.getFontDescriptor().getCapHeight() + - -font.getFontDescriptor().getDescent()) * font.getFontMatrix().getScaleY(); - if (height <= 0) - { + float height = (font.getFontDescriptor().getCapHeight() + -font.getFontDescriptor().getDescent()) + * font.getFontMatrix().getScaleY(); + if (height <= 0) { height = font.getBoundingBox().getHeight() * font.getFontMatrix().getScaleY(); } float heightBasedFontSize = contentRect.getHeight() / height * yScalingFactor; - + return Math.min(heightBasedFontSize, widthBasedFontSize); } } return fontSize; } - + + /* + * Resolve the cap height. + * + * This is a very basic implementation using the height of "H" as reference. + */ + private float resolveCapHeight(PDFont font) throws IOException { + return resolveGlyphHeight(font, "H".codePointAt(0)); + } + + /* + * Resolve the descent. + * + * This is a very basic implementation using the height of "y" - "a" as reference. + */ + private float resolveDescent(PDFont font) throws IOException { + return resolveGlyphHeight(font, "y".codePointAt(0)) - resolveGlyphHeight(font, "a".codePointAt(0)); + } + + // this calculates the real (except for type 3 fonts) individual glyph bounds + private float resolveGlyphHeight(PDFont font, int code) throws IOException { + GeneralPath path = null; + if (font instanceof PDType3Font) { + // It is difficult to calculate the real individual glyph bounds for type 3 + // fonts + // because these are not vector fonts, the content stream could contain almost + // anything + // that is found in page content streams. 
+ PDType3Font t3Font = (PDType3Font) font; + PDType3CharProc charProc = t3Font.getCharProc(code); + if (charProc != null) { + BoundingBox fontBBox = t3Font.getBoundingBox(); + PDRectangle glyphBBox = charProc.getGlyphBBox(); + if (glyphBBox != null) { + // PDFBOX-3850: glyph bbox could be larger than the font bbox + glyphBBox.setLowerLeftX(Math.max(fontBBox.getLowerLeftX(), glyphBBox.getLowerLeftX())); + glyphBBox.setLowerLeftY(Math.max(fontBBox.getLowerLeftY(), glyphBBox.getLowerLeftY())); + glyphBBox.setUpperRightX(Math.min(fontBBox.getUpperRightX(), glyphBBox.getUpperRightX())); + glyphBBox.setUpperRightY(Math.min(fontBBox.getUpperRightY(), glyphBBox.getUpperRightY())); + path = glyphBBox.toGeneralPath(); + } + } + } else if (font instanceof PDVectorFont) { + PDVectorFont vectorFont = (PDVectorFont) font; + path = vectorFont.getPath(code); + } else if (font instanceof PDSimpleFont) { + PDSimpleFont simpleFont = (PDSimpleFont) font; + + // these two lines do not always work, e.g. for the TT fonts in file 032431.pdf + // which is why PDVectorFont is tried first. + String name = simpleFont.getEncoding().getName(code); + path = simpleFont.getPath(name); + } else { + // shouldn't happen, please open issue in JIRA + LOG.warn("Unknown font class: " + font.getClass()); + } + if (path == null) { + return -1; + } + return (float) path.getBounds2D().getHeight(); + } + /** * Resolve the bounding box. * - * @param fieldWidget the annotation widget. + * @param fieldWidget the annotation widget. * @param appearanceStream the annotations appearance stream. * @return the resolved boundingBox. 
*/ - private PDRectangle resolveBoundingBox(PDAnnotationWidget fieldWidget, - PDAppearanceStream appearanceStream) - { + private PDRectangle resolveBoundingBox(PDAnnotationWidget fieldWidget, PDAppearanceStream appearanceStream) { PDRectangle boundingBox = appearanceStream.getBBox(); - if (boundingBox == null) - { + if (boundingBox == null) { boundingBox = fieldWidget.getRectangle().createRetranslatedRectangle(); } return boundingBox; } - + /** * Apply padding to a box. * * @param box box * @return the padded box. */ - private PDRectangle applyPadding(PDRectangle box, float padding) - { - return new PDRectangle(box.getLowerLeftX() + padding, - box.getLowerLeftY() + padding, - box.getWidth() - 2 * padding, - box.getHeight() - 2 * padding); + private PDRectangle applyPadding(PDRectangle box, float padding) { + return new PDRectangle(box.getLowerLeftX() + padding, box.getLowerLeftY() + padding, + box.getWidth() - 2 * padding, box.getHeight() - 2 * padding); } } \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/FieldUtils.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/FieldUtils.java index 2977f66a914..915768f6aa2 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/FieldUtils.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/FieldUtils.java @@ -25,7 +25,6 @@ import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSString; -import org.apache.pdfbox.pdmodel.common.COSArrayList; /** * A set of utility methods to help with common AcroForm form and field related functions. 
@@ -115,7 +114,7 @@ private FieldUtils() */ static List toKeyValueList(List key, List value) { - List list = new ArrayList(); + List list = new ArrayList(key.size()); for(int i =0; i getPairableItems(COSBase items, int pairIdx) if (items instanceof COSString) { - List array = new ArrayList(); + List array = new ArrayList(1); array.add(((COSString) items).getString()); return array; } else if (items instanceof COSArray) { - // test if there is a single text or a two-element array - COSBase entry = ((COSArray) items).get(0); - if (entry instanceof COSString) + List entryList = new ArrayList(); + for (COSBase entry : (COSArray) items) { - return COSArrayList.convertCOSStringCOSArrayToList((COSArray)items); - } - else - { - return getItemsFromPair(items, pairIdx); - } - } - return Collections.emptyList(); - } - - /** - * Return either one of a list of two-element arrays entries. - * - * @param items the array of elements or two-element arrays - * @param pairIdx the index into the two-element array - * @return a List of single elements - */ - private static List getItemsFromPair(COSBase items, int pairIdx) - { - List exportValues = new ArrayList(); - int numItems = ((COSArray) items).size(); - for (int i=0;i= pairIdx +1 && cosArray.get(pairIdx) instanceof COSString) + { + entryList.add(((COSString) cosArray.get(pairIdx)).getString()); + } + } + } + return entryList; } - return exportValues; + return Collections.emptyList(); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroForm.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroForm.java index d11fd76859c..671cea455c6 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroForm.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroForm.java @@ -16,13 +16,18 @@ */ package org.apache.pdfbox.pdmodel.interactive.form; +import java.awt.geom.GeneralPath; +import java.awt.geom.Rectangle2D; + import 
java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -38,6 +43,7 @@ import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.COSArrayList; import org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.fdf.FDFCatalog; import org.apache.pdfbox.pdmodel.fdf.FDFDictionary; import org.apache.pdfbox.pdmodel.fdf.FDFDocument; @@ -45,6 +51,7 @@ import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; import org.apache.pdfbox.util.Matrix; /** @@ -54,8 +61,8 @@ */ public final class PDAcroForm implements COSObjectable { - private static final Log LOG = LogFactory.getLog(PDAcroForm.class); - + private static final Log LOG = LogFactory.getLog(PDAcroForm.class); + private static final int FLAG_SIGNATURES_EXIST = 1; private static final int FLAG_APPEND_ONLY = 1 << 1; @@ -64,6 +71,8 @@ public final class PDAcroForm implements COSObjectable private Map fieldCache; + private ScriptingHandler scriptingHandler; + /** * Constructor. * @@ -77,7 +86,7 @@ public PDAcroForm(PDDocument doc) } /** - * Constructor. + * Constructor. Side effect: /Helv and /ZaDb fonts added with update mark. * * @param doc The document that this form is part of. * @param form The existing acroForm. @@ -164,26 +173,29 @@ public FDFDocument exportFDF() throws IOException *

Flattening a form field will take the current appearance and make that part * of the pages content stream. All form fields and annotations associated are removed.

* + *

Invisible and hidden fields will be skipped and will not become part of the + * page content stream

+ * *

The appearances for the form fields widgets will not be generated

* * @throws IOException */ public void flatten() throws IOException { - // for dynamic XFA forms there is no flatten as this would mean to do a rendering - // from the XFA content into a static PDF. - if (xfaIsDynamic()) - { - LOG.warn("Flatten for a dynamix XFA form is not supported"); - return; - } - - List fields = new ArrayList(); - for (PDField field: getFieldTree()) - { - fields.add(field); - } - flatten(fields, false); + // for dynamic XFA forms there is no flatten as this would mean to do a rendering + // from the XFA content into a static PDF. + if (xfaIsDynamic()) + { + LOG.warn("Flatten for a dynamix XFA form is not supported"); + return; + } + + List fields = new ArrayList(); + for (PDField field: getFieldTree()) + { + fields.add(field); + } + flatten(fields, false); } @@ -193,86 +205,116 @@ public void flatten() throws IOException *

Flattening a form field will take the current appearance and make that part * of the pages content stream. All form fields and annotations associated are removed.

* + *

Invisible and hidden fields will be skipped and will not become part of the + * page content stream

+ * * @param fields * @param refreshAppearances if set to true the appearances for the form field widgets will be updated * @throws IOException */ public void flatten(List fields, boolean refreshAppearances) throws IOException { - // for dynamic XFA forms there is no flatten as this would mean to do a rendering - // from the XFA content into a static PDF. - if (xfaIsDynamic()) - { - LOG.warn("Flatten for a dynamix XFA form is not supported"); - return; - } - - // refresh the appearances if set - if (refreshAppearances) - { - refreshAppearances(fields); - } - - // indicates if the original content stream - // has been wrapped in a q...Q pair. - boolean isContentStreamWrapped = false; + // Nothing to flatten if there are no fields provided + if (fields.isEmpty()) + { + return; + } - // the content stream to write to - PDPageContentStream contentStream; + if (!refreshAppearances && getNeedAppearances()) + { + LOG.warn("acroForm.getNeedAppearances() returns true, " + + "visual field appearances may not have been set"); + LOG.warn("call acroForm.refreshAppearances() or " + + "use the flatten() method with refreshAppearances parameter"); + } + + // for dynamic XFA forms there is no flatten as this would mean to do a rendering + // from the XFA content into a static PDF. 
+ if (xfaIsDynamic()) + { + LOG.warn("Flatten for a dynamix XFA form is not supported"); + return; + } - // Iterate over all form fields and their widgets and create a - // FormXObject at the page content level from that - for (PDField field : fields) + // refresh the appearances if set + if (refreshAppearances) { - for (PDAnnotationWidget widget : field.getWidgets()) - { - if (widget.getNormalAppearanceStream() != null) - { - PDPage page = widget.getPage(); - if (!isContentStreamWrapped) - { - contentStream = new PDPageContentStream(document, page, AppendMode.APPEND, true, true); - isContentStreamWrapped = true; - } - else - { - contentStream = new PDPageContentStream(document, page, AppendMode.APPEND, true); - } - - PDFormXObject fieldObject = new PDFormXObject(widget.getNormalAppearanceStream().getCOSObject()); - - Matrix translationMatrix = Matrix.getTranslateInstance(widget.getRectangle().getLowerLeftX(), widget.getRectangle().getLowerLeftY()); - contentStream.saveGraphicsState(); - contentStream.transform(translationMatrix); - contentStream.drawForm(fieldObject); - contentStream.restoreGraphicsState(); - contentStream.close(); - } - } + refreshAppearances(fields); } + // get the widgets per page + Map> pagesWidgetsMap = buildPagesWidgetsMap(fields); + // preserve all non widget annotations for (PDPage page : document.getPages()) { + Set widgetsForPageMap = pagesWidgetsMap.get(page.getCOSObject()); + + // indicates if the original content stream + // has been wrapped in a q...Q pair. 
+ boolean isContentStreamWrapped = false; + List annotations = new ArrayList(); for (PDAnnotation annotation: page.getAnnotations()) { - if (!(annotation instanceof PDAnnotationWidget)) + if (widgetsForPageMap == null || !widgetsForPageMap.contains(annotation.getCOSObject())) { annotations.add(annotation); } + else if (isVisibleAnnotation(annotation)) + { + PDPageContentStream contentStream = new PDPageContentStream( + document, page, AppendMode.APPEND, true, !isContentStreamWrapped); + try + { + isContentStreamWrapped = true; + + PDAppearanceStream appearanceStream = annotation.getNormalAppearanceStream(); + + PDFormXObject fieldObject = new PDFormXObject(appearanceStream.getCOSObject()); + + contentStream.saveGraphicsState(); + + // see https://stackoverflow.com/a/54091766/1729265 for an explanation + // of the steps required + // this will transform the appearance stream form object into the rectangle of the + // annotation bbox and map the coordinate systems + Matrix transformationMatrix = resolveTransformationMatrix(annotation, appearanceStream); + contentStream.transform(transformationMatrix); + contentStream.drawForm(fieldObject); + contentStream.restoreGraphicsState(); + } + finally + { + contentStream.close(); + } + } } page.setAnnotations(annotations); } // remove the fields - setFields(Collections.emptyList()); + removeFields(fields); // remove XFA for hybrid forms dictionary.removeItem(COSName.XFA); - - } + } + + private boolean isVisibleAnnotation(PDAnnotation annotation) + { + if (annotation.isInvisible() || annotation.isHidden()) + { + return false; + } + PDAppearanceStream normalAppearanceStream = annotation.getNormalAppearanceStream(); + if (normalAppearanceStream == null) + { + return false; + } + PDRectangle bbox = normalAppearanceStream.getBBox(); + return bbox != null && bbox.getWidth() > 0 && bbox.getHeight() > 0; + } /** * Refreshes the appearance streams and appearance dictionaries for @@ -284,10 +326,10 @@ public void refreshAppearances() 
throws IOException { for (PDField field : getFieldTree()) { - if (field instanceof PDTerminalField) - { - ((PDTerminalField) field).constructAppearances(); - } + if (field instanceof PDTerminalField) + { + ((PDTerminalField) field).constructAppearances(); + } } } @@ -302,10 +344,10 @@ public void refreshAppearances(List fields) throws IOException { for (PDField field : fields) { - if (field instanceof PDTerminalField) - { - ((PDTerminalField) field).constructAppearances(); - } + if (field instanceof PDTerminalField) + { + ((PDTerminalField) field).constructAppearances(); + } } } @@ -320,12 +362,12 @@ public void refreshAppearances(List fields) throws IOException * might either be terminal fields, non-terminal fields or a mixture of both. Non-terminal fields * mark branches which contents can be retrieved using {@link PDNonTerminalField#getChildren()}. * - * @return A list of the documents root fields. - * + * @return A list of the documents root fields, never null. If there are no fields then this + * method returns an empty list. */ public List getFields() { - COSArray cosFields = (COSArray) dictionary.getDictionaryObject(COSName.FIELDS); + COSArray cosFields = dictionary.getCOSArray(COSName.FIELDS); if (cosFields == null) { return Collections.emptyList(); @@ -424,7 +466,7 @@ public PDField getField(String fullyQualifiedName) // get the field from the field tree for (PDField field : getFieldTree()) { - if (field.getFullyQualifiedName().compareTo(fullyQualifiedName) == 0) + if (field.getFullyQualifiedName().equals(fullyQualifiedName)) { return field; } @@ -476,17 +518,17 @@ public void setNeedAppearances(Boolean value) } /** - * This will get the default resources for the acro form. + * This will get the default resources for the AcroForm. * - * @return The default resources. + * @return The default resources or null if there is none. 
*/ public PDResources getDefaultResources() { PDResources retval = null; - COSDictionary dr = (COSDictionary) dictionary.getDictionaryObject(COSName.DR); - if (dr != null) + COSBase base = dictionary.getDictionaryObject(COSName.DR); + if (base instanceof COSDictionary) { - retval = new PDResources(dr, document.getResourceCache()); + retval = new PDResources((COSDictionary) base, document.getResourceCache()); } return retval; } @@ -548,13 +590,15 @@ public void setXFA(PDXFAResource xfa) } /** - * This will get the 'quadding' or justification of the text to be displayed. - * 0 - Left(default)
- * 1 - Centered
- * 2 - Right
- * Please see the QUADDING_CONSTANTS. + * This will get the document-wide default value for the quadding/justification of variable text + * fields. + *

+ * 0 - Left(default)
+ * 1 - Centered
+ * 2 - Right
+ * See the QUADDING constants of {@link PDVariableText}. * - * @return The justification of the text strings. + * @return The justification of the variable text fields. */ public int getQ() { @@ -568,9 +612,10 @@ public int getQ() } /** - * This will set the quadding/justification of the text. See QUADDING constants. + * This will set the document-wide default value for the quadding/justification of variable text + * fields. See the QUADDING constants of {@link PDVariableText}. * - * @param q The new text justification. + * @param q The justification of the variable text fields. */ public void setQ(int q) { @@ -616,4 +661,134 @@ public void setAppendOnly(boolean appendOnly) { dictionary.setFlag(COSName.SIG_FLAGS, FLAG_APPEND_ONLY, appendOnly); } + + /** + * Set a handler to support JavaScript actions in the form. + * + * @return scriptingHandler + */ + public ScriptingHandler getScriptingHandler() + { + return scriptingHandler; + } + + /** + * Set a handler to support JavaScript actions in the form. 
+ * + * @param scriptingHandler + */ + public void setScriptingHandler(ScriptingHandler scriptingHandler) + { + this.scriptingHandler = scriptingHandler; + } + + private Matrix resolveTransformationMatrix(PDAnnotation annotation, PDAppearanceStream appearanceStream) + { + // 1st step transform appearance stream bbox with appearance stream matrix + Rectangle2D transformedAppearanceBox = getTransformedAppearanceBBox(appearanceStream); + PDRectangle annotationRect = annotation.getRectangle(); + + // 2nd step caclulate matrix to transform calculated rectangle into the annotation Rect boundaries + Matrix transformationMatrix = new Matrix(); + transformationMatrix.translate((float) (annotationRect.getLowerLeftX()-transformedAppearanceBox.getX()), (float) (annotationRect.getLowerLeftY()-transformedAppearanceBox.getY())); + transformationMatrix.scale((float) (annotationRect.getWidth()/transformedAppearanceBox.getWidth()), (float) (annotationRect.getHeight()/transformedAppearanceBox.getHeight())); + return transformationMatrix; + } + + /** + * Calculate the transformed appearance box. 
+ * + * Apply the Matrix (or an identity transform) to the BBox of + * the appearance stream + * + * @param appearanceStream + * @return the transformed rectangle + */ + private Rectangle2D getTransformedAppearanceBBox(PDAppearanceStream appearanceStream) + { + Matrix appearanceStreamMatrix = appearanceStream.getMatrix(); + PDRectangle appearanceStreamBBox = appearanceStream.getBBox(); + GeneralPath transformedAppearanceBox = appearanceStreamBBox.transform(appearanceStreamMatrix); + return transformedAppearanceBox.getBounds2D(); + } + + private Map> buildPagesWidgetsMap(List fields) throws IOException + { + Map> pagesAnnotationsMap = + new HashMap>(); + boolean hasMissingPageRef = false; + + for (PDField field : fields) + { + List widgets = field.getWidgets(); + for (PDAnnotationWidget widget : widgets) + { + PDPage page = widget.getPage(); + if (page != null) + { + fillPagesAnnotationMap(pagesAnnotationsMap, page, widget); + } + else + { + hasMissingPageRef = true; + } + } + } + + if (!hasMissingPageRef) + { + return pagesAnnotationsMap; + } + + // If there is a widget with a missing page reference we need to build the map reverse i.e. + // from the annotations to the widget. 
+ LOG.warn("There has been a widget with a missing page reference, will check all page annotations"); + for (PDPage page : document.getPages()) + { + for (PDAnnotation annotation : page.getAnnotations()) + { + if (annotation instanceof PDAnnotationWidget) + { + fillPagesAnnotationMap(pagesAnnotationsMap, page, (PDAnnotationWidget) annotation); + } + } + } + + return pagesAnnotationsMap; + } + + private void fillPagesAnnotationMap(Map> pagesAnnotationsMap, + PDPage page, PDAnnotationWidget widget) + { + if (pagesAnnotationsMap.get(page.getCOSObject()) == null) + { + Set widgetsForPage = new HashSet(); + widgetsForPage.add(widget.getCOSObject()); + pagesAnnotationsMap.put(page.getCOSObject(), widgetsForPage); + } + else + { + Set widgetsForPage = pagesAnnotationsMap.get(page.getCOSObject()); + widgetsForPage.add(widget.getCOSObject()); + } + } + + private void removeFields(List fields) + { + for (PDField field : fields) + { + COSArray array; + if (field.getParent() == null) + { + // if the field has no parent, assume it is at root level list, remove it from there + array = (COSArray) dictionary.getDictionaryObject(COSName.FIELDS); + } + else + { + // if the field has a parent, then remove from the list there + array = (COSArray) field.getParent().getCOSObject().getDictionaryObject(COSName.KIDS); + } + array.removeObject(field.getCOSObject()); + } + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDButton.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDButton.java index 54ac013c127..eb10d712d46 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDButton.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDButton.java @@ -26,7 +26,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Set; @@ -94,17 +94,23 @@ public boolean 
isPushButton() /** * Set the push button bit. * + * @deprecated use {@link org.apache.pdfbox.pdmodel.interactive.form.PDPushButton} instead * @param pushbutton if true the button field is treated as a push button field. */ + @Deprecated public void setPushButton(boolean pushbutton) { getCOSObject().setFlag(COSName.FF, FLAG_PUSHBUTTON, pushbutton); + if (pushbutton) + { + setRadioButton(false); + } } /** * Determines if radio button bit is set. * - * @return true if type of button field is a push button. + * @return true if type of button field is a radio button. */ public boolean isRadioButton() { @@ -114,16 +120,24 @@ public boolean isRadioButton() /** * Set the radio button bit. * + * @deprecated use {@link org.apache.pdfbox.pdmodel.interactive.form.PDRadioButton} instead * @param radiobutton if true the button field is treated as a radio button field. */ + @Deprecated public void setRadioButton(boolean radiobutton) { getCOSObject().setFlag(COSName.FF, FLAG_RADIO, radiobutton); + if (radiobutton) + { + setPushButton(false); + } } /** - * Returns the selected value. May be empty if NoToggleToOff is set but there is no value - * selected. + * Returns the selected value. + * + *

Off is the default value which will also be returned if the + * value hasn't been set at all. * * @return A non-null string. */ @@ -132,17 +146,37 @@ public String getValue() COSBase value = getInheritableAttribute(COSName.V); if (value instanceof COSName) { - return ((COSName)value).getName(); + String stringValue = ((COSName)value).getName(); + List exportValues = getExportValues(); + if (!exportValues.isEmpty()) + { + try + { + int idx = Integer.parseInt(stringValue, 10); + if (idx >= 0 && idx < exportValues.size()) + { + return exportValues.get(idx); + } + } + catch (NumberFormatException nfe) + { + return stringValue; + } + } + return stringValue; } else { - return ""; + // Off is the default value if there is nothing else set. + // See PDF Spec. + return "Off"; } } /** - * Sets the selected option given its name. - * + * Sets the selected option given its name. It also tries to update the visual appearance, + * unless {@link PDAcroForm#getNeedAppearances()} is true. + * * @param value Name of option to select * @throws IOException if the value could not be set * @throws IllegalArgumentException if the value is not a valid option. 
@@ -150,23 +184,47 @@ public String getValue() @Override public void setValue(String value) throws IOException { - checkValue(value); - getCOSObject().setName(COSName.V, value); - // update the appearance state (AS) - for (PDAnnotationWidget widget : getWidgets()) + checkValue(value); + + // if there are export values/an Opt entry there is a different + // approach to setting the value + boolean hasExportValues = getExportValues().size() > 0; + + if (hasExportValues) { + updateByOption(value); + } + else { - PDAppearanceEntry appearanceEntry = widget.getAppearance().getNormalAppearance(); - if (((COSDictionary) appearanceEntry.getCOSObject()).containsKey(value)) - { - widget.getCOSObject().setName(COSName.AS, value); - } - else - { - widget.getCOSObject().setItem(COSName.AS, COSName.Off); - } + updateByValue(value); + } + + applyChange(); + } + + /** + * Set the selected option given its index, and try to update the visual appearance. + * + * NOTE: this method is only usable if there are export values and used for + * radio buttons with FLAG_RADIOS_IN_UNISON not set. + * + * @param index index of option to be selected + * @throws IOException if the value could not be set + * @throws IllegalArgumentException if the index provided is not a valid index. + */ + public void setValue(int index) throws IOException + { + if (getExportValues().isEmpty() || index < 0 || index >= getExportValues().size()) + { + throw new IllegalArgumentException("index '" + index + + "' is not a valid index for the field " + getFullyQualifiedName() + + ", valid indices are from 0 to " + (getExportValues().size() - 1)); } + + updateByValue(String.valueOf(index)); + applyChange(); } + /** @@ -207,7 +265,7 @@ public String getValueAsString() /** - * This will get the export values. + * This will get the (optional) export values. * *

The export values are defined in the field dictionaries /Opt key.

* @@ -220,13 +278,15 @@ public String getValueAsString() *
  • allow radio buttons having the same export value to be handled independently *
  • * - *

    * - * @return List containing all possible export values. If there is no Opt entry an empty list will be returned. + * @return List containing all possible export values. If there is no /Opt entry an empty list will be returned. + * + * @see #getOnValues() */ public List getExportValues() { COSBase value = getInheritableAttribute(COSName.OPT); + if (value instanceof COSString) { List array = new ArrayList(); @@ -263,28 +323,28 @@ public void setExportValues(List values) @Override void constructAppearances() throws IOException { - for (PDAnnotationWidget widget : getWidgets()) + List exportValues = getExportValues(); + if (exportValues.size() > 0) { - PDAppearanceDictionary appearance = widget.getAppearance(); - if (appearance == null || appearance.getNormalAppearance() == null) + // the value is the index value of the option. So we need to get that + // and use it to set the value + try { - // TODO: implement appearance generation for radio buttons - throw new UnsupportedOperationException("Appearance generation is not implemented yet, see PDFBOX-2849"); - } - else - { - PDAppearanceEntry appearanceEntry = widget.getAppearance().getNormalAppearance(); - String value = getValue(); - if (((COSDictionary) appearanceEntry.getCOSObject()).containsKey(value)) - { - widget.getCOSObject().setName(COSName.AS, value); - } - else + int optionsIndex = Integer.parseInt(getValue()); + if (optionsIndex < exportValues.size()) { - widget.getCOSObject().setItem(COSName.AS, COSName.Off); + updateByOption(exportValues.get(optionsIndex)); } + } catch (NumberFormatException e) + { + // silently ignore that + // and don't update the appearance } } + else + { + updateByValue(getValue()); + } } /** @@ -300,30 +360,59 @@ void constructAppearances() throws IOException public Set getOnValues() { // we need a set as the field can appear multiple times - Set onValues = new HashSet(); + Set onValues = new LinkedHashSet(); + + if (getExportValues().size() > 0) + { + 
onValues.addAll(getExportValues()); + return onValues; + } List widgets = this.getWidgets(); for (PDAnnotationWidget widget : widgets) { - PDAppearanceDictionary apDictionary = widget.getAppearance(); - if (apDictionary != null) + onValues.add(getOnValueForWidget(widget)); + } + return onValues; + } + + /* + * Get the on value for an individual widget by it's index. + */ + private String getOnValue(int index) + { + List widgets = this.getWidgets(); + if (index < widgets.size()) + { + return getOnValueForWidget(widgets.get(index)); + } + return ""; + } + + /* + * Get the on value for an individual widget. + */ + private String getOnValueForWidget(PDAnnotationWidget widget) + { + PDAppearanceDictionary apDictionary = widget.getAppearance(); + if (apDictionary != null) + { + PDAppearanceEntry normalAppearance = apDictionary.getNormalAppearance(); + if (normalAppearance != null) { - PDAppearanceEntry normalAppearance = apDictionary.getNormalAppearance(); - if (normalAppearance != null) + Set entries = normalAppearance.getSubDictionary().keySet(); + for (COSName entry : entries) { - Set entries = normalAppearance.getSubDictionary().keySet(); - for (COSName entry : entries) + if (COSName.Off.compareTo(entry) != 0) { - if (COSName.Off.compareTo(entry) != 0) - { - onValues.add(entry.getName()); - } + return entry.getName(); } } } - } - return onValues; - } + } + return ""; + } + /** * Checks value. @@ -331,7 +420,7 @@ public Set getOnValues() * @param value Name of radio button to select * @throws IllegalArgumentException if the value is not a valid option. 
*/ - void checkValue(String value) throws IllegalArgumentException + void checkValue(String value) { Set onValues = getOnValues(); if (COSName.Off.getName().compareTo(value) != 0 && !onValues.contains(value)) @@ -341,4 +430,56 @@ void checkValue(String value) throws IllegalArgumentException + ", valid values are: " + onValues + " and " + COSName.Off.getName()); } } + + private void updateByValue(String value) throws IOException + { + getCOSObject().setName(COSName.V, value); + // update the appearance state (AS) + for (PDAnnotationWidget widget : getWidgets()) + { + if (widget.getAppearance() == null) + { + continue; + } + PDAppearanceEntry appearanceEntry = widget.getAppearance().getNormalAppearance(); + if (((COSDictionary) appearanceEntry.getCOSObject()).containsKey(value)) + { + widget.setAppearanceState(value); + } + else + { + widget.setAppearanceState(COSName.Off.getName()); + } + } + } + + private void updateByOption(String value) throws IOException + { + List widgets = getWidgets(); + List options = getExportValues(); + + if (widgets.size() != options.size()) + { + throw new IllegalArgumentException("The number of options doesn't match the number of widgets"); + } + + if (value.equals(COSName.Off.getName())) + { + updateByValue(value); + } + else + { + // the value is the index of the matching option + int optionsIndex = options.indexOf(value); + + // get the values the options are pointing to as + // this might not be numerical + // see PDFBOX-3682 + if (optionsIndex != -1) + { + updateByValue(getOnValue(optionsIndex)); + } + } + } + } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDCheckBox.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDCheckBox.java index f0aec3e031e..8a18878311a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDCheckBox.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDCheckBox.java @@ -101,8 +101,7 @@ public String 
getOnValue() { PDAnnotationWidget widget = this.getWidgets().get(0); PDAppearanceDictionary apDictionary = widget.getAppearance(); - - String onValue = ""; + if (apDictionary != null) { PDAppearanceEntry normalAppearance = apDictionary.getNormalAppearance(); @@ -113,11 +112,11 @@ public String getOnValue() { if (COSName.Off.compareTo(entry) != 0) { - onValue = entry.getName(); + return entry.getName(); } } } } - return onValue; + return ""; } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDChoice.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDChoice.java index f8db5ac365e..e6b920ce5dc 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDChoice.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDChoice.java @@ -73,7 +73,7 @@ public PDChoice(PDAcroForm acroForm) * *

    * For a choice field the options array can either be an array - * of text strings or an array of a two-element arrays.
    + * of text strings or an array of a two-element arrays.
    * The method always only returns either the text strings or, * in case of two-element arrays, an array of the first element of * the two-element arrays @@ -180,7 +180,7 @@ public void setOptions(List exportValues, List displayValues) * *

    * For options with an array of text strings the display value and export value - * are the same.
    + * are the same.
    * For options with an array of two-element arrays the display value is the * second entry in the two-element array. *

    @@ -198,7 +198,7 @@ public List getOptionsDisplayValues() * *

    * For options with an array of text strings the display value and export value - * are the same.
    + * are the same.
    * For options with an array of two-element arrays the export value is the * first entry in the two-element array. *

    @@ -228,7 +228,7 @@ public List getSelectedOptionsIndex() { return COSArrayList.convertIntegerCOSArrayToList((COSArray) value); } - return Collections.emptyList(); + return Collections.emptyList(); } /** @@ -239,10 +239,8 @@ public List getSelectedOptionsIndex() *
  • do support multiple selections
  • *
  • have export values with the same value
  • * - *

    *

    * Setting the index will set the value too. - *

    * * @param values List containing the indices of all selected options. */ @@ -371,7 +369,8 @@ public void setCombo(boolean combo) } /** - * Sets the selected value of this field. + * Sets the selected value of this field. It also tries to update the visual appearance, unless + * {@link PDAcroForm#getNeedAppearances()} is true. * * @param value The name of the selected item. * @throws IOException if the value could not be set @@ -422,6 +421,7 @@ public void setValue(List values) throws IOException else { getCOSObject().removeItem(COSName.V); + getCOSObject().removeItem(COSName.I); } applyChange(); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDDefaultAppearanceString.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDDefaultAppearanceString.java index 5da6a5c5b84..334f2f0b0c7 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDDefaultAppearanceString.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDDefaultAppearanceString.java @@ -22,11 +22,11 @@ import java.util.List; import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSNumber; -import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdmodel.PDPageContentStream; @@ -34,7 +34,8 @@ import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; -import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceCMYK; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; 
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; @@ -73,7 +74,8 @@ class PDDefaultAppearanceString { if (defaultAppearance == null) { - throw new IllegalArgumentException("/DA is a required entry"); + throw new IllegalArgumentException("/DA is a required entry. " + + "Please set a default appearance first."); } if (defaultResources == null) @@ -98,11 +100,7 @@ private void processAppearanceStringOperators(byte[] content) throws IOException Object token = parser.parseNextToken(); while (token != null) { - if (token instanceof COSObject) - { - arguments.add(((COSObject) token).getObject()); - } - else if (token instanceof Operator) + if (token instanceof Operator) { processOperator((Operator) token, arguments); arguments = new ArrayList(); @@ -126,11 +124,19 @@ private void processOperator(Operator operator, List operands) throws I { String name = operator.getName(); - if ("Tf".equals(name)) + if (OperatorName.SET_FONT_AND_SIZE.equals(name)) { processSetFont(operands); } - else if ("rg".equals(name)) + else if (OperatorName.NON_STROKING_GRAY.equals(name)) + { + processSetFontColor(operands); + } + else if (OperatorName.NON_STROKING_RGB.equals(name)) + { + processSetFontColor(operands); + } + else if (OperatorName.NON_STROKING_CMYK.equals(name)) { processSetFontColor(operands); } @@ -184,11 +190,21 @@ private void processSetFont(List operands) throws IOException */ private void processSetFontColor(List operands) throws IOException { - PDColorSpace colorSpace = PDDeviceRGB.INSTANCE; - if (colorSpace instanceof PDDeviceColorSpace && - operands.size() < colorSpace.getNumberOfComponents()) + PDColorSpace colorSpace; + + switch (operands.size()) { - throw new IOException("Missing operands for set non stroking color operator " + Arrays.toString(operands.toArray())); + case 1: + colorSpace = PDDeviceGray.INSTANCE; + break; + case 3: + colorSpace = PDDeviceRGB.INSTANCE; + break; + case 4: + colorSpace = PDDeviceCMYK.INSTANCE; + break; + default: + 
throw new IOException("Missing operands for set non stroking color operator " + Arrays.toString(operands.toArray())); } COSArray array = new COSArray(); array.addAll(operands); @@ -218,7 +234,7 @@ void setFontName(COSName fontName) /** * Returns the font. */ - PDFont getFont() throws IOException + PDFont getFont() { return font; } @@ -301,7 +317,7 @@ void copyNeededResourcesTo(PDAppearanceStream appearanceStream) throws IOExcepti appearanceStream.setResources(streamResources); } - if (streamResources.getFont(getFontName()) == null) + if (streamResources.getFont(fontName) == null) { streamResources.put(fontName, getFont()); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDField.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDField.java index 1729ddd094b..7b2a49838aa 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDField.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDField.java @@ -23,6 +23,9 @@ import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.pdmodel.common.COSArrayList; import org.apache.pdfbox.pdmodel.common.COSObjectable; import org.apache.pdfbox.pdmodel.fdf.FDFField; import org.apache.pdfbox.pdmodel.interactive.action.PDFormFieldAdditionalActions; @@ -131,7 +134,10 @@ else if (parent != null) * For {@link PDNonTerminalField} the list will be empty as non terminal fields * have no visual representation in the form. * - * @return A non-null string. + * @return a List of {@link PDAnnotationWidget} annotations. Be aware that this list is + * not backed by the actual widget collection of the field, so adding or deleting has no + * effect on the PDF document. 
For {@link PDTerminalField} you'd have to call + * {@link PDTerminalField#setWidgets(java.util.List) setWidgets()} with the modified list. */ public abstract List getWidgets(); @@ -155,8 +161,9 @@ public boolean isReadOnly() } /** - * sets the field to be required. - * + * sets the flag whether the field is to be required to have a value at the time it is exported + * by a submit-form action. + * * @param required The new flag for required. */ public void setRequired(boolean required) @@ -165,8 +172,8 @@ public void setRequired(boolean required) } /** - * - * @return true if the field is required + * @return true if the field is required to have a value at the time it is exported by a + * submit-form action. */ public boolean isRequired() { @@ -234,10 +241,37 @@ public PDFormFieldAdditionalActions getActions() void importFDF(FDFField fdfField) throws IOException { COSBase fieldValue = fdfField.getCOSValue(); - if (fieldValue != null) + + if (fieldValue != null && this instanceof PDTerminalField) + { + PDTerminalField currentField = (PDTerminalField) this; + + if (fieldValue instanceof COSName) + { + currentField.setValue(((COSName) fieldValue).getName()); + } + else if (fieldValue instanceof COSString) + { + currentField.setValue(((COSString) fieldValue).getString()); + } + else if (fieldValue instanceof COSStream) + { + currentField.setValue(((COSStream) fieldValue).toTextString()); + } + else if (fieldValue instanceof COSArray && this instanceof PDChoice) + { + ((PDChoice) this).setValue(COSArrayList.convertCOSStringCOSArrayToList((COSArray) fieldValue)); + } + else + { + throw new IOException("Error:Unknown type for field import" + fieldValue); + } + } + else if (fieldValue != null) { dictionary.setItem(COSName.V, fieldValue); } + Integer ff = fdfField.getFieldFlags(); if (ff != null) { @@ -352,13 +386,20 @@ public String getPartialName() { return dictionary.getString(COSName.T); } + /** * This will set the partial name of the field. 
* * @param name The new name for the field. + * @throws IllegalArgumentException If the name contains a period character. */ public void setPartialName(String name) { + if (name.contains(".")) + { + throw new IllegalArgumentException( + "A field partial name shall not contain a period character: " + name); + } dictionary.setString(COSName.T, name); } @@ -386,8 +427,10 @@ public String getFullyQualifiedName() } /** - * Gets the alternate name of the field. - * + * Gets the alternate name of the field ("shall be used in place of the actual field name + * wherever the field shall be identified in the user interface (such as in error or status + * messages referring to the field)"). + * * @return the alternate name of the field */ public String getAlternateFieldName() @@ -396,9 +439,12 @@ public String getAlternateFieldName() } /** - * This will set the alternate name of the field. - * - * @param alternateFieldName the alternate name of the field + * This will set the alternate name of the field ("shall be used in place of the actual field + * name wherever the field shall be identified in the user interface (such as in error or status + * messages referring to the field)"). The text appears as a tool tip in Adobe Reader. Because + * of the usage for error or status messages, it should be different for each field. + * + * @param alternateFieldName the alternate name of the field. 
*/ public void setAlternateFieldName(String alternateFieldName) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDFieldFactory.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDFieldFactory.java index 5479652f04f..77dcd0a9d79 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDFieldFactory.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDFieldFactory.java @@ -17,13 +17,15 @@ package org.apache.pdfbox.pdmodel.interactive.form; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; /** * A PDField factory. */ -final class PDFieldFactory +public final class PDFieldFactory { private static final String FIELD_TYPE_TEXT = "Tx"; @@ -43,9 +45,30 @@ private PDFieldFactory() * @param parent the parent node of the node to be created * @return the corresponding PDField instance */ - static PDField createField(PDAcroForm form, COSDictionary field, PDNonTerminalField parent) + public static PDField createField(PDAcroForm form, COSDictionary field, PDNonTerminalField parent) { String fieldType = findFieldType(field); + + // Test if we have a non terminal field first as it might have + // properties which do apply to other fields + // A non terminal fields has Kids entries which do have + // a field name (other than annotations) + if (field.containsKey(COSName.KIDS)) + { + COSArray kids = (COSArray) field.getDictionaryObject(COSName.KIDS); + if (kids != null && kids.size() > 0) + { + for (int i = 0; i < kids.size(); i++) + { + COSBase kid = kids.getObject(i); + if (kid instanceof COSDictionary && ((COSDictionary) kid).getString(COSName.T) != null) + { + return new PDNonTerminalField(form, field, parent); + } + } + } + } + if (FIELD_TYPE_CHOICE.equals(fieldType)) { return createChoiceSubType(form, field, parent); @@ -62,10 +85,6 @@ else if 
(FIELD_TYPE_BUTTON.equals(fieldType)) { return createButtonSubType(form, field, parent); } - else if (field.containsKey(COSName.KIDS)) - { - return new PDNonTerminalField(form, field, parent); - } else { // an erroneous non-field object, see PDFBOX-2885 @@ -113,11 +132,10 @@ private static String findFieldType(COSDictionary dic) String retval = dic.getNameAsString(COSName.FT); if (retval == null) { - COSDictionary parent = (COSDictionary) dic.getDictionaryObject(COSName.PARENT, - COSName.P); - if (parent != null) + COSBase base = dic.getDictionaryObject(COSName.PARENT, COSName.P); + if (base instanceof COSDictionary) { - retval = findFieldType(parent); + retval = findFieldType((COSDictionary) base); } } return retval; diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDFieldTree.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDFieldTree.java index 0bc77e13977..d74f200332c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDFieldTree.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDFieldTree.java @@ -17,17 +17,26 @@ package org.apache.pdfbox.pdmodel.interactive.form; import java.util.ArrayDeque; +import java.util.Collections; +import java.util.HashSet; +import java.util.IdentityHashMap; import java.util.Iterator; import java.util.Queue; import java.util.List; import java.util.NoSuchElementException; +import java.util.Set; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSDictionary; /** * The field tree. 
*/ public class PDFieldTree implements Iterable { + private static final Log LOG = LogFactory.getLog(PDFieldTree.class); + private final PDAcroForm acroForm; /** @@ -60,6 +69,11 @@ private static final class FieldIterator implements Iterator { private final Queue queue = new ArrayDeque(); + // PDFBOX-5044: to prevent recursion + // must be COSDictionary and not PDField, because PDField is newly created each time + private final Set set = + Collections.newSetFromMap(new IdentityHashMap()); + private FieldIterator(PDAcroForm form) { List fields = form.getFields(); @@ -78,7 +92,8 @@ public boolean hasNext() @Override public PDField next() { - if(!hasNext()){ + if(!hasNext()) + { throw new NoSuchElementException(); } @@ -94,12 +109,21 @@ public void remove() private void enqueueKids(PDField node) { queue.add(node); + set.add(node.getCOSObject()); if (node instanceof PDNonTerminalField) { List kids = ((PDNonTerminalField) node).getChildren(); for (PDField kid : kids) { - enqueueKids(kid); + if (set.contains(kid.getCOSObject())) + { + LOG.error("Child of field '" + node.getFullyQualifiedName() + + "' already exists elsewhere, ignored to avoid recursion"); + } + else + { + enqueueKids(kid); + } } } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDNonTerminalField.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDNonTerminalField.java index 342b8a92b95..25f31d0da49 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDNonTerminalField.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDNonTerminalField.java @@ -20,13 +20,14 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSInteger; import 
org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.COSArrayList; -import org.apache.pdfbox.pdmodel.common.COSObjectable; import org.apache.pdfbox.pdmodel.fdf.FDFField; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; @@ -42,6 +43,8 @@ */ public class PDNonTerminalField extends PDField { + private static final Log LOG = LogFactory.getLog(PDNonTerminalField.class); + /** * Constructor. * @@ -84,19 +87,19 @@ void importFDF(FDFField fdfField) throws IOException List fdfKids = fdfField.getKids(); List children = getChildren(); - for (int i = 0; fdfKids != null && i < fdfKids.size(); i++) + if (fdfKids == null) + { + return; + } + for (int i = 0; i < fdfKids.size(); i++) { - for (COSObjectable pdKid : children) + for (PDField pdChild : children) { - if (pdKid instanceof PDField) + FDFField fdfChild = fdfKids.get(i); + String fdfName = fdfChild.getPartialFieldName(); + if (fdfName != null && fdfName.equals(pdChild.getPartialName())) { - PDField pdChild = (PDField) pdKid; - FDFField fdfChild = fdfKids.get(i); - String fdfName = fdfChild.getPartialFieldName(); - if (fdfName != null && fdfName.equals(pdChild.getPartialName())) - { - pdChild.importFDF(fdfChild); - } + pdChild.importFDF(fdfChild); } } } @@ -110,7 +113,7 @@ FDFField exportFDF() throws IOException fdfField.setValue(getValue()); List children = getChildren(); - List fdfChildren = new ArrayList(); + List fdfChildren = new ArrayList(children.size()); for (PDField child : children) { fdfChildren.add(child.exportFDF()); @@ -122,19 +125,34 @@ FDFField exportFDF() throws IOException /** * Returns this field's children. These may be either terminal or non-terminal fields. - * - * @return he list of child fields. + * + * @return the list of child fields. Be aware that this list is not backed by the + * children of the field, so adding or deleting has no effect on the PDF document until you call + * {@link #setChildren(java.util.List) setChildren()} with the modified list. 
*/ public List getChildren() { List children = new ArrayList(); - COSArray kids = (COSArray)getCOSObject().getDictionaryObject(COSName.KIDS); + COSArray kids = getCOSObject().getCOSArray(COSName.KIDS); + if (kids == null) + { + return children; + } for (int i = 0; i < kids.size(); i++) { - PDField field = PDField.fromDictionary(getAcroForm(), (COSDictionary)kids.getObject(i), this); - if (field != null) + COSBase kid = kids.getObject(i); + if (kid instanceof COSDictionary) { - children.add(field); + if (kid.getCOSObject() == this.getCOSObject()) + { + LOG.warn("Child field is same object as parent"); + continue; + } + PDField field = PDField.fromDictionary(getAcroForm(), (COSDictionary) kid, this); + if (field != null) + { + children.add(field); + } } } return children; @@ -152,7 +170,7 @@ public void setChildren(List children) } /** - * @inheritDoc + * {@inheritDoc} * *

    Note: while non-terminal fields do inherit field values, this method returns * the local value, without inheritance. @@ -164,7 +182,7 @@ public String getFieldType() } /** - * @inheritDoc + * Get the field value. * *

    Note: while non-terminal fields do inherit field values, this method returns * the local value, without inheritance. @@ -175,7 +193,7 @@ public COSBase getValue() } /** - * @inheritDoc + * {@inheritDoc} * *

    Note: while non-terminal fields do inherit field values, this method returns * the local value, without inheritance. @@ -193,6 +211,8 @@ public String getValueAsString() * *

    Note: while non-terminal fields do inherit field values, this method returns * the local value, without inheritance. + * @param object + * @throws java.io.IOException */ public void setValue(COSBase object) throws IOException { @@ -207,6 +227,7 @@ public void setValue(COSBase object) throws IOException * @param value Plain text * @throws IOException if the value could not be set */ + @Override public void setValue(String value) throws IOException { getCOSObject().setString(COSName.V, value); @@ -232,6 +253,7 @@ public COSBase getDefaultValue() * *

    Note: while non-terminal fields do inherit field values, this method returns * the local value, without inheritance. + * @param value */ public void setDefaultValue(COSBase value) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDPushButton.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDPushButton.java index 3db04c5c762..8f1de2fb99c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDPushButton.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDPushButton.java @@ -16,10 +16,13 @@ */ package org.apache.pdfbox.pdmodel.interactive.form; +import java.io.IOException; import java.util.Collections; import java.util.List; +import java.util.Set; import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; /** * A pushbutton is a purely interactive control that responds immediately to user @@ -37,7 +40,7 @@ public class PDPushButton extends PDButton public PDPushButton(PDAcroForm acroForm) { super(acroForm); - setPushButton(true); + getCOSObject().setFlag(COSName.FF, FLAG_PUSHBUTTON, true); } /** @@ -84,4 +87,16 @@ public String getValueAsString() { return getValue(); } + + @Override + public Set getOnValues() + { + return Collections.emptySet(); + } + + @Override + void constructAppearances() throws IOException + { + // TODO: add appearance handler to generate/update appearance + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDRadioButton.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDRadioButton.java index 402c59e4140..2bf00181f96 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDRadioButton.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDRadioButton.java @@ -23,6 +23,7 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import 
org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; /** * Radio button fields contain a set of related buttons that can each be on or off. @@ -44,7 +45,7 @@ public final class PDRadioButton extends PDButton public PDRadioButton(PDAcroForm acroForm) { super(acroForm); - setRadioButton(true); + getCOSObject().setFlag(COSName.FF, FLAG_RADIO, true); } /** @@ -60,7 +61,7 @@ public PDRadioButton(PDAcroForm acroForm) } /** - * From the PDF Spec
    + * From the PDF Spec
    * If set, a group of radio buttons within a radio button field that use the same value for the on state will turn * on and off in unison; that is if one is checked, they are all checked. If clear, the buttons are mutually * exclusive (the same behavior as HTML radio buttons). @@ -81,12 +82,37 @@ public boolean isRadiosInUnison() return getCOSObject().getFlag(COSName.FF, FLAG_RADIOS_IN_UNISON); } + /** + * This will get the selected index. + *

    + * A RadioButton might have multiple same value options which are not selected jointly if + * they are not set in unison {@link #isRadiosInUnison()}.

    + * + *

    + * The method will return the first selected index or -1 if no option is selected.

    + * + * @return the first selected index or -1. + */ + public int getSelectedIndex() + { + int idx = 0; + for (PDAnnotationWidget widget : getWidgets()) + { + if (!COSName.Off.equals(widget.getAppearanceState())) + { + return idx; + } + idx ++; + } + return -1; + } + /** * This will get the selected export values. *

    * A RadioButton might have an export value to allow field values * which can not be encoded as PDFDocEncoding or for the same export value - * being assigned to multiple RadioButtons in a group.
    + * being assigned to multiple RadioButtons in a group.
    * To define an export value the RadioButton must define options {@link #setExportValues(List)} * which correspond to the individual items within the RadioButton.

    *

    @@ -116,6 +142,7 @@ public List getSelectedExportValues() throws IOException { selectedExportValues.add(exportValues.get(idx)); } + ++idx; } return selectedExportValues; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDSignatureField.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDSignatureField.java index 17a91e97ce7..370bedf05b2 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDSignatureField.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDSignatureField.java @@ -19,6 +19,10 @@ import java.io.IOException; import java.util.HashSet; import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; @@ -34,6 +38,8 @@ */ public class PDSignatureField extends PDTerminalField { + private static final Log LOG = LogFactory.getLog(PDSignatureField.class); + /** * @see PDTerminalField#PDTerminalField(PDAcroForm) * @@ -70,17 +76,13 @@ public PDSignatureField(PDAcroForm acroForm) throws IOException private String generatePartialName() { String fieldName = "Signature"; - Set sigNames = new HashSet(); - // fixme: this ignores non-terminal fields, so will miss any descendant signatures - for (PDField field : getAcroForm().getFields()) + Set nameSet = new HashSet(); + for (PDField field : getAcroForm().getFieldTree()) { - if(field instanceof PDSignatureField) - { - sigNames.add(field.getPartialName()); - } + nameSet.add(field.getPartialName()); } int i = 1; - while(sigNames.contains(fieldName+i)) + while (nameSet.contains(fieldName + i)) { ++i; } @@ -120,19 +122,17 @@ public void setValue(PDSignature value) throws IOException getCOSObject().setItem(COSName.V, value); applyChange(); } - + /** - * Sets the value of this field. 
- * - * This will throw an UnsupportedOperationException if used as the signature fields - * value can't be set using a String - * + * This will throw an UnsupportedOperationException if used as the signature fields value + * can't be set using a String + * * @param value the plain text value. - * + * * @throws UnsupportedOperationException in all cases! */ @Override - public void setValue(String value) throws UnsupportedOperationException + public void setValue(String value) { throw new UnsupportedOperationException("Signature fields don't support setting the value as String " + "- use setValue(PDSignature value) instead"); @@ -157,11 +157,11 @@ public void setDefaultValue(PDSignature value) throws IOException public PDSignature getValue() { COSBase value = getCOSObject().getDictionaryObject(COSName.V); - if (value == null) + if (value instanceof COSDictionary) { - return null; + return new PDSignature((COSDictionary) value); } - return new PDSignature((COSDictionary)value); + return null; } /** @@ -233,8 +233,11 @@ void constructAppearances() throws IOException return; } - // TODO: implement appearance generation for signatures - throw new UnsupportedOperationException("not implemented"); + // TODO: implement appearance generation for signatures (PDFBOX-3524) + LOG.warn("Appearance generation for signature fields not implemented here. 
" + + "You need to generate/update that manually, see the " + + "CreateVisibleSignature*.java files in the examples subproject " + + "of the source code download"); } } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDTerminalField.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDTerminalField.java index 5a9b0515119..f2986e9350a 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDTerminalField.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDTerminalField.java @@ -155,8 +155,11 @@ FDFField exportFDF() throws IOException /** * Returns the widget annotations associated with this field. - * - * @return The list of widget annotations. + * + * @return The list of widget annotations. Be aware that this list is not backed by the + * actual widget collection of the field, so adding or deleting has no effect on the PDF + * document until you call {@link #setWidgets(java.util.List) setWidgets()} with the modified + * list. */ @Override public List getWidgets() @@ -192,6 +195,10 @@ public void setWidgets(List children) { COSArray kidsArray = COSArrayList.converterToCOSArray(children); getCOSObject().setItem(COSName.KIDS, kidsArray); + for (PDAnnotationWidget widget : children) + { + widget.getCOSObject().setItem(COSName.PARENT, this); + } } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDTextField.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDTextField.java index bc399d24e30..7c677095157 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDTextField.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDTextField.java @@ -206,8 +206,9 @@ public void setMaxLen(int maxLen) } /** - * Sets the plain text value of this field. - * + * Sets the plain text value of this field. 
It also tries to update the visual appearance, + * unless {@link PDAcroForm#getNeedAppearances()} is true. + * * @param value Plain text * @throws IOException if the value could not be set */ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDVariableText.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDVariableText.java index 248f4b93a1a..547b4f62da7 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDVariableText.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDVariableText.java @@ -34,9 +34,9 @@ */ public abstract class PDVariableText extends PDTerminalField { - static final int QUADDING_LEFT = 0; - static final int QUADDING_CENTERED = 1; - static final int QUADDING_RIGHT = 2; + public static final int QUADDING_LEFT = 0; + public static final int QUADDING_CENTERED = 1; + public static final int QUADDING_RIGHT = 2; /** * @see PDTerminalField#PDTerminalField(PDAcroForm) @@ -96,11 +96,21 @@ PDDefaultAppearanceString getDefaultAppearanceString() throws IOException /** * Set the default appearance. * - * This will set the local default appearance for the variable text field only not + * This will set the local default appearance for the variable text field only, not * affecting a default appearance in the parent hierarchy. * * Providing null as the value will remove the local default appearance. - * + *

    + * This method can also be used to change the font of a field, by replacing the font name from + * this string with another font name found in the AcroForm default resources before + * calling {@link #setValue(java.lang.String) setValue(String)}, see also + * this + * stackoverflow answer. For example, "/Helv 10 Tf 0 g" can be replaced with "/F1 10 Tf 0 + * g". Performance may go down (see + * PDFBOX-4508) if this is done + * for many fields and with a very large font (e.g. ArialUni); to avoid this, save and reload + * the file after changing all fields. + * * @param daValue a string describing the default appearance */ public void setDefaultAppearance(String daValue) @@ -145,10 +155,10 @@ public void setDefaultStyleString(String defaultStyleString) * This will get the 'quadding' or justification of the text to be displayed. * * This is an inheritable attribute. - * - * 0 - Left(default)
    - * 1 - Centered
    - * 2 - Right
    + *
    + * 0 - Left (default)
    + * 1 - Centered
    + * 2 - Right
    * Please see the QUADDING_CONSTANTS. * * @return The justification of the text strings. @@ -193,7 +203,7 @@ public String getRichTextValue() throws IOException *

    * Setting the rich text value will not generate the appearance * for the field. - *
    + *
    * You can set {@link PDAcroForm#setNeedAppearances(Boolean)} to * signal a conforming reader to generate the appearance stream. *

    diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDXFAResource.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDXFAResource.java index aab6bcc7195..46eb01d7b08 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDXFAResource.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDXFAResource.java @@ -21,8 +21,6 @@ import java.io.IOException; import java.io.InputStream; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.apache.pdfbox.cos.COSArray; @@ -149,9 +147,7 @@ else if (xfa.getCOSObject() instanceof COSStream) */ public Document getDocument() throws ParserConfigurationException, SAXException, IOException { - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - factory.setNamespaceAware(true); - DocumentBuilder builder = factory.newDocumentBuilder(); - return builder.parse(new ByteArrayInputStream(this.getBytes())); + return org.apache.pdfbox.util.XMLUtil // + .parse(new ByteArrayInputStream(this.getBytes()), true); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java index 08315969a3b..e78dee1eb33 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java @@ -21,7 +21,6 @@ import java.text.BreakIterator; import java.text.AttributedCharacterIterator.Attribute; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import org.apache.pdfbox.pdmodel.font.PDFont; @@ -51,11 +50,24 @@ class PlainText */ PlainText(String textValue) { - List parts = Arrays.asList(textValue.split("\\n")); - paragraphs = new ArrayList(); - for (String part : parts) + if (textValue.isEmpty()) { - 
paragraphs.add(new Paragraph(part)); + paragraphs = new ArrayList(1); + paragraphs.add(new Paragraph("")); + } + else + { + String[] parts = textValue.replace('\t', ' ').split("\\r\\n|\\n|\\r|\\u2028|\\u2029"); + paragraphs = new ArrayList(parts.length); + for (String part : parts) + { + // Acrobat prints a space for an empty paragraph + if (part.length() == 0) + { + part = " "; + } + paragraphs.add(new Paragraph(part)); + } } } @@ -167,6 +179,9 @@ List getLines(PDFont font, float fontSize, float width) throws IOException { String word = textContent.substring(start,end); float wordWidth = font.getStringWidth(word) * scale; + + boolean wordNeedsSplit = false; + int splitOffset = end - start; lineWidth = lineWidth + wordWidth; @@ -177,7 +192,7 @@ List getLines(PDFont font, float fontSize, float width) throws IOException lineWidth = lineWidth - whitespaceWidth; } - if (lineWidth >= width) + if (lineWidth >= width && !textLine.getWords().isEmpty()) { textLine.setWidth(textLine.calculateWidth(font, fontSize)); textLines.add(textLine); @@ -185,13 +200,40 @@ List getLines(PDFont font, float fontSize, float width) throws IOException lineWidth = font.getStringWidth(word) * scale; } + if (wordWidth > width && textLine.getWords().isEmpty()) + { + // single word does not fit into width + wordNeedsSplit = true; + while (true) + { + splitOffset--; + String substring = word.substring(0, splitOffset); + float substringWidth = font.getStringWidth(substring) * scale; + if (substringWidth < width) + { + word = substring; + wordWidth = font.getStringWidth(word) * scale; + lineWidth = wordWidth; + break; + } + } + } + AttributedString as = new AttributedString(word); as.addAttribute(TextAttribute.WIDTH, wordWidth); Word wordInstance = new Word(word); wordInstance.setAttributes(as); textLine.addWord(wordInstance); - start = end; - end = iterator.next(); + + if (wordNeedsSplit) + { + start = start + splitOffset; + } + else + { + start = end; + end = iterator.next(); + } } 
textLine.setWidth(textLine.calculateWidth(font, fontSize)); textLines.add(textLine); @@ -221,16 +263,18 @@ float calculateWidth(PDFont font, float fontSize) throws IOException { final float scale = fontSize/FONTSCALE; float calculatedWidth = 0f; + int indexOfWord = 0; for (Word word : words) { calculatedWidth = calculatedWidth + (Float) word.getAttributes().getIterator().getAttribute(TextAttribute.WIDTH); String text = word.getText(); - if (words.indexOf(word) == words.size() -1 && Character.isWhitespace(text.charAt(text.length()-1))) + if (indexOfWord == words.size() -1 && Character.isWhitespace(text.charAt(text.length()-1))) { float whitespaceWidth = font.getStringWidth(text.substring(text.length()-1)) * scale; calculatedWidth = calculatedWidth - whitespaceWidth; } + ++indexOfWord; } return calculatedWidth; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/ScriptingHandler.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/ScriptingHandler.java new file mode 100644 index 00000000000..5e9b618d835 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/ScriptingHandler.java @@ -0,0 +1,57 @@ +package org.apache.pdfbox.pdmodel.interactive.form; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript; + +public interface ScriptingHandler +{ + /** + * Handle the fields keyboard event action. + * + * @param javaScriptAction the keyboard event action script + * @param value the current field value + * @return the resulting field value + */ + String keyboard(PDActionJavaScript javaScriptAction, String value); + + /** + * Handle the fields format event action. + * + * @param javaScriptAction the format event action script + * @param value the current field value + * @return the formatted field value + */ + String format(PDActionJavaScript javaScriptAction, String value); + + /** + * Handle the fields validate event action. + * + * @param javaScriptAction the validate event action script + * @param value the current field value + * @return the result of the validity check + */ + boolean validate(PDActionJavaScript javaScriptAction, String value); + + /** + * Handle the fields calculate event action. + * + * @param javaScriptAction the calculate event action script + * @param value the current field value + * @return the result of the field calculation + */ + String calculate(PDActionJavaScript javaScriptAction, String value); +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/package.html index a901b5f5cee..958be8cbcdf 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/measurement/PDNumberFormatDictionary.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/measurement/PDNumberFormatDictionary.java index 1abbf83f0cc..b26c98e8d7c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/measurement/PDNumberFormatDictionary.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/measurement/PDNumberFormatDictionary.java @@ -153,7 +153,8 @@ public String getFractionalDisplay() /** * This will set the value for the manner to display a fractional value. - * Allowed values are "D", "F", "R" and "T" + * Allowed values are "D", "F", "R", "T" and null. + * * @param fractionalDisplay the manner to display a fractional value */ public void setFractionalDisplay(String fractionalDisplay) @@ -301,9 +302,9 @@ public String getLabelPositionToValue() } /** - * This will set the value indicating the ordering of the label specified by U to the calculated unit value. - * Possible values are "S" and "P" - * + * This will set the value indicating the ordering of the label specified by U to the calculated + * unit value. Possible values are "S", "P" and null. 
+ * * @param labelPositionToValue label position */ public void setLabelPositionToValue(String labelPositionToValue) diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/measurement/PDViewportDictionary.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/measurement/PDViewportDictionary.java index 4f62415c56d..4958d208639 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/measurement/PDViewportDictionary.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/measurement/PDViewportDictionary.java @@ -17,6 +17,7 @@ package org.apache.pdfbox.pdmodel.interactive.measurement; import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.COSObjectable; @@ -30,11 +31,11 @@ public class PDViewportDictionary implements COSObjectable { /** - * The type of this annotation. + * The type of this object. */ public static final String TYPE = "Viewport"; - private COSDictionary viewportDictionary; + private final COSDictionary viewportDictionary; /** * Constructor. 
@@ -83,10 +84,10 @@ public String getType() */ public PDRectangle getBBox() { - COSArray bbox = (COSArray)this.getCOSObject().getDictionaryObject("BBox"); - if (bbox != null) + COSBase bbox = this.getCOSObject().getDictionaryObject(COSName.BBOX); + if (bbox instanceof COSArray) { - return new PDRectangle(bbox); + return new PDRectangle((COSArray) bbox); } return null; } @@ -98,7 +99,7 @@ public PDRectangle getBBox() */ public void setBBox(PDRectangle rectangle) { - this.getCOSObject().setItem("BBox", rectangle); + this.getCOSObject().setItem(COSName.BBOX, rectangle); } /** @@ -128,10 +129,10 @@ public void setName(String name) */ public PDMeasureDictionary getMeasure() { - COSDictionary measure = (COSDictionary)this.getCOSObject().getDictionaryObject("Measure"); - if (measure != null) + COSBase base = this.getCOSObject().getDictionaryObject(COSName.MEASURE); + if (base instanceof COSDictionary) { - return new PDMeasureDictionary(measure); + return new PDMeasureDictionary((COSDictionary) base); } return null; } @@ -143,7 +144,7 @@ public PDMeasureDictionary getMeasure() */ public void setMeasure(PDMeasureDictionary measure) { - this.getCOSObject().setItem("Measure", measure); + this.getCOSObject().setItem(COSName.MEASURE, measure); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/measurement/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/measurement/package.html index 1b91e603e55..7e975d9e494 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/measurement/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/measurement/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransition.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransition.java index 530b734284e..7640483d990 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransition.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransition.java @@ -44,7 +44,7 @@ public PDTransition() /** * creates a new transition with the given style. * - * @param style + * @param style the style to be used. */ public PDTransition(PDTransitionStyle style) { @@ -56,7 +56,7 @@ public PDTransition(PDTransitionStyle style) /** * creates a new transition for an existing dictionary * - * @param dictionary + * @param dictionary the dictionary to be used. */ public PDTransition(COSDictionary dictionary) { @@ -85,6 +85,9 @@ public String getDimension() /** * Sets the dimension in which the specified transition effect shall occur. Only for {@link PDTransitionStyle#Split} * and {@link PDTransitionStyle#Blinds}. + * + * @param dimension the dimension. + * */ public void setDimension(PDTransitionDimension dimension) { @@ -104,6 +107,8 @@ public String getMotion() /** * Sets the direction of motion for the specified transition effect. Only for {@link PDTransitionStyle#Split}, * {@link PDTransitionStyle#Blinds} and {@link PDTransitionStyle#Fly}. + * + * @param motion the motion for the specified effect. */ public void setMotion(PDTransitionMotion motion) { @@ -114,6 +119,8 @@ public void setMotion(PDTransitionMotion motion) * @return the direction in which the specified transition effect shall moves. It can be either a {@link COSInteger} * or {@link COSName#NONE}. Default to {@link COSInteger#ZERO} * @see PDTransitionDirection + * + * @return the direction. 
*/ public COSBase getDirection() { @@ -129,6 +136,8 @@ public COSBase getDirection() * Sets the direction in which the specified transition effect shall moves. Only for {@link PDTransitionStyle#Wipe}, * {@link PDTransitionStyle#Glitter}, {@link PDTransitionStyle#Fly}, {@link PDTransitionStyle#Cover}, * {@link PDTransitionStyle#Uncover} and {@link PDTransitionStyle#Push}. + * + * @param direction the direction in which the specified transition effect shall move. */ public void setDirection(PDTransitionDirection direction) { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransitionDimension.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransitionDimension.java index 4024a252922..fdc597a5c6e 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransitionDimension.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransitionDimension.java @@ -32,6 +32,6 @@ public enum PDTransitionDimension /** * Vertical */ - V; + V } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransitionMotion.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransitionMotion.java index 7de02b65ffa..5a53402df2b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransitionMotion.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransitionMotion.java @@ -32,5 +32,5 @@ public enum PDTransitionMotion /** * Outward from the center of the page */ - O; + O } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransitionStyle.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransitionStyle.java index 283e1540411..a9d8d9b3239 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransitionStyle.java +++ 
b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/PDTransitionStyle.java @@ -25,5 +25,5 @@ */ public enum PDTransitionStyle { - Split, Blinds, Box, Wipe, Dissolve, Glitter, R, Fly, Push, Cover, Uncover, Fade; + Split, Blinds, Box, Wipe, Dissolve, Glitter, R, Fly, Push, Cover, Uncover, Fade } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/package.html index 8f299ac0642..4216618ad14 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/pagenavigation/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/viewerpreferences/PDViewerPreferences.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/viewerpreferences/PDViewerPreferences.java index 24ea2a79f9d..9c26b66b4c2 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/viewerpreferences/PDViewerPreferences.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/viewerpreferences/PDViewerPreferences.java @@ -57,7 +57,7 @@ public class PDViewerPreferences implements COSObjectable /** * Enumeration containing all valid values for NonFullScreenPageMode. */ - public static enum NON_FULL_SCREEN_PAGE_MODE + public enum NON_FULL_SCREEN_PAGE_MODE { /** * From PDF Reference: "Neither document outline nor thumbnail images visible". @@ -92,7 +92,7 @@ public static enum NON_FULL_SCREEN_PAGE_MODE /** * Enumeration containing all valid values for ReadingDirection. */ - public static enum READING_DIRECTION + public enum READING_DIRECTION { /** * left to right. @@ -137,7 +137,7 @@ public static enum READING_DIRECTION /** * Enumeration containing all valid values for boundaries. 
*/ - public static enum BOUNDARY + public enum BOUNDARY { /** * use media box as boundary. @@ -164,7 +164,7 @@ public static enum BOUNDARY /** * Enumeration containing all valid values for duplex. */ - public static enum DUPLEX + public enum DUPLEX { /** * simplex printing. @@ -183,7 +183,7 @@ public static enum DUPLEX /** * Enumeration containing all valid values for printscaling. */ - public static enum PRINT_SCALING + public enum PRINT_SCALING { /** * no scaling. diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/viewerpreferences/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/viewerpreferences/package.html index a67744394c4..88e797c0bb9 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/viewerpreferences/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/viewerpreferences/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/package.html b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/package.html index 7ea3c42bda6..a8395f2bc73 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/printing/PDFPageable.java b/pdfbox/src/main/java/org/apache/pdfbox/printing/PDFPageable.java index 3179428da75..1c311b63a8d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/printing/PDFPageable.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/printing/PDFPageable.java @@ -17,6 +17,7 @@ package org.apache.pdfbox.printing; +import java.awt.RenderingHints; import java.awt.print.Book; import java.awt.print.PageFormat; import java.awt.print.Paper; @@ -33,9 +34,12 @@ public final class PDFPageable extends Book { private final PDDocument document; + private final int numberOfPages; private final boolean showPageBorder; private final float dpi; private final Orientation orientation; + private boolean subsamplingAllowed = false; + private RenderingHints renderingHints = null; /** * Creates a new PDFPageable. @@ -87,12 +91,63 @@ public PDFPageable(PDDocument document, Orientation orientation, boolean showPag this.orientation = orientation; this.showPageBorder = showPageBorder; this.dpi = dpi; + numberOfPages = document.getNumberOfPages(); + } + + /** + * Get the rendering hints. + * + * @return the rendering hints or null if none are set. + */ + public RenderingHints getRenderingHints() + { + return renderingHints; + } + + /** + * Set the rendering hints. Use this to influence rendering quality and speed. If you don't set + * them yourself or pass null, PDFBox will decide at runtime depending on the + * destination. + * + * @param renderingHints + */ + public void setRenderingHints(RenderingHints renderingHints) + { + this.renderingHints = renderingHints; + } + + /** + * Value indicating if the renderer is allowed to subsample images before drawing, according to + * image dimensions and requested scale. + * + * Subsampling may be faster and less memory-intensive in some cases, but it may also lead to + * loss of quality, especially in images with high spatial frequency. 
+ * + * @return true if subsampling of images is allowed, false otherwise. + */ + public boolean isSubsamplingAllowed() + { + return subsamplingAllowed; + } + + /** + * Sets a value instructing the renderer whether it is allowed to subsample images before + * drawing. The subsampling frequency is determined according to image size and requested scale. + * + * Subsampling may be faster and less memory-intensive in some cases, but it may also lead to + * loss of quality, especially in images with high spatial frequency. + * + * @param subsamplingAllowed The new value indicating if subsampling is allowed. + */ + public void setSubsamplingAllowed(boolean subsamplingAllowed) + { + this.subsamplingAllowed = subsamplingAllowed; } @Override public int getNumberOfPages() { - return document.getNumberOfPages(); + return numberOfPages; } /** @@ -137,24 +192,19 @@ public PageFormat getPageFormat(int pageIndex) format.setPaper(paper); // auto portrait/landscape - if (orientation == Orientation.AUTO) + switch (orientation) { - if (isLandscape) - { + case AUTO: + format.setOrientation(isLandscape ? 
PageFormat.LANDSCAPE : PageFormat.PORTRAIT); + break; + case LANDSCAPE: format.setOrientation(PageFormat.LANDSCAPE); - } - else - { + break; + case PORTRAIT: format.setOrientation(PageFormat.PORTRAIT); - } - } - else if (orientation == Orientation.LANDSCAPE) - { - format.setOrientation(PageFormat.LANDSCAPE); - } - else if (orientation == Orientation.PORTRAIT) - { - format.setOrientation(PageFormat.PORTRAIT); + break; + default: + break; } return format; @@ -163,10 +213,13 @@ else if (orientation == Orientation.PORTRAIT) @Override public Printable getPrintable(int i) { - if (i >= getNumberOfPages()) + if (i >= numberOfPages) { - throw new IndexOutOfBoundsException(i + " >= " + getNumberOfPages()); + throw new IndexOutOfBoundsException(i + " >= " + numberOfPages); } - return new PDFPrintable(document, Scaling.ACTUAL_SIZE, showPageBorder, dpi); + PDFPrintable printable = new PDFPrintable(document, Scaling.ACTUAL_SIZE, showPageBorder, dpi); + printable.setSubsamplingAllowed(subsamplingAllowed); + printable.setRenderingHints(renderingHints); + return printable; } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/printing/PDFPrintable.java b/pdfbox/src/main/java/org/apache/pdfbox/printing/PDFPrintable.java index f9480ed0744..d0e4c4580fe 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/printing/PDFPrintable.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/printing/PDFPrintable.java @@ -21,6 +21,7 @@ import java.awt.Color; import java.awt.Graphics; import java.awt.Graphics2D; +import java.awt.RenderingHints; import java.awt.geom.AffineTransform; import java.awt.image.BufferedImage; import java.awt.print.PageFormat; @@ -30,8 +31,10 @@ import java.io.IOException; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageTree; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.rendering.RenderDestination; /** * Prints pages 
from a PDF document using any page size or scaling mode. @@ -40,13 +43,15 @@ */ public final class PDFPrintable implements Printable { - private final PDDocument document; + private final PDPageTree pageTree; private final PDFRenderer renderer; private final boolean showPageBorder; private final Scaling scaling; private final float dpi; private final boolean center; + private boolean subsamplingAllowed = false; + private RenderingHints renderingHints = null; /** * Creates a new PDFPrintable. @@ -108,19 +113,86 @@ public PDFPrintable(PDDocument document, Scaling scaling, boolean showPageBorder public PDFPrintable(PDDocument document, Scaling scaling, boolean showPageBorder, float dpi, boolean center) { - this.document = document; - this.renderer = new PDFRenderer(document); + this(document, scaling, showPageBorder, dpi, center, new PDFRenderer(document)); + } + + /** + * Creates a new PDFPrintable with the given page scaling and with optional page borders shown. + * The image will be rasterized at the given DPI before being sent to the printer. + * + * @param document the document to print + * @param scaling page scaling policy + * @param showPageBorder true if page borders are to be printed + * @param dpi if non-zero then the image will be rasterized at the given DPI + * @param center true if the content is to be centered on the page (otherwise top-left). + * @param renderer the document renderer. Useful if {@link PDFRenderer} has been subclassed. + */ + public PDFPrintable(PDDocument document, Scaling scaling, boolean showPageBorder, float dpi, + boolean center, PDFRenderer renderer) + { + this.pageTree = document.getPages(); + this.renderer = renderer; this.scaling = scaling; this.showPageBorder = showPageBorder; this.dpi = dpi; this.center = center; } - + + /** + * Value indicating if the renderer is allowed to subsample images before drawing, according to + * image dimensions and requested scale. 
+ * + * Subsampling may be faster and less memory-intensive in some cases, but it may also lead to + * loss of quality, especially in images with high spatial frequency. + * + * @return true if subsampling of images is allowed, false otherwise. + */ + public boolean isSubsamplingAllowed() + { + return subsamplingAllowed; + } + + /** + * Sets a value instructing the renderer whether it is allowed to subsample images before + * drawing. The subsampling frequency is determined according to image size and requested scale. + * + * Subsampling may be faster and less memory-intensive in some cases, but it may also lead to + * loss of quality, especially in images with high spatial frequency. + * + * @param subsamplingAllowed The new value indicating if subsampling is allowed. + */ + public void setSubsamplingAllowed(boolean subsamplingAllowed) + { + this.subsamplingAllowed = subsamplingAllowed; + } + + /** + * Get the rendering hints. + * + * @return the rendering hints or null if none are set. + */ + public RenderingHints getRenderingHints() + { + return renderingHints; + } + + /** + * Set the rendering hints. Use this to influence rendering quality and speed. If you don't set + * them yourself or pass null, PDFBox will decide at runtime depending on the + * destination. 
+ * + * @param renderingHints + */ + public void setRenderingHints(RenderingHints renderingHints) + { + this.renderingHints = renderingHints; + } + @Override public int print(Graphics graphics, PageFormat pageFormat, int pageIndex) throws PrinterException { - if (pageIndex < 0 || pageIndex >= document.getNumberOfPages()) + if (pageIndex < 0 || pageIndex >= pageTree.getCount()) { return NO_SUCH_PAGE; } @@ -128,7 +200,7 @@ public int print(Graphics graphics, PageFormat pageFormat, int pageIndex) { Graphics2D graphics2D = (Graphics2D)graphics; - PDPage page = document.getPage(pageIndex); + PDPage page = pageTree.get(pageIndex); PDRectangle cropBox = getRotatedCropBox(page); // the imageable area is the area within the page margins @@ -148,6 +220,12 @@ public int print(Graphics graphics, PageFormat pageFormat, int pageIndex) { scale = 1; } + + // only stretch to fit when enabled + if (scale < 1 && scaling == Scaling.STRETCH_TO_FIT) + { + scale = 1; + } } // set the graphics origin to the origin of the imageable area (i.e the margins) @@ -166,8 +244,8 @@ public int print(Graphics graphics, PageFormat pageFormat, int pageIndex) if (dpi > 0) { float dpiScale = dpi / 72; - image = new BufferedImage((int)(imageableWidth * dpiScale), - (int)(imageableHeight * dpiScale), + image = new BufferedImage((int)(imageableWidth * dpiScale / scale), + (int)(imageableHeight * dpiScale / scale), BufferedImage.TYPE_INT_ARGB); printerGraphics = graphics2D; @@ -181,7 +259,9 @@ public int print(Graphics graphics, PageFormat pageFormat, int pageIndex) // draw to graphics using PDFRender AffineTransform transform = (AffineTransform)graphics2D.getTransform().clone(); graphics2D.setBackground(Color.WHITE); - renderer.renderPageToGraphics(pageIndex, graphics2D, (float)scale); + renderer.setSubsamplingAllowed(subsamplingAllowed); + renderer.setRenderingHints(renderingHints); + renderer.renderPageToGraphics(pageIndex, graphics2D, (float) scale, (float) scale, RenderDestination.PRINT); // draw crop 
box if (showPageBorder) diff --git a/pdfbox/src/main/java/org/apache/pdfbox/rendering/GroupGraphics.java b/pdfbox/src/main/java/org/apache/pdfbox/rendering/GroupGraphics.java new file mode 100644 index 00000000000..3025f257238 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/rendering/GroupGraphics.java @@ -0,0 +1,730 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.rendering; + +import java.awt.Color; +import java.awt.Composite; +import java.awt.Font; +import java.awt.FontMetrics; +import java.awt.Graphics; +import java.awt.Graphics2D; +import java.awt.GraphicsConfiguration; +import java.awt.Image; +import java.awt.Paint; +import java.awt.Rectangle; +import java.awt.RenderingHints; +import java.awt.Shape; +import java.awt.Stroke; +import java.awt.font.FontRenderContext; +import java.awt.font.GlyphVector; +import java.awt.geom.AffineTransform; +import java.awt.image.BufferedImage; +import java.awt.image.BufferedImageOp; +import java.awt.image.DataBuffer; +import java.awt.image.DataBufferInt; +import java.awt.image.ImageObserver; +import java.awt.image.RenderedImage; +import java.awt.image.renderable.RenderableImage; +import java.text.AttributedCharacterIterator; +import java.util.Map; + +/** + * Graphics implementation for non-isolated transparency groups. + *

    + * Non-isolated groups require that the group backdrop (copied from parent group or + * page) is used as the initial contents of the image to which the group is rendered. + * This allows blend modes to blend the group contents with the graphics behind + * the group. Finally, when the group rendering is done, backdrop removal must be + * computed (see {@link #removeBackdrop(java.awt.image.BufferedImage, int, int) removeBackdrop}). + * It ensures the backdrop is not rendered twice on the parent but it leaves the + * effects of blend modes. + *

    + * This class renders the group contents to two images. {@code groupImage} is + * initialized with the backdrop and group contents are drawn over it. + * {@code groupAlphaImage} is initially fully transparent and it accumulates + * the total alpha of the group contents excluding backdrop. + *

    + * If a non-isolated group uses only the blend mode Normal, it can be optimized + * and rendered like an isolated group; backdrop usage and removal are not needed. + */ + +class GroupGraphics extends Graphics2D +{ + private final BufferedImage groupImage; + private final BufferedImage groupAlphaImage; + private final Graphics2D groupG2D; + private final Graphics2D alphaG2D; + + GroupGraphics(BufferedImage groupImage, Graphics2D groupGraphics) + { + this.groupImage = groupImage; + this.groupG2D = groupGraphics; + this.groupAlphaImage = new BufferedImage(groupImage.getWidth(), groupImage.getHeight(), + BufferedImage.TYPE_INT_ARGB); + this.alphaG2D = groupAlphaImage.createGraphics(); + } + + private GroupGraphics(BufferedImage groupImage, Graphics2D groupGraphics, + BufferedImage groupAlphaImage, Graphics2D alphaGraphics) + { + this.groupImage = groupImage; + this.groupG2D = groupGraphics; + this.groupAlphaImage = groupAlphaImage; + this.alphaG2D = alphaGraphics; + } + + @Override + public void clearRect(int x, int y, int width, int height) + { + groupG2D.clearRect(x, y, width, height); + alphaG2D.clearRect(x, y, width, height); + } + + @Override + public void clipRect(int x, int y, int width, int height) + { + groupG2D.clipRect(x, y, width, height); + alphaG2D.clipRect(x, y, width, height); + } + + @Override + public void copyArea(int x, int y, int width, int height, int dx, int dy) + { + groupG2D.copyArea(x, y, width, height, dx, dy); + alphaG2D.copyArea(x, y, width, height, dx, dy); + } + + @Override + public Graphics create() + { + Graphics g = groupG2D.create(); + Graphics a = alphaG2D.create(); + if (g instanceof Graphics2D && a instanceof Graphics2D) + { + return new GroupGraphics(groupImage, (Graphics2D)g, groupAlphaImage, (Graphics2D)a); + } + g.dispose(); + a.dispose(); + throw new UnsupportedOperationException("Only Graphics2D supported by this method"); + } + + @Override + public void dispose() + { + groupG2D.dispose(); + alphaG2D.dispose(); + } + + 
@Override + public void drawArc(int x, int y, int width, int height, int startAngle, int arcAngle) + { + groupG2D.drawArc(x, y, width, height, startAngle, arcAngle); + alphaG2D.drawArc(x, y, width, height, startAngle, arcAngle); + } + + @Override + public boolean drawImage(Image img, int x, int y, Color bgcolor, ImageObserver observer) + { + groupG2D.drawImage(img, x, y, bgcolor, observer); + return alphaG2D.drawImage(img, x, y, bgcolor, observer); + } + + @Override + public boolean drawImage(Image img, int x, int y, ImageObserver observer) + { + groupG2D.drawImage(img, x, y, observer); + return alphaG2D.drawImage(img, x, y, observer); + } + + @Override + public boolean drawImage(Image img, int x, int y, int width, int height, + Color bgcolor, ImageObserver observer) + { + groupG2D.drawImage(img, x, y, width, height, bgcolor, observer); + return alphaG2D.drawImage(img, x, y, width, height, bgcolor, observer); + } + + @Override + public boolean drawImage(Image img, int x, int y, int width, int height, ImageObserver observer) + { + groupG2D.drawImage(img, x, y, width, height, observer); + return alphaG2D.drawImage(img, x, y, width, height, observer); + } + + @Override + public boolean drawImage(Image img, int dx1, int dy1, int dx2, int dy2, int sx1, + int sy1, int sx2, int sy2, Color bgcolor, ImageObserver observer) + { + groupG2D.drawImage(img, dx1, dy1, dx2, dy2, sx1, sy1, sx2, sy2, bgcolor, observer); + return alphaG2D.drawImage(img, dx1, dy1, dx2, dy2, sx1, sy1, sx2, sy2, bgcolor, observer); + } + + @Override + public boolean drawImage(Image img, int dx1, int dy1, int dx2, int dy2, int sx1, + int sy1, int sx2, int sy2, ImageObserver observer) + { + groupG2D.drawImage(img, dx1, dy1, dx2, dy2, sx1, sy1, sx2, sy2, observer); + return alphaG2D.drawImage(img, dx1, dy1, dx2, dy2, sx1, sy1, sx2, sy2, observer); + } + + @Override + public void drawLine(int x1, int y1, int x2, int y2) + { + groupG2D.drawLine(x1, y1, x2, y2); + alphaG2D.drawLine(x1, y1, x2, y2); + } + + 
@Override + public void drawOval(int x, int y, int width, int height) + { + groupG2D.drawOval(x, y, width, height); + alphaG2D.drawOval(x, y, width, height); + } + + @Override + public void drawPolygon(int[] xPoints, int[] yPoints, int nPoints) + { + groupG2D.drawPolygon(xPoints, yPoints, nPoints); + alphaG2D.drawPolygon(xPoints, yPoints, nPoints); + } + + @Override + public void drawPolyline(int[] xPoints, int[] yPoints, int nPoints) + { + groupG2D.drawPolyline(xPoints, yPoints, nPoints); + alphaG2D.drawPolyline(xPoints, yPoints, nPoints); + } + + @Override + public void drawRoundRect(int x, int y, int width, int height, int arcWidth, int arcHeight) + { + groupG2D.drawRoundRect(x, y, width, height, arcWidth, arcHeight); + alphaG2D.drawRoundRect(x, y, width, height, arcWidth, arcHeight); + } + + @Override + public void drawString(AttributedCharacterIterator iterator, int x, int y) + { + groupG2D.drawString(iterator, x, y); + alphaG2D.drawString(iterator, x, y); + } + + @Override + public void drawString(String str, int x, int y) + { + groupG2D.drawString(str, x, y); + alphaG2D.drawString(str, x, y); + } + + @Override + public void fillArc(int x, int y, int width, int height, int startAngle, int arcAngle) + { + groupG2D.fillArc(x, y, width, height, startAngle, arcAngle); + alphaG2D.fillArc(x, y, width, height, startAngle, arcAngle); + } + + @Override + public void fillOval(int x, int y, int width, int height) + { + groupG2D.fillOval(x, y, width, height); + alphaG2D.fillOval(x, y, width, height); + } + + @Override + public void fillPolygon(int[] xPoints, int[] yPoints, int nPoints) + { + groupG2D.fillPolygon(xPoints, yPoints, nPoints); + alphaG2D.fillPolygon(xPoints, yPoints, nPoints); + } + + @Override + public void fillRect(int x, int y, int width, int height) + { + groupG2D.fillRect(x, y, width, height); + alphaG2D.fillRect(x, y, width, height); + } + + @Override + public void fillRoundRect(int x, int y, int width, int height, int arcWidth, int arcHeight) + { + 
groupG2D.fillRoundRect(x, y, width, height, arcWidth, arcHeight); + alphaG2D.fillRoundRect(x, y, width, height, arcWidth, arcHeight); + } + + @Override + public Shape getClip() + { + return groupG2D.getClip(); + } + + @Override + public Rectangle getClipBounds() + { + return groupG2D.getClipBounds(); + } + + @Override + public Color getColor() + { + return groupG2D.getColor(); + } + + @Override + public Font getFont() + { + return groupG2D.getFont(); + } + + @Override + public FontMetrics getFontMetrics(Font f) + { + return groupG2D.getFontMetrics(f); + } + + @Override + public void setClip(int x, int y, int width, int height) + { + groupG2D.setClip(x, y, width, height); + alphaG2D.setClip(x, y, width, height); + } + + @Override + public void setClip(Shape clip) + { + groupG2D.setClip(clip); + alphaG2D.setClip(clip); + } + + @Override + public void setColor(Color c) + { + groupG2D.setColor(c); + alphaG2D.setColor(c); + } + + @Override + public void setFont(Font font) + { + groupG2D.setFont(font); + alphaG2D.setFont(font); + } + + @Override + public void setPaintMode() + { + groupG2D.setPaintMode(); + alphaG2D.setPaintMode(); + } + + @Override + public void setXORMode(Color c1) + { + groupG2D.setXORMode(c1); + alphaG2D.setXORMode(c1); + } + + @Override + public void translate(int x, int y) + { + groupG2D.translate(x, y); + alphaG2D.translate(x, y); + } + + @Override + public void addRenderingHints(Map hints) + { + groupG2D.addRenderingHints(hints); + alphaG2D.addRenderingHints(hints); + } + + @Override + public void clip(Shape s) + { + groupG2D.clip(s); + alphaG2D.clip(s); + } + + @Override + public void draw(Shape s) + { + groupG2D.draw(s); + alphaG2D.draw(s); + } + + @Override + public void drawGlyphVector(GlyphVector g, float x, float y) + { + groupG2D.drawGlyphVector(g, x, y); + alphaG2D.drawGlyphVector(g, x, y); + } + + @Override + public void drawImage(BufferedImage img, BufferedImageOp op, int x, int y) + { + groupG2D.drawImage(img, op, x, y); + 
alphaG2D.drawImage(img, op, x, y); + } + + @Override + public boolean drawImage(Image img, AffineTransform xform, ImageObserver obs) + { + groupG2D.drawImage(img, xform, obs); + return alphaG2D.drawImage(img, xform, obs); + } + + @Override + public void drawRenderableImage(RenderableImage img, AffineTransform xform) + { + groupG2D.drawRenderableImage(img, xform); + alphaG2D.drawRenderableImage(img, xform); + } + + @Override + public void drawRenderedImage(RenderedImage img, AffineTransform xform) + { + groupG2D.drawRenderedImage(img, xform); + alphaG2D.drawRenderedImage(img, xform); + } + + @Override + public void drawString(AttributedCharacterIterator iterator, float x, float y) + { + groupG2D.drawString(iterator, x, y); + alphaG2D.drawString(iterator, x, y); + } + + @Override + public void drawString(String str, float x, float y) + { + groupG2D.drawString(str, x, y); + alphaG2D.drawString(str, x, y); + } + + @Override + public void fill(Shape s) + { + groupG2D.fill(s); + alphaG2D.fill(s); + } + + @Override + public Color getBackground() + { + return groupG2D.getBackground(); + } + + @Override + public Composite getComposite() + { + return groupG2D.getComposite(); + } + + @Override + public GraphicsConfiguration getDeviceConfiguration() + { + return groupG2D.getDeviceConfiguration(); + } + + @Override + public FontRenderContext getFontRenderContext() + { + return groupG2D.getFontRenderContext(); + } + + @Override + public Paint getPaint() + { + return groupG2D.getPaint(); + } + + @Override + public Object getRenderingHint(RenderingHints.Key hintKey) + { + return groupG2D.getRenderingHint(hintKey); + } + + @Override + public RenderingHints getRenderingHints() + { + return groupG2D.getRenderingHints(); + } + + @Override + public Stroke getStroke() + { + return groupG2D.getStroke(); + } + + @Override + public AffineTransform getTransform() + { + return groupG2D.getTransform(); + } + + @Override + public boolean hit(Rectangle rect, Shape s, boolean onStroke) + { + 
return groupG2D.hit(rect, s, onStroke); + } + + @Override + public void rotate(double theta) + { + groupG2D.rotate(theta); + alphaG2D.rotate(theta); + } + + @Override + public void rotate(double theta, double x, double y) + { + groupG2D.rotate(theta, x, y); + alphaG2D.rotate(theta, x, y); + } + + @Override + public void scale(double sx, double sy) + { + groupG2D.scale(sx, sy); + alphaG2D.scale(sx, sy); + } + + @Override + public void setBackground(Color color) + { + groupG2D.setBackground(color); + alphaG2D.setBackground(color); + } + + @Override + public void setComposite(Composite comp) + { + groupG2D.setComposite(comp); + alphaG2D.setComposite(comp); + } + + @Override + public void setPaint(Paint paint) + { + groupG2D.setPaint(paint); + alphaG2D.setPaint(paint); + } + + @Override + public void setRenderingHint(RenderingHints.Key hintKey, Object hintValue) + { + groupG2D.setRenderingHint(hintKey, hintValue); + alphaG2D.setRenderingHint(hintKey, hintValue); + } + + @Override + public void setRenderingHints(Map hints) + { + groupG2D.setRenderingHints(hints); + alphaG2D.setRenderingHints(hints); + } + + @Override + public void setStroke(Stroke s) + { + groupG2D.setStroke(s); + alphaG2D.setStroke(s); + } + + @Override + public void setTransform(AffineTransform tx) + { + groupG2D.setTransform(tx); + alphaG2D.setTransform(tx); + } + + @Override + public void shear(double shx, double shy) + { + groupG2D.shear(shx, shy); + alphaG2D.shear(shx, shy); + } + + @Override + public void transform(AffineTransform tx) + { + groupG2D.transform(tx); + alphaG2D.transform(tx); + } + + @Override + public void translate(double tx, double ty) + { + groupG2D.translate(tx, ty); + alphaG2D.translate(tx, ty); + } + + /** + * Computes backdrop removal. + * The backdrop removal equation is given in section 11.4.4 in the PDF 32000-1:2008 + * standard. It returns the final color C for each pixel in the group:
    + * C = Cn + (Cn - C0) * (alpha0 / alphagn - alpha0)
    + * where
    + * Cn is the group color including backdrop (read from groupImage),
    + * C0 is the backdrop color,
    + * alpha0 is the backdrop alpha,
    + * alphagn is the group alpha excluding backdrop (read the + * alpha channel from groupAlphaImage)
    + *

    + * The alpha of the result is equal to alphagn, i.e., the alpha + * channel of groupAlphaImage. + *

    + * The backdrop image may be much larger than groupImage if, + * for example, the current page is used as the backdrop. Only a specific rectangular + * region of backdrop is used in the backdrop removal: upper-left corner + * is at (offsetX, offsetY); width and height are equal to those of + * groupImage. + * + * @param backdrop group backdrop + * @param offsetX backdrop left X coordinate + * @param offsetY backdrop upper Y coordinate + */ + void removeBackdrop(BufferedImage backdrop, int offsetX, int offsetY) + { + int groupWidth = groupImage.getWidth(); + int groupHeight = groupImage.getHeight(); + int backdropWidth = backdrop.getWidth(); + int backdropHeight = backdrop.getHeight(); + int groupType = groupImage.getType(); + int groupAlphaType = groupAlphaImage.getType(); + int backdropType = backdrop.getType(); + DataBuffer groupDataBuffer = groupImage.getRaster().getDataBuffer(); + DataBuffer groupAlphaDataBuffer = groupAlphaImage.getRaster().getDataBuffer(); + DataBuffer backdropDataBuffer = backdrop.getRaster().getDataBuffer(); + + if (groupType == BufferedImage.TYPE_INT_ARGB && + groupAlphaType == BufferedImage.TYPE_INT_ARGB && + (backdropType == BufferedImage.TYPE_INT_ARGB || backdropType == BufferedImage.TYPE_INT_RGB) && + groupDataBuffer instanceof DataBufferInt && + groupAlphaDataBuffer instanceof DataBufferInt && + backdropDataBuffer instanceof DataBufferInt) + { + // Optimized computation for int[] buffers. + + int[] groupData = ((DataBufferInt)groupDataBuffer).getData(); + int[] groupAlphaData = ((DataBufferInt)groupAlphaDataBuffer).getData(); + int[] backdropData = ((DataBufferInt)backdropDataBuffer).getData(); + boolean backdropHasAlpha = backdropType == BufferedImage.TYPE_INT_ARGB; + + for (int y = 0; y < groupHeight; y++) + { + for (int x = 0; x < groupWidth; x++) + { + int index = x + y * groupWidth; + + // alphagn is the total alpha of the group contents excluding backdrop. 
+ int alphagn = (groupAlphaData[index] >> 24) & 0xFF; + if (alphagn == 0) + { + // Avoid division by 0 and set the result to fully transparent. + groupData[index] = 0; + continue; + } + + int backdropX = x + offsetX; + int backdropY = y + offsetY; + int backdropRGB; // color of backdrop pixel + float alpha0; // alpha of backdrop pixel, 0-255 (255 when the backdrop image has no alpha channel) + + if (backdropX >= 0 && backdropX < backdropWidth && + backdropY >= 0 && backdropY < backdropHeight) + { + backdropRGB = backdropData[backdropX + backdropY * backdropWidth]; + alpha0 = backdropHasAlpha ? ((backdropRGB >> 24) & 0xFF) : 255; + } + else + { + // Backdrop pixel is out of bounds. Use a transparent value. + backdropRGB = 0; + alpha0 = 0; + } + + // Alpha factor alpha0 / alphagn - alpha0 (alphas normalized to 0.0-1.0) is never negative, but it can exceed 1.0 when alphagn is small; backdropRemoval() clamps each component to 0-255. + float alphaFactor = alpha0 / alphagn - alpha0 / 255.0f; + int groupRGB = groupData[index]; // color of group pixel + + // Compute backdrop removal for RGB components. + int r = backdropRemoval(groupRGB, backdropRGB, 16, alphaFactor); + int g = backdropRemoval(groupRGB, backdropRGB, 8, alphaFactor); + int b = backdropRemoval(groupRGB, backdropRGB, 0, alphaFactor); + + // Copy the result back to groupImage. The alpha of the result + // is equal to alphagn. + groupData[index] = (alphagn << 24) | (r << 16) | (g << 8) | b; + } + } + } + else + { + // Non-optimized computation for other types of color spaces and pixel buffers. 
+ + for (int y = 0; y < groupHeight; y++) + { + for (int x = 0; x < groupWidth; x++) + { + int alphagn = (groupAlphaImage.getRGB(x, y) >> 24) & 0xFF; + if (alphagn == 0) + { + groupImage.setRGB(x, y, 0); + continue; + } + + int backdropX = x + offsetX; + int backdropY = y + offsetY; + int backdropRGB; + float alpha0; + if (backdropX >= 0 && backdropX < backdropWidth && + backdropY >= 0 && backdropY < backdropHeight) + { + backdropRGB = backdrop.getRGB(backdropX, backdropY); + alpha0 = (backdropRGB >> 24) & 0xFF; + } + else + { + backdropRGB = 0; + alpha0 = 0; + } + + int groupRGB = groupImage.getRGB(x, y); + float alphaFactor = alpha0 / alphagn - alpha0 / 255.0f; + + int r = backdropRemoval(groupRGB, backdropRGB, 16, alphaFactor); + int g = backdropRemoval(groupRGB, backdropRGB, 8, alphaFactor); + int b = backdropRemoval(groupRGB, backdropRGB, 0, alphaFactor); + + groupImage.setRGB(x, y, (alphagn << 24) | (r << 16) | (g << 8) | b); + } + } + } + } + + /** + * Computes the backdrop removal equation for the 8-bit color component found at bit offset shift in both pixels, rounding and clamping the result to 0-255. + * C = Cn + (Cn - C0) * alphaFactor, where alphaFactor is (alpha0 / alphagn - alpha0) + */ + private int backdropRemoval(int groupRGB, int backdropRGB, int shift, float alphaFactor) + { + float cn = (groupRGB >> shift) & 0xFF; + float c0 = (backdropRGB >> shift) & 0xFF; + int c = Math.round(cn + (cn - c0) * alphaFactor); + return (c < 0) ? 0 : (c > 255 ? 255 : c); + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/rendering/PDFRenderer.java b/pdfbox/src/main/java/org/apache/pdfbox/rendering/PDFRenderer.java index 61931834d67..61f6fca18f4 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/rendering/PDFRenderer.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/rendering/PDFRenderer.java @@ -1,218 +1,657 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.rendering; - -import java.awt.Color; -import java.awt.Graphics2D; -import java.awt.image.BufferedImage; -import java.io.IOException; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.common.PDRectangle; - -/** - * Renders a PDF document to an AWT BufferedImage. - * This class may be overridden in order to perform custom rendering. - * - * @author John Hewson - */ -public class PDFRenderer -{ - protected final PDDocument document; - // TODO keep rendering state such as caches here - - /** - * Creates a new PDFRenderer. - * @param document the document to render - */ - public PDFRenderer(PDDocument document) - { - this.document = document; - } - - /** - * Returns the given page as an RGB image at 72 DPI - * @param pageIndex the zero-based index of the page to be converted. - * @return the rendered page image - * @throws IOException if the PDF cannot be read - */ - public BufferedImage renderImage(int pageIndex) throws IOException - { - return renderImage(pageIndex, 1); - } - - /** - * Returns the given page as an RGB image at the given scale. - * A scale of 1 will render at 72 DPI. 
- * @param pageIndex the zero-based index of the page to be converted - * @param scale the scaling factor, where 1 = 72 DPI - * @return the rendered page image - * @throws IOException if the PDF cannot be read - */ - public BufferedImage renderImage(int pageIndex, float scale) throws IOException - { - return renderImage(pageIndex, scale, ImageType.RGB); - } - - /** - * Returns the given page as an RGB image at the given DPI. - * @param pageIndex the zero-based index of the page to be converted - * @param dpi the DPI (dots per inch) to render at - * @return the rendered page image - * @throws IOException if the PDF cannot be read - */ - public BufferedImage renderImageWithDPI(int pageIndex, float dpi) throws IOException - { - return renderImage(pageIndex, dpi / 72f, ImageType.RGB); - } - - /** - * Returns the given page as an RGB image at the given DPI. - * @param pageIndex the zero-based index of the page to be converted - * @param dpi the DPI (dots per inch) to render at - * @param imageType the type of image to return - * @return the rendered page image - * @throws IOException if the PDF cannot be read - */ - public BufferedImage renderImageWithDPI(int pageIndex, float dpi, ImageType imageType) - throws IOException - { - return renderImage(pageIndex, dpi / 72f, imageType); - } - - /** - * Returns the given page as an RGB or ARGB image at the given scale. 
- * @param pageIndex the zero-based index of the page to be converted - * @param scale the scaling factor, where 1 = 72 DPI - * @param imageType the type of image to return - * @return the rendered page image - * @throws IOException if the PDF cannot be read - */ - public BufferedImage renderImage(int pageIndex, float scale, ImageType imageType) - throws IOException - { - PDPage page = document.getPage(pageIndex); - - PDRectangle cropbBox = page.getCropBox(); - float widthPt = cropbBox.getWidth(); - float heightPt = cropbBox.getHeight(); - int widthPx = Math.round(widthPt * scale); - int heightPx = Math.round(heightPt * scale); - int rotationAngle = page.getRotation(); - - // swap width and height - BufferedImage image; - if (rotationAngle == 90 || rotationAngle == 270) - { - image = new BufferedImage(heightPx, widthPx, imageType.toBufferedImageType()); - } - else - { - image = new BufferedImage(widthPx, heightPx, imageType.toBufferedImageType()); - } - - // use a transparent background if the imageType supports alpha - Graphics2D g = image.createGraphics(); - if (imageType == ImageType.ARGB) - { - g.setBackground(new Color(0, 0, 0, 0)); - } - else - { - g.setBackground(Color.WHITE); - } - - renderPage(page, g, image.getWidth(), image.getHeight(), scale, scale); - g.dispose(); - - return image; - } - - /** - * Renders a given page to an AWT Graphics2D instance. - * @param pageIndex the zero-based index of the page to be converted - * @param graphics the Graphics2D on which to draw the page - * @throws IOException if the PDF cannot be read - */ - public void renderPageToGraphics(int pageIndex, Graphics2D graphics) throws IOException - { - renderPageToGraphics(pageIndex, graphics, 1); - } - - /** - * Renders a given page to an AWT Graphics2D instance. 
- * @param pageIndex the zero-based index of the page to be converted - * @param graphics the Graphics2D on which to draw the page - * @param scale the scale to draw the page at - * @throws IOException if the PDF cannot be read - */ - public void renderPageToGraphics(int pageIndex, Graphics2D graphics, float scale) - throws IOException - { - PDPage page = document.getPage(pageIndex); - // TODO need width/wight calculations? should these be in PageDrawer? - PDRectangle adjustedCropBox = page.getCropBox(); - renderPage(page, graphics, (int)adjustedCropBox.getWidth(), (int)adjustedCropBox.getHeight(), scale, scale); - } - - // renders a page to the given graphics - private void renderPage(PDPage page, Graphics2D graphics, int width, int height, float scaleX, - float scaleY) throws IOException - { - graphics.clearRect(0, 0, width, height); - - graphics.scale(scaleX, scaleY); - // TODO should we be passing the scale to PageDrawer rather than messing with Graphics? - - PDRectangle cropBox = page.getCropBox(); - int rotationAngle = page.getRotation(); - - if (rotationAngle != 0) - { - float translateX = 0; - float translateY = 0; - switch (rotationAngle) - { - case 90: - translateX = cropBox.getHeight(); - break; - case 270: - translateY = cropBox.getWidth(); - break; - case 180: - translateX = cropBox.getWidth(); - translateY = cropBox.getHeight(); - break; - } - graphics.translate(translateX, translateY); - graphics.rotate((float) Math.toRadians(rotationAngle)); - } - - // the end-user may provide a custom PageDrawer - PageDrawerParameters parameters = new PageDrawerParameters(this, page); - PageDrawer drawer = createPageDrawer(parameters); - drawer.drawPage(graphics, cropBox); - } - - /** - * Returns a new PageDrawer instance, using the given parameters. May be overridden. 
- */ - protected PageDrawer createPageDrawer(PageDrawerParameters parameters) throws IOException - { - return new PageDrawer(parameters); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.rendering; + +import java.awt.Color; +import java.awt.DisplayMode; +import java.awt.Graphics2D; +import java.awt.GraphicsConfiguration; +import java.awt.GraphicsDevice; +import java.awt.RenderingHints; +import java.awt.image.BufferedImage; +import java.io.IOException; +import java.util.StringTokenizer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.blend.BlendMode; +import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentGroup; +import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentProperties; +import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; +import 
org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; + +/** + * Renders a PDF document to an AWT BufferedImage. + * This class may be overridden in order to perform custom rendering. + * + * @author John Hewson + */ +public class PDFRenderer +{ + private static final Log LOG = LogFactory.getLog(PDFRenderer.class); + + protected final PDDocument document; + // TODO keep rendering state such as caches here + + /** + * Default annotations filter, returns all annotations + */ + private AnnotationFilter annotationFilter = new AnnotationFilter() + { + @Override + public boolean accept(PDAnnotation annotation) + { + return true; + } + }; + + private boolean subsamplingAllowed = false; + + private RenderDestination defaultDestination; + + private RenderingHints renderingHints = null; + + private BufferedImage pageImage; + + private static boolean kcmsLogged = false; + + private float imageDownscalingOptimizationThreshold = 0.5f; + + /** + * Creates a new PDFRenderer. + * @param document the document to render + */ + public PDFRenderer(PDDocument document) + { + this.document = document; + + if (!kcmsLogged) + { + suggestKCMS(); + kcmsLogged = true; + } + } + + /** + * Return the AnnotationFilter. + * + * @return the AnnotationFilter + */ + public AnnotationFilter getAnnotationsFilter() + { + return annotationFilter; + } + + /** + * Set the AnnotationFilter. + * + *

    Allows rendering only the annotations accepted by the filter. + * + * @param annotationsFilter the AnnotationFilter + */ + public void setAnnotationsFilter(AnnotationFilter annotationsFilter) + { + this.annotationFilter = annotationsFilter; + } + + /** + * Value indicating if the renderer is allowed to subsample images before drawing, according to + * image dimensions and requested scale. + * + * Subsampling may be faster and less memory-intensive in some cases, but it may also lead to + * loss of quality, especially in images with high spatial frequency. + * + * @return true if subsampling of images is allowed, false otherwise. + */ + public boolean isSubsamplingAllowed() + { + return subsamplingAllowed; + } + + /** + * Sets a value instructing the renderer whether it is allowed to subsample images before + * drawing. The subsampling frequency is determined according to image size and requested scale. + * + * Subsampling may be faster and less memory-intensive in some cases, but it may also lead to + * loss of quality, especially in images with high spatial frequency. + * + * @param subsamplingAllowed The new value indicating if subsampling is allowed. + */ + public void setSubsamplingAllowed(boolean subsamplingAllowed) + { + this.subsamplingAllowed = subsamplingAllowed; + } + + /** + * @return the default render destination, or null if none has been set; while it is null, renderImage falls back to RenderDestination.EXPORT and renderPageToGraphics to RenderDestination.VIEW + */ + public RenderDestination getDefaultDestination() + { + return defaultDestination; + } + + /** + * @param defaultDestination the default render destination to set; null restores the per-method fallback + */ + public void setDefaultDestination(RenderDestination defaultDestination) + { + this.defaultDestination = defaultDestination; + } + + /** + * Get the rendering hints. + * + * @return the rendering hints or null if none are set. + */ + public RenderingHints getRenderingHints() + { + return renderingHints; + } + + /** + * Set the rendering hints. Use this to influence rendering quality and speed. 
If you don't set + * them yourself or pass null, PDFBox will decide at runtime depending on the + * destination. + * + * @param renderingHints the rendering hints to use, or null to let PDFBox choose defaults + */ + public void setRenderingHints(RenderingHints renderingHints) + { + this.renderingHints = renderingHints; + } + + /** + * + * @return the image downscaling optimization threshold. See + * {@link #setImageDownscalingOptimizationThreshold(float)} for details. + */ + public float getImageDownscalingOptimizationThreshold() + { + return imageDownscalingOptimizationThreshold; + } + + /** + * Set the image downscaling optimization threshold. This must be a value between 0 and 1. When + * rendering downscaled images and rendering hints are set to bicubic+quality and the scaling is + * smaller than the threshold, a more quality-optimized but slower method will be used. The + * default is 0.5 which is a good compromise. + * + * @param imageDownscalingOptimizationThreshold the new threshold, expected to be between 0 and 1 (the value is stored as given; this setter does not validate it) + */ + public void setImageDownscalingOptimizationThreshold(float imageDownscalingOptimizationThreshold) + { + this.imageDownscalingOptimizationThreshold = imageDownscalingOptimizationThreshold; + } + + /** + * Returns the given page as an RGB image at 72 DPI. + * @param pageIndex the zero-based index of the page to be converted. + * @return the rendered page image + * @throws IOException if the PDF cannot be read + */ + public BufferedImage renderImage(int pageIndex) throws IOException + { + return renderImage(pageIndex, 1); + } + + /** + * Returns the given page as an RGB image at the given scale. + * A scale of 1 will render at 72 DPI. + * @param pageIndex the zero-based index of the page to be converted + * @param scale the scaling factor, where 1 = 72 DPI + * @return the rendered page image + * @throws IOException if the PDF cannot be read + */ + public BufferedImage renderImage(int pageIndex, float scale) throws IOException + { + return renderImage(pageIndex, scale, ImageType.RGB); + } + + /** + * Returns the given page as an RGB image at the given DPI. 
+ * @param pageIndex the zero-based index of the page to be converted + * @param dpi the DPI (dots per inch) to render at + * @return the rendered page image + * @throws IOException if the PDF cannot be read + */ + public BufferedImage renderImageWithDPI(int pageIndex, float dpi) throws IOException + { + return renderImage(pageIndex, dpi / 72f, ImageType.RGB); + } + + /** + * Returns the given page as an image of the given type at the given DPI. + * @param pageIndex the zero-based index of the page to be converted + * @param dpi the DPI (dots per inch) to render at; it is converted to a scale of dpi / 72 + * @param imageType the type of image to return + * @return the rendered page image + * @throws IOException if the PDF cannot be read + */ + public BufferedImage renderImageWithDPI(int pageIndex, float dpi, ImageType imageType) + throws IOException + { + return renderImage(pageIndex, dpi / 72f, imageType); + } + + /** + * Returns the given page as an RGB or ARGB image at the given scale, using the default destination or RenderDestination.EXPORT if none is set. + * @param pageIndex the zero-based index of the page to be converted + * @param scale the scaling factor, where 1 = 72 DPI + * @param imageType the type of image to return + * @return the rendered page image + * @throws IOException if the PDF cannot be read + */ + public BufferedImage renderImage(int pageIndex, float scale, ImageType imageType) + throws IOException + { + return renderImage(pageIndex, scale, imageType, + defaultDestination == null ? RenderDestination.EXPORT : defaultDestination); + } + + /** + * Returns the given page as an RGB or ARGB image at the given scale. 
+ * @param pageIndex the zero-based index of the page to be converted + * @param scale the scaling factor, where 1 = 72 DPI + * @param imageType the type of image to return + * @param destination controlling visibility of optional content groups + * @return the rendered page image + * @throws IOException if the PDF cannot be read, or if the scaled page would need more than Integer.MAX_VALUE pixels + */ + public BufferedImage renderImage(int pageIndex, float scale, ImageType imageType, RenderDestination destination) + throws IOException + { + PDPage page = document.getPage(pageIndex); + + PDRectangle cropbBox = page.getCropBox(); + float widthPt = cropbBox.getWidth(); + float heightPt = cropbBox.getHeight(); + + // PDFBOX-4306 avoid single blank pixel line on the right or on the bottom + int widthPx = (int) Math.max(Math.floor(widthPt * scale), 1); + int heightPx = (int) Math.max(Math.floor(heightPt * scale), 1); + + // PDFBOX-4518 the maximum size (w*h) of a buffered image is limited to Integer.MAX_VALUE + if ((long) widthPx * (long) heightPx > Integer.MAX_VALUE) + { + throw new IOException("Maximum size of image exceeded (w * h * scale ^ 2) = "// + + widthPt + " * " + heightPt + " * " + scale + " ^ 2 > " + Integer.MAX_VALUE); + } + + int rotationAngle = page.getRotation(); + + int bimType = imageType.toBufferedImageType(); + if (imageType != ImageType.ARGB && hasBlendMode(page)) + { + // PDFBOX-4095: if the PDF has blending on the top level, draw on transparent background + // Inspired by PDF.js: if a PDF page uses any blend modes other than Normal, + // PDF.js renders everything on a fully transparent RGBA canvas. + // Finally when the page has been rendered, PDF.js draws the RGBA canvas on a white canvas. 
+ bimType = BufferedImage.TYPE_INT_ARGB; + } + + // swap width and height + BufferedImage image; + if (rotationAngle == 90 || rotationAngle == 270) + { + image = new BufferedImage(heightPx, widthPx, bimType); + } + else + { + image = new BufferedImage(widthPx, heightPx, bimType); + } + + pageImage = image; + + // use a transparent background if the image type supports alpha + Graphics2D g = image.createGraphics(); + if (image.getType() == BufferedImage.TYPE_INT_ARGB) + { + g.setBackground(new Color(0, 0, 0, 0)); + } + else + { + g.setBackground(Color.WHITE); + } + g.clearRect(0, 0, image.getWidth(), image.getHeight()); + + transform(g, page, scale, scale); + + // the end-user may provide a custom PageDrawer + RenderingHints actualRenderingHints = + renderingHints == null ? createDefaultRenderingHints(g) : renderingHints; + PageDrawerParameters parameters = + new PageDrawerParameters(this, page, subsamplingAllowed, destination, + actualRenderingHints, imageDownscalingOptimizationThreshold); + PageDrawer drawer = createPageDrawer(parameters); + drawer.drawPage(g, page.getCropBox()); + + g.dispose(); + + if (image.getType() != imageType.toBufferedImageType()) + { + // PDFBOX-4095: draw temporary transparent image on white background + BufferedImage newImage = + new BufferedImage(image.getWidth(), image.getHeight(), imageType.toBufferedImageType()); + Graphics2D dstGraphics = newImage.createGraphics(); + dstGraphics.setBackground(Color.WHITE); + dstGraphics.clearRect(0, 0, image.getWidth(), image.getHeight()); + dstGraphics.drawImage(image, 0, 0, null); + dstGraphics.dispose(); + image = newImage; + } + + return image; + } + + /** + * Renders a given page to an AWT Graphics2D instance at 72 DPI. + *

    + * Read {@link #renderPageToGraphics(int, java.awt.Graphics2D, float, float, org.apache.pdfbox.rendering.RenderDestination) renderPageToGraphics(int, Graphics2D, float, float, RenderDestination)} + * before using this. + * + * @param pageIndex the zero-based index of the page to be converted + * @param graphics the Graphics2D on which to draw the page + * @throws IOException if the PDF cannot be read + */ + public void renderPageToGraphics(int pageIndex, Graphics2D graphics) throws IOException + { + renderPageToGraphics(pageIndex, graphics, 1); + } + + /** + * Renders a given page to an AWT Graphics2D instance. + *

    + * Read {@link #renderPageToGraphics(int, java.awt.Graphics2D, float, float, org.apache.pdfbox.rendering.RenderDestination) renderPageToGraphics(int, Graphics2D, float, float, RenderDestination)} + * before using this. + * + * @param pageIndex the zero-based index of the page to be converted + * @param graphics the Graphics2D on which to draw the page + * @param scale the scaling factor, where 1 = 72 DPI + * @throws IOException if the PDF cannot be read + */ + public void renderPageToGraphics(int pageIndex, Graphics2D graphics, float scale) + throws IOException + { + renderPageToGraphics(pageIndex, graphics, scale, scale); + } + + /** + * Renders a given page to an AWT Graphics2D instance. + *

    + * Read {@link #renderPageToGraphics(int, java.awt.Graphics2D, float, float, org.apache.pdfbox.rendering.RenderDestination) renderPageToGraphics(int, Graphics2D, float, float, RenderDestination)} + * before using this. + * + * @param pageIndex the zero-based index of the page to be converted + * @param graphics the Graphics2D on which to draw the page + * @param scaleX the scale to draw the page at for the x-axis, where 1 = 72 DPI + * @param scaleY the scale to draw the page at for the y-axis, where 1 = 72 DPI + * @throws IOException if the PDF cannot be read + */ + public void renderPageToGraphics(int pageIndex, Graphics2D graphics, float scaleX, float scaleY) + throws IOException + { + renderPageToGraphics(pageIndex, graphics, scaleX, scaleY, + defaultDestination == null ? RenderDestination.VIEW : defaultDestination); + } + + /** + * Renders a given page to an AWT Graphics2D instance. + *

    + * Known problems: + *

      + *
    • rendering of PDF files with transparencies is not supported on Ubuntu, see + * PDFBOX-4581 and + * JDK-6689349. Rendering will + * not abort, but the pages will be rendered incorrectly.
    • + *
    • Clipping the Graphics2D will not work properly, see + * PDFBOX-4583.
    • + *
    + * If you encounter these problems, then you should render into an image by using the + * {@link #renderImage(int) renderImage} methods. + * + * @param pageIndex the zero-based index of the page to be converted + * @param graphics the Graphics2D on which to draw the page + * @param scaleX the scale to draw the page at for the x-axis, where 1 = 72 DPI + * @param scaleY the scale to draw the page at for the y-axis, where 1 = 72 DPI + * @param destination controlling visibility of optional content groups + * @throws IOException if the PDF cannot be read + */ + public void renderPageToGraphics(int pageIndex, Graphics2D graphics, float scaleX, float scaleY, RenderDestination destination) + throws IOException + { + PDPage page = document.getPage(pageIndex); + // TODO need width/height calculations? should these be in PageDrawer? + + transform(graphics, page, scaleX, scaleY); + + PDRectangle cropBox = page.getCropBox(); + graphics.clearRect(0, 0, (int) cropBox.getWidth(), (int) cropBox.getHeight()); + + // the end-user may provide a custom PageDrawer + RenderingHints actualRenderingHints = + renderingHints == null ? createDefaultRenderingHints(graphics) : renderingHints; + PageDrawerParameters parameters = + new PageDrawerParameters(this, page, subsamplingAllowed, destination, + actualRenderingHints, imageDownscalingOptimizationThreshold); + PageDrawer drawer = createPageDrawer(parameters); + drawer.drawPage(graphics, cropBox); + } + + /** + * Indicates whether an optional content group is enabled. 
+ * @param group the group + * @return true if the group is enabled + */ + public boolean isGroupEnabled(PDOptionalContentGroup group) + { + PDOptionalContentProperties ocProperties = document.getDocumentCatalog().getOCProperties(); + return ocProperties == null || ocProperties.isGroupEnabled(group); + } + + // scale rotate translate + private void transform(Graphics2D graphics, PDPage page, float scaleX, float scaleY) + { + graphics.scale(scaleX, scaleY); + + // TODO should we be passing the scale to PageDrawer rather than messing with Graphics? + int rotationAngle = page.getRotation(); + if (rotationAngle != 0) + { + PDRectangle cropBox = page.getCropBox(); + float translateX = 0; + float translateY = 0; + switch (rotationAngle) + { + case 90: + translateX = cropBox.getHeight(); + break; + case 270: + translateY = cropBox.getWidth(); + break; + case 180: + translateX = cropBox.getWidth(); + translateY = cropBox.getHeight(); + break; + default: + break; + } + graphics.translate(translateX, translateY); + graphics.rotate(Math.toRadians(rotationAngle)); + } + } + + private boolean isBitonal(Graphics2D graphics) + { + GraphicsConfiguration deviceConfiguration = graphics.getDeviceConfiguration(); + if (deviceConfiguration == null) + { + return false; + } + GraphicsDevice device = deviceConfiguration.getDevice(); + if (device == null) + { + return false; + } + DisplayMode displayMode = device.getDisplayMode(); + if (displayMode == null) + { + return false; + } + return displayMode.getBitDepth() == 1; + } + + private RenderingHints createDefaultRenderingHints(Graphics2D graphics) + { + RenderingHints r = new RenderingHints(null); + r.put(RenderingHints.KEY_INTERPOLATION, isBitonal(graphics) ? + RenderingHints.VALUE_INTERPOLATION_NEAREST_NEIGHBOR : + RenderingHints.VALUE_INTERPOLATION_BICUBIC); + r.put(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY); + r.put(RenderingHints.KEY_ANTIALIASING, isBitonal(graphics) ? 
+ RenderingHints.VALUE_ANTIALIAS_OFF : + RenderingHints.VALUE_ANTIALIAS_ON); + return r; + } + + /** + * Returns a new PageDrawer instance, using the given parameters. May be overridden. + */ + protected PageDrawer createPageDrawer(PageDrawerParameters parameters) throws IOException + { + PageDrawer pageDrawer = new PageDrawer(parameters); + pageDrawer.setAnnotationFilter(annotationFilter); + return pageDrawer; + } + + private boolean hasBlendMode(PDPage page) + { + // check the current resources for blend modes + PDResources resources = page.getResources(); + if (resources == null) + { + return false; + } + for (COSName name : resources.getExtGStateNames()) + { + PDExtendedGraphicsState extGState = resources.getExtGState(name); + if (extGState == null) + { + // can happen if key exists but no value + // see PDFBOX-3950-23EGDHXSBBYQLKYOKGZUOVYVNE675PRD.pdf + continue; + } + BlendMode blendMode = extGState.getBlendMode(); + if (blendMode != BlendMode.NORMAL) + { + return true; + } + } + return false; + } + + /** + * Returns the image to which the current page is being rendered. + * May be null if the page is rendered to a Graphics2D object + * instead of a BufferedImage. 
+ */ + BufferedImage getPageImage() + { + return pageImage; + } + + private static void suggestKCMS() + { + String cmmProperty = System.getProperty("sun.java2d.cmm"); + if (isMinJdk8() && !"sun.java2d.cmm.kcms.KcmsServiceProvider".equals(cmmProperty)) + { + try + { + // Make sure that class exists + Class.forName("sun.java2d.cmm.kcms.KcmsServiceProvider"); + + String version = System.getProperty("java.version"); + if (version == null || + isGoodVersion(version, "1.8.0_(\\d+)", 191) || + isGoodVersion(version, "9.0.(\\d+)", 4)) + { + return; + } + LOG.info("Your current java version is: " + version); + LOG.info("To get higher rendering speed on old java 1.8 or 9 versions,"); + LOG.info(" update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),"); + LOG.info(" or"); + LOG.info(" use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider"); + LOG.info(" or call System.setProperty(\"sun.java2d.cmm\", \"sun.java2d.cmm.kcms.KcmsServiceProvider\")"); + } + catch (ClassNotFoundException e) + { + // KCMS not available + } + } + } + + private static boolean isGoodVersion(String version, String regex, int min) + { + Matcher matcher = Pattern.compile(regex).matcher(version); + if (matcher.matches() && matcher.groupCount() >= 1) + { + try + { + int v = Integer.parseInt(matcher.group(1)); + if (v >= min) + { + // LCMS no longer bad + return true; + } + } + catch (NumberFormatException ex) + { + return true; + } + } + return false; + } + + private static boolean isMinJdk8() + { + // strategy from lucene-solr/lucene/core/src/java/org/apache/lucene/util/Constants.java + String version = System.getProperty("java.specification.version"); + final StringTokenizer st = new StringTokenizer(version, "."); + try + { + int major = Integer.parseInt(st.nextToken()); + int minor = 0; + if (st.hasMoreTokens()) + { + minor = Integer.parseInt(st.nextToken()); + } + return major > 1 || (major == 1 && minor >= 8); + } + catch (NumberFormatException nfe) + { + // maybe some new 
numbering scheme in the 22nd century + return true; + } + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java b/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java index 80187627448..0fa01d51b69 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java @@ -20,23 +20,43 @@ import java.awt.Color; import java.awt.Graphics; import java.awt.Graphics2D; +import java.awt.GraphicsConfiguration; import java.awt.GraphicsDevice; +import java.awt.Image; import java.awt.Paint; +import java.awt.Point; +import java.awt.Rectangle; import java.awt.RenderingHints; import java.awt.Shape; import java.awt.Stroke; import java.awt.TexturePaint; +import java.awt.Transparency; +import java.awt.color.ColorSpace; import java.awt.geom.AffineTransform; import java.awt.geom.Area; import java.awt.geom.GeneralPath; +import java.awt.geom.Path2D; import java.awt.geom.PathIterator; import java.awt.geom.Point2D; import java.awt.geom.Rectangle2D; import java.awt.image.BufferedImage; +import java.awt.image.ColorModel; +import java.awt.image.ComponentColorModel; +import java.awt.image.DataBuffer; +import java.awt.image.DataBufferByte; import java.awt.image.Raster; +import java.awt.image.WritableRaster; import java.io.IOException; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Deque; import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine; @@ -44,9 +64,10 @@ import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.pdmodel.PDResources; import 
org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.function.PDFunction; +import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList; import org.apache.pdfbox.pdmodel.font.PDCIDFontType0; import org.apache.pdfbox.pdmodel.font.PDCIDFontType2; import org.apache.pdfbox.pdmodel.font.PDFont; @@ -54,36 +75,49 @@ import org.apache.pdfbox.pdmodel.font.PDType0Font; import org.apache.pdfbox.pdmodel.font.PDType1CFont; import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.PDType3Font; import org.apache.pdfbox.pdmodel.graphics.PDLineDashPattern; -import org.apache.pdfbox.pdmodel.graphics.blend.SoftMaskPaint; +import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import org.apache.pdfbox.pdmodel.graphics.blend.BlendMode; import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; +import org.apache.pdfbox.pdmodel.graphics.color.PDICCBased; import org.apache.pdfbox.pdmodel.graphics.color.PDPattern; +import org.apache.pdfbox.pdmodel.graphics.color.PDSeparation; +import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.form.PDTransparencyGroup; import org.apache.pdfbox.pdmodel.graphics.image.PDImage; +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; +import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentGroup; +import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentGroup.RenderState; +import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentMembershipDictionary; import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern; import org.apache.pdfbox.pdmodel.graphics.pattern.PDShadingPattern; import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern; import org.apache.pdfbox.pdmodel.graphics.shading.PDShading; +import 
org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState; import org.apache.pdfbox.pdmodel.graphics.state.PDSoftMask; import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; +import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationUnknown; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; import org.apache.pdfbox.util.Matrix; import org.apache.pdfbox.util.Vector; /** * Paints a page in a PDF document to a Graphics context. May be subclassed to provide custom * rendering. - * - *

    If you want to do custom graphics processing rather than Graphics2D rendering, then you should - * subclass PDFGraphicsStreamEngine instead. Subclassing PageDrawer is only suitable for cases - * where the goal is to render onto a Graphics2D surface. - * + * + *

    + * If you want to do custom graphics processing rather than Graphics2D rendering, then you should + * subclass {@link PDFGraphicsStreamEngine} instead. Subclassing PageDrawer is only suitable for + * cases where the goal is to render onto a {@link Graphics2D} surface. In that case you'll also + * have to subclass {@link PDFRenderer} and modify + * {@link PDFRenderer#createPageDrawer(PageDrawerParameters)}. + * * @author Ben Litchfield */ public class PageDrawer extends PDFGraphicsStreamEngine @@ -93,13 +127,19 @@ public class PageDrawer extends PDFGraphicsStreamEngine // parent document renderer - note: this is needed for not-yet-implemented resource caching private final PDFRenderer renderer; + private final boolean subsamplingAllowed; + // the graphics device to draw to, xform is the initial transform of the device (i.e. DPI) private Graphics2D graphics; private AffineTransform xform; // the page box to draw (usually the crop box but may be another) private PDRectangle pageSize; - + + // whether image of a transparency group must be flipped + // needed when in a tiling pattern + private boolean flipTG = false; + // clipping winding rule used for the clipping path private int clipWindingRule = -1; private GeneralPath linePath = new GeneralPath(); @@ -107,12 +147,40 @@ public class PageDrawer extends PDFGraphicsStreamEngine // last clipping path private Area lastClip; - // buffered clipping area for text being drawn - private Area textClippingArea; + // clip when drawPage() is called, can be null, must be intersected when clipping + private Shape initialClip; + + // shapes of glyphs being drawn to be used for clipping + private List textClippings; // glyph cache private final Map fontGlyph2D = new HashMap(); + private final TilingPaintFactory tilingPaintFactory = new TilingPaintFactory(this); + + private final Deque transparencyGroupStack = new ArrayDeque(); + + // if greater zero the content is hidden and will not be rendered + private int 
nestedHiddenOCGCount; + + private final RenderDestination destination; + private final RenderingHints renderingHints; + private final float imageDownscalingOptimizationThreshold; + + static final int JAVA_VERSION = PageDrawer.getJavaVersion(); + + /** + * Default annotations filter, returns all annotations + */ + private AnnotationFilter annotationFilter = new AnnotationFilter() + { + @Override + public boolean accept(PDAnnotation annotation) + { + return true; + } + }; + /** * Constructor. * @@ -123,8 +191,35 @@ public PageDrawer(PageDrawerParameters parameters) throws IOException { super(parameters.getPage()); this.renderer = parameters.getRenderer(); + this.subsamplingAllowed = parameters.isSubsamplingAllowed(); + this.destination = parameters.getDestination(); + this.renderingHints = parameters.getRenderingHints(); + this.imageDownscalingOptimizationThreshold = + parameters.getImageDownscalingOptimizationThreshold(); } + /** + * Return the AnnotationFilter. + * + * @return the AnnotationFilter + */ + public AnnotationFilter getAnnotationFilter() + { + return annotationFilter; + } + + /** + * Set the AnnotationFilter. + * + *

    Allows to only render annotation accepted by the filter. + * + * @param annotationFilter the AnnotationFilter + */ + public void setAnnotationFilter(AnnotationFilter annotationFilter) + { + this.annotationFilter = annotationFilter; + } + /** * Returns the parent renderer. */ @@ -154,12 +249,7 @@ protected final GeneralPath getLinePath() */ private void setRenderingHints() { - graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, - RenderingHints.VALUE_INTERPOLATION_BICUBIC); - graphics.setRenderingHint(RenderingHints.KEY_RENDERING, - RenderingHints.VALUE_RENDER_QUALITY); - graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, - RenderingHints.VALUE_ANTIALIAS_ON); + graphics.addRenderingHints(renderingHints); } /** @@ -173,6 +263,7 @@ public void drawPage(Graphics g, PDRectangle pageSize) throws IOException { graphics = (Graphics2D) g; xform = graphics.getTransform(); + initialClip = graphics.getClip(); this.pageSize = pageSize; setRenderingHints(); @@ -180,15 +271,12 @@ public void drawPage(Graphics g, PDRectangle pageSize) throws IOException graphics.translate(0, pageSize.getHeight()); graphics.scale(1, -1); - // TODO use getStroke() to set the initial stroke - graphics.setStroke(new BasicStroke(1.0f, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER)); - // adjust for non-(0,0) crop box graphics.translate(-pageSize.getLowerLeftX(), -pageSize.getLowerLeftY()); processPage(getPage()); - for (PDAnnotation annotation : getPage().getAnnotations()) + for (PDAnnotation annotation : getPage().getAnnotations(annotationFilter)) { showAnnotation(annotation); } @@ -209,33 +297,58 @@ public void drawPage(Graphics g, PDRectangle pageSize) throws IOException void drawTilingPattern(Graphics2D g, PDTilingPattern pattern, PDColorSpace colorSpace, PDColor color, Matrix patternMatrix) throws IOException { - Graphics2D oldGraphics = graphics; + Graphics2D savedGraphics = graphics; graphics = g; - GeneralPath oldLinePath = linePath; + GeneralPath savedLinePath = linePath; 
linePath = new GeneralPath(); + int savedClipWindingRule = clipWindingRule; + clipWindingRule = -1; - Area oldLastClip = lastClip; + Area savedLastClip = lastClip; lastClip = null; + Shape savedInitialClip = initialClip; + initialClip = null; + + boolean savedFlipTG = flipTG; + flipTG = true; setRenderingHints(); processTilingPattern(pattern, color, colorSpace, patternMatrix); - graphics = oldGraphics; - linePath = oldLinePath; - lastClip = oldLastClip; + flipTG = savedFlipTG; + graphics = savedGraphics; + linePath = savedLinePath; + lastClip = savedLastClip; + initialClip = savedInitialClip; + clipWindingRule = savedClipWindingRule; + } + + private float clampColor(float color) + { + return color < 0 ? 0 : (color > 1 ? 1 : color); } /** * Returns an AWT paint for the given PDColor. + * + * @param color The color to get a paint for. This can be an actual color or a pattern. + * @throws IOException */ protected Paint getPaint(PDColor color) throws IOException { PDColorSpace colorSpace = color.getColorSpace(); - if (!(colorSpace instanceof PDPattern)) + if (colorSpace instanceof PDSeparation && + "None".equals(((PDSeparation) colorSpace).getColorantName())) + { + // PDFBOX-4900: "The special colorant name None shall not produce any visible output" + //TODO better solution needs to be found for all occurrences where toRGB is called + return new Color(0, 0, 0, 0); + } + else if (!(colorSpace instanceof PDPattern)) { float[] rgb = colorSpace.toRGB(color.getComponents()); - return new Color(rgb[0], rgb[1], rgb[2]); + return new Color(clampColor(rgb[0]), clampColor(rgb[1]), clampColor(rgb[2])); } else { @@ -248,12 +361,12 @@ protected Paint getPaint(PDColor color) throws IOException if (tilingPattern.getPaintType() == PDTilingPattern.PAINT_COLORED) { // colored tiling pattern - return new TilingPaint(this, tilingPattern, xform); + return tilingPaintFactory.create(tilingPattern, null, null, xform); } else { // uncolored tiling pattern - return new TilingPaint(this, 
tilingPattern, + return tilingPaintFactory.create(tilingPattern, patternSpace.getUnderlyingColorSpace(), color, xform); } } @@ -268,19 +381,37 @@ protected Paint getPaint(PDColor color) throws IOException } return shading.toPaint(Matrix.concatenate(getInitialMatrix(), shadingPattern.getMatrix())); - } } } - // sets the clipping path using caching for performance, we track lastClip manually because - // Graphics2D#getClip() returns a new object instead of the same one passed to setClip - private void setClip() + /** + * Sets the clipping path using caching for performance. We track lastClip manually because + * {@link Graphics2D#getClip()} returns a new object instead of the same one passed to + * {@link Graphics2D#setClip(java.awt.Shape) setClip()}. You may need to call this if you + * override {@link #showGlyph(Matrix, PDFont, int, Vector) showGlyph()}. See + * PDFBOX-5093 for more. + */ + protected final void setClip() { Area clippingPath = getGraphicsState().getCurrentClippingPath(); if (clippingPath != lastClip) { - graphics.setClip(clippingPath); + if (clippingPath.getPathIterator(null).isDone()) + { + // PDFBOX-4821: avoid bug with java printing that empty clipping path is ignored by + // replacing with empty rectangle, works because this is not an empty path + graphics.setClip(new Rectangle()); + } + else + { + graphics.setClip(clippingPath); + } + if (initialClip != null) + { + // apply the remembered initial clip, but transform it first + //TODO see PDFBOX-4583 + } lastClip = clippingPath; } } @@ -303,8 +434,8 @@ public void endText() throws IOException */ private void beginTextClip() { - // buffer the text clip because it represents a single clipping area - textClippingArea = new Area(); + // buffer the text clippings because they represent a single clipping area + textClippings = new ArrayList(); } /** @@ -316,16 +447,27 @@ private void endTextClip() RenderingMode renderingMode = state.getTextState().getRenderingMode(); // apply the buffered clip as one 
area - if (renderingMode.isClip() && !textClippingArea.isEmpty()) + if (renderingMode.isClip() && !textClippings.isEmpty()) { - state.intersectClippingPath(textClippingArea); - textClippingArea = null; + // PDFBOX-4150: this is much faster than using textClippingArea.add(new Area(glyph)) + // https://stackoverflow.com/questions/21519007/fast-union-of-shapes-in-java + GeneralPath path = new GeneralPath(Path2D.WIND_NON_ZERO, textClippings.size()); + for (Shape shape : textClippings) + { + path.append(shape, false); + } + state.intersectClippingPath(path); + textClippings = new ArrayList(); + + // PDFBOX-3681: lastClip needs to be reset, because after intersection it is still the same + // object, thus setClip() would believe that it is cached. + lastClip = null; } } @Override - protected void showFontGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, - Vector displacement) throws IOException + protected void showFontGlyph(Matrix textRenderingMatrix, PDFont font, int code, + Vector displacement) throws IOException { AffineTransform at = textRenderingMatrix.createAffineTransform(); at.concatenate(font.getFontMatrix().createAffineTransform()); @@ -353,8 +495,10 @@ private void drawGlyph2D(Glyph2D glyph2D, PDFont font, int code, Vector displace GeneralPath path = glyph2D.getPathForCharacterCode(code); if (path != null) { - // stretch non-embedded glyph if it does not match the width contained in the PDF - if (!font.isEmbedded()) + // Stretch non-embedded glyph if it does not match the height/width contained in the PDF. + // Vertical fonts have zero X displacement, so the following code scales to 0 if we don't skip it. + // TODO: How should vertical fonts be handled? 
+ if (!font.isEmbedded() && !font.isVertical() && !font.isStandard14() && font.hasExplicitWidth(code)) { float fontWidth = font.getWidthFromFont(code); if (fontWidth > 0 && // ignore spaces @@ -373,7 +517,10 @@ private void drawGlyph2D(Glyph2D glyph2D, PDFont font, int code, Vector displace graphics.setComposite(state.getNonStrokingJavaComposite()); graphics.setPaint(getNonStrokingPaint()); setClip(); - graphics.fill(glyph); + if (isContentRendered()) + { + graphics.fill(glyph); + } } if (renderingMode.isStroke()) @@ -382,16 +529,31 @@ private void drawGlyph2D(Glyph2D glyph2D, PDFont font, int code, Vector displace graphics.setPaint(getStrokingPaint()); graphics.setStroke(getStroke()); setClip(); - graphics.draw(glyph); + if (isContentRendered()) + { + graphics.draw(glyph); + } } if (renderingMode.isClip()) { - textClippingArea.add(new Area(glyph)); + textClippings.add(glyph); } } } + @Override + protected void showType3Glyph(Matrix textRenderingMatrix, PDType3Font font, int code, + Vector displacement) throws IOException + { + PDGraphicsState state = getGraphicsState(); + RenderingMode renderingMode = state.getTextState().getRenderingMode(); + if (!RenderingMode.NEITHER.equals(renderingMode)) + { + super.showType3Glyph(textRenderingMatrix, font, code, displacement); + } + } + /** * Provide a Glyph2D for the given font. * @@ -472,47 +634,94 @@ public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) linePath.closePath(); } - /** - * Generates AWT raster for a soft mask - * - * @param softMask soft mask - * @return AWT raster for soft mask - * @throws IOException - */ - private Raster createSoftMaskRaster(PDSoftMask softMask) throws IOException + //TODO: move soft mask apply to getPaint()? 
+ private Paint applySoftMaskToPaint(Paint parentPaint, PDSoftMask softMask) throws IOException { - TransparencyGroup transparencyGroup = new TransparencyGroup(softMask.getGroup(), true); - COSName subtype = softMask.getSubType(); - if (COSName.ALPHA.equals(subtype)) + if (softMask == null || softMask.getGroup() == null) + { + return parentPaint; + } + PDColor backdropColor = null; + if (COSName.LUMINOSITY.equals(softMask.getSubType())) + { + COSArray backdropColorArray = softMask.getBackdropColor(); + if (backdropColorArray != null) + { + PDTransparencyGroup form = softMask.getGroup(); + PDColorSpace colorSpace = form.getGroup().getColorSpace(form.getResources()); + if (colorSpace != null) + { + backdropColor = new PDColor(backdropColorArray, colorSpace); + } + } + } + TransparencyGroup transparencyGroup = new TransparencyGroup(softMask.getGroup(), true, + softMask.getInitialTransformationMatrix(), backdropColor); + BufferedImage image = transparencyGroup.getImage(); + if (image == null) { - return transparencyGroup.getAlphaRaster(); + // Adobe Reader ignores empty softmasks instead of using bc color + // sample file: PDFJS-6967_reduced_outside_softmask.pdf + return parentPaint; } - else if (COSName.LUMINOSITY.equals(subtype)) + BufferedImage gray = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); + if (COSName.ALPHA.equals(softMask.getSubType())) { - return transparencyGroup.getLuminosityRaster(); + gray.setData(image.getAlphaRaster()); + } + else if (COSName.LUMINOSITY.equals(softMask.getSubType())) + { + Graphics g = gray.getGraphics(); + g.drawImage(image, 0, 0, null); + g.dispose(); } else { throw new IOException("Invalid soft mask subtype."); } + gray = adjustImage(gray); + Rectangle2D tpgBounds = transparencyGroup.getBounds(); + adjustRectangle(tpgBounds); + return new SoftMask(parentPaint, gray, tpgBounds, backdropColor, softMask.getTransferFunction()); } - private Paint applySoftMaskToPaint(Paint parentPaint, 
PDSoftMask softMask) throws IOException + // this adjusts the rectangle to the rotated image to put the soft mask at the correct position + //TODO after all transparency problems have been solved: + // 1. shouldn't this be done in transparencyGroup.getBounds() ? + // 2. change transparencyGroup.getBounds() to getOrigin(), because size isn't used in SoftMask + // 3. Is it possible to create the softmask and transparency group in the correct rotation? + // (needs rendering identity testing before committing!) + private void adjustRectangle(Rectangle2D r) { - if (softMask != null) - { - //TODO PDFBOX-2934 - if (COSName.ALPHA.equals(softMask.getSubType())) - { - LOG.info("alpha smask not implemented yet, is ignored"); - return parentPaint; - } - return new SoftMaskPaint(parentPaint, createSoftMaskRaster(softMask)); - } - else - { - return parentPaint; - } + Matrix m = new Matrix(xform); + float scaleX = Math.abs(m.getScalingFactorX()); + float scaleY = Math.abs(m.getScalingFactorY()); + + AffineTransform adjustedTransform = new AffineTransform(xform); + adjustedTransform.scale(1.0 / scaleX, 1.0 / scaleY); + r.setRect(adjustedTransform.createTransformedShape(r).getBounds2D()); + } + + // returns the image adjusted for applySoftMaskToPaint(). 
+ private BufferedImage adjustImage(BufferedImage gray) + { + AffineTransform at = new AffineTransform(xform); + Matrix m = new Matrix(at); + at.scale(1.0 / Math.abs(m.getScalingFactorX()), 1.0 / Math.abs(m.getScalingFactorY())); + + Rectangle originalBounds = new Rectangle(gray.getWidth(), gray.getHeight()); + Rectangle2D transformedBounds = at.createTransformedShape(originalBounds).getBounds2D(); + at.preConcatenate(AffineTransform.getTranslateInstance(-transformedBounds.getMinX(), + -transformedBounds.getMinY())); + + int width = (int) Math.ceil(transformedBounds.getWidth()); + int height = (int) Math.ceil(transformedBounds.getHeight()); + BufferedImage transformedGray = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY); + + Graphics2D g2 = (Graphics2D) transformedGray.getGraphics(); + g2.drawImage(gray, at, null); + g2.dispose(); + return transformedGray; } // returns the stroking AWT Paint @@ -523,14 +732,23 @@ private Paint getStrokingPaint() throws IOException getGraphicsState().getSoftMask()); } - // returns the non-stroking AWT Paint - private Paint getNonStrokingPaint() throws IOException + /** + * Returns the non-stroking AWT Paint. You may need to call this if you override + * {@link #showGlyph(Matrix, PDFont, int, Vector) showGlyph()}. See + * PDFBOX-5093 for more. + * + * @return The non-stroking AWT Paint. 
+ * @throws IOException + */ + protected final Paint getNonStrokingPaint() throws IOException { - return getPaint(getGraphicsState().getNonStrokingColor()); + return applySoftMaskToPaint( + getPaint(getGraphicsState().getNonStrokingColor()), + getGraphicsState().getSoftMask()); } // create a new stroke based on the current CTM and the current stroke - private BasicStroke getStroke() + private Stroke getStroke() { PDGraphicsState state = getGraphicsState(); @@ -544,30 +762,98 @@ private BasicStroke getStroke() } PDLineDashPattern dashPattern = state.getLineDashPattern(); - int phaseStart = dashPattern.getPhase(); + // PDFBOX-5168: show an all-zero dash array line invisible like Adobe does + // must do it here because getDashArray() sets minimum width because of JVM bugs float[] dashArray = dashPattern.getDashArray(); - if (dashArray != null) + if (isAllZeroDash(dashArray)) + { + return new Stroke() + { + @Override + public Shape createStrokedShape(Shape p) + { + return new Area(); + } + }; + } + float phaseStart = dashPattern.getPhase(); + dashArray = getDashArray(dashPattern); + phaseStart = transformWidth(phaseStart); + + int lineCap = Math.min(2, Math.max(0, state.getLineCap())); + int lineJoin = Math.min(2, Math.max(0, state.getLineJoin())); + float miterLimit = state.getMiterLimit(); + if (miterLimit < 1) + { + LOG.warn("Miter limit must be >= 1, value " + miterLimit + " is ignored"); + miterLimit = 10; + } + return new BasicStroke(lineWidth, lineCap, lineJoin, + miterLimit, dashArray, phaseStart); + } + + private boolean isAllZeroDash(float[] dashArray) + { + if (dashArray.length > 0) { - // apply the CTM for (int i = 0; i < dashArray.length; ++i) { - // minimum line dash width avoids JVM crash, see PDFBOX-2373, PDFBOX-2929, PDFBOX-3204 - float w = transformWidth(dashArray[i]); - if (w != 0) + if (dashArray[i] != 0) { - dashArray[i] = Math.max(w, 0.035f); + return false; } } - phaseStart = (int)transformWidth(phaseStart); + return true; + } + return false; + } 
- // empty dash array is illegal - if (dashArray.length == 0) + private float[] getDashArray(PDLineDashPattern dashPattern) + { + float[] dashArray = dashPattern.getDashArray(); + int phase = dashPattern.getPhase(); + // avoid empty, infinite and NaN values (PDFBOX-3360) + if (dashArray.length == 0 || Float.isInfinite(phase) || Float.isNaN(phase)) + { + return null; + } + for (int i = 0; i < dashArray.length; ++i) + { + if (Float.isInfinite(dashArray[i]) || Float.isNaN(dashArray[i])) + { + return null; + } + } + if (JAVA_VERSION < 10) + { + float scalingFactorX = new Matrix(xform).getScalingFactorX(); + for (int i = 0; i < dashArray.length; ++i) + { + // apply the CTM + float w = transformWidth(dashArray[i]); + // minimum line dash width avoids JVM crash, + // see PDFBOX-2373, PDFBOX-2929, PDFBOX-3204, PDFBOX-3813 + // also avoid 0 in array like "[ 0 1000 ] 0 d", see PDFBOX-3724 + if (scalingFactorX < 0.5f) + { + // PDFBOX-4492 + dashArray[i] = Math.max(w, 0.2f); + } + else + { + dashArray[i] = Math.max(w, 0.062f); + } + } + } + else + { + for (int i = 0; i < dashArray.length; ++i) { - dashArray = null; + // apply the CTM + dashArray[i] = transformWidth(dashArray[i]); } } - return new BasicStroke(lineWidth, state.getLineCap(), state.getLineJoin(), - state.getMiterLimit(), dashArray, phaseStart); + return dashArray; } @Override @@ -577,7 +863,11 @@ public void strokePath() throws IOException graphics.setPaint(getStrokingPaint()); graphics.setStroke(getStroke()); setClip(); - graphics.draw(linePath); + //TODO bbox of shading pattern should be used here? 
(see fillPath) + if (isContentRendered()) + { + graphics.draw(linePath); + } linePath.reset(); } @@ -602,16 +892,22 @@ public void fillPath(int windingRule) throws IOException RenderingHints.VALUE_ANTIALIAS_OFF); } + Shape shape; if (!(graphics.getPaint() instanceof Color)) { // apply clip to path to avoid oversized device bounds in shading contexts (PDFBOX-2901) Area area = new Area(linePath); area.intersect(new Area(graphics.getClip())); - graphics.fill(area); + intersectShadingBBox(getGraphicsState().getNonStrokingColor(), area); + shape = area; } else { - graphics.fill(linePath); + shape = linePath; + } + if (isContentRendered()) + { + graphics.fill(shape); } linePath.reset(); @@ -624,6 +920,29 @@ public void fillPath(int windingRule) throws IOException } } + // checks whether this is a shading pattern and if yes, + // get the transformed BBox and intersect with current paint area + // need to do it here and not in shading getRaster() because it may have been rotated + private void intersectShadingBBox(PDColor color, Area area) throws IOException + { + if (color.getColorSpace() instanceof PDPattern) + { + PDColorSpace colorSpace = color.getColorSpace(); + PDAbstractPattern pat = ((PDPattern) colorSpace).getPattern(color); + if (pat instanceof PDShadingPattern) + { + PDShading shading = ((PDShadingPattern) pat).getShading(); + PDRectangle bbox = shading.getBBox(); + if (bbox != null) + { + Matrix m = Matrix.concatenate(getInitialMatrix(), pat.getMatrix()); + Area bboxArea = new Area(bbox.transform(m)); + area.intersect(bboxArea); + } + } + } + } + /** * Returns true if the given path is rectangular. 
*/ @@ -669,6 +988,9 @@ private boolean isRectangular(GeneralPath path) case PathIterator.SEG_CLOSE: break; + + default: + break; } iter.next(); } @@ -740,7 +1062,17 @@ public void endPath() if (clipWindingRule != -1) { linePath.setWindingRule(clipWindingRule); - getGraphicsState().intersectClippingPath(linePath); + + if (!linePath.getPathIterator(null).isDone()) + { + // PDFBOX-4949 / PDF.js 12306: don't clip if "W n" only + getGraphicsState().intersectClippingPath(linePath); + } + + // PDFBOX-3836: lastClip needs to be reset, because after intersection it is still the same + // object, thus setClip() would believe that it is cached. + lastClip = null; + clipWindingRule = -1; } linePath.reset(); @@ -749,18 +1081,38 @@ public void endPath() @Override public void drawImage(PDImage pdImage) throws IOException { + if (pdImage instanceof PDImageXObject && + isHiddenOCG(((PDImageXObject) pdImage).getOptionalContent())) + { + return; + } + if (!isContentRendered()) + { + return; + } Matrix ctm = getGraphicsState().getCurrentTransformationMatrix(); AffineTransform at = ctm.createAffineTransform(); if (!pdImage.getInterpolate()) { - boolean isScaledUp = pdImage.getWidth() < Math.round(at.getScaleX()) || - pdImage.getHeight() < Math.round(at.getScaleY()); - // if the image is scaled down, we use smooth interpolation, eg PDFBOX-2364 // only when scaled up do we use nearest neighbour, eg PDFBOX-2302 / mori-cvpr01.pdf - // stencils are excluded from this rule (see survey.pdf) - if (isScaledUp || pdImage.isStencil()) + // PDFBOX-4930: we use the sizes of the ARGB image. These can be different + // than the original sizes of the base image, when the mask is bigger. + // PDFBOX-5091: also consider subsampling, the sizes are different too. 
+ BufferedImage bim; + if (subsamplingAllowed) + { + bim = pdImage.getImage(null, getSubsampling(pdImage, at)); + } + else + { + bim = pdImage.getImage(); + } + boolean isScaledUp = bim.getWidth() < Math.round(at.getScaleX()) || + bim.getHeight() < Math.round(at.getScaleY()); + + if (isScaledUp) { graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_NEAREST_NEIGHBOR); @@ -769,16 +1121,106 @@ public void drawImage(PDImage pdImage) throws IOException if (pdImage.isStencil()) { - // fill the image with paint - BufferedImage image = pdImage.getStencilImage(getNonStrokingPaint()); + if (getGraphicsState().getNonStrokingColor().getColorSpace() instanceof PDPattern) + { + // The earlier code for stencils (see "else") doesn't work with patterns because the + // CTM is not taken into consideration. + // this code is based on the fact that it is easily possible to draw the mask and + // the paint at the correct place with the existing code, but not in one step. + // Thus what we do is to draw both in separate images, then combine the two and draw + // the result. + // Note that the device scale is not used. In theory, some patterns can get better + // at higher resolutions but the stencil would become more and more "blocky". + // If anybody wants to do this, have a look at the code in showTransparencyGroup(). 
+ + // draw the paint + Paint paint = getNonStrokingPaint(); + Rectangle2D unitRect = new Rectangle2D.Float(0, 0, 1, 1); + Rectangle2D bounds = at.createTransformedShape(unitRect).getBounds2D(); + GraphicsConfiguration deviceConfiguration = graphics.getDeviceConfiguration(); + int w; + int h; + if (deviceConfiguration != null && deviceConfiguration.getBounds() != null) + { + // PDFBOX-4690: bounds doesn't need to be larger than device bounds (OOM risk) + Rectangle deviceBounds = deviceConfiguration.getBounds(); + w = (int) Math.ceil(Math.min(bounds.getWidth(), deviceBounds.getWidth())); + h = (int) Math.ceil(Math.min(bounds.getHeight(), deviceBounds.getHeight())); + } + else + { + w = (int) Math.ceil(bounds.getWidth()); + h = (int) Math.ceil(bounds.getHeight()); + } + BufferedImage renderedPaint = new BufferedImage(w, h, BufferedImage.TYPE_INT_ARGB); + Graphics2D g = (Graphics2D) renderedPaint.getGraphics(); + g.translate(-bounds.getMinX(), -bounds.getMinY()); + g.setPaint(paint); + g.setRenderingHints(graphics.getRenderingHints()); + g.fill(bounds); + g.dispose(); + + // draw the mask + BufferedImage mask = pdImage.getImage(); + BufferedImage renderedMask = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB); + g = (Graphics2D) renderedMask.getGraphics(); + g.translate(-bounds.getMinX(), -bounds.getMinY()); + AffineTransform imageTransform = new AffineTransform(at); + imageTransform.scale(1.0 / mask.getWidth(), -1.0 / mask.getHeight()); + imageTransform.translate(0, -mask.getHeight()); + g.setRenderingHints(graphics.getRenderingHints()); + g.drawImage(mask, imageTransform, null); + g.dispose(); + + // apply the mask + final int[] transparent = new int[4]; + int[] alphaPixel = null; + WritableRaster raster = renderedPaint.getRaster(); + WritableRaster alpha = renderedMask.getRaster(); + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + alphaPixel = alpha.getPixel(x, y, alphaPixel); + if (alphaPixel[0] == 255) + { + raster.setPixel(x, y, 
transparent); + } + } + } - // draw the image - drawBufferedImage(image, at); + // draw the image + setClip(); + graphics.setComposite(getGraphicsState().getNonStrokingJavaComposite()); + if (isContentRendered()) + { + graphics.drawImage(renderedPaint, + AffineTransform.getTranslateInstance(bounds.getMinX(), bounds.getMinY()), + null); + } + } + else + { + // fill the image with stenciled paint + BufferedImage image = pdImage.getStencilImage(getNonStrokingPaint()); + + // draw the image + drawBufferedImage(image, at); + } } else { - // draw the image - drawBufferedImage(pdImage.getImage(), at); + if (subsamplingAllowed) + { + int subsampling = getSubsampling(pdImage, at); + // draw the subsampled image + drawBufferedImage(pdImage.getImage(null, subsampling), at); + } + else + { + // subsampling not allowed, draw the image + drawBufferedImage(pdImage.getImage(), at); + } } if (!pdImage.getInterpolate()) @@ -789,23 +1231,60 @@ public void drawImage(PDImage pdImage) throws IOException } } + /** + * Calculated the subsampling frequency for a given PDImage based on the current transformation + * and its calculated transform + * + * @param pdImage PDImage to be drawn + * @param at Transform that will be applied to the image when drawing + * @return The rounded-down ratio of image pixels to drawn pixels. Returned value will always be + * >=1. + */ + private int getSubsampling(PDImage pdImage, AffineTransform at) + { + // calculate subsampling according to the resulting image size + double scale = Math.abs(at.getDeterminant() * xform.getDeterminant()); + + int subsampling = (int) Math.floor(Math.sqrt(pdImage.getWidth() * pdImage.getHeight() / scale)); + if (subsampling > 8) + { + subsampling = 8; + } + if (subsampling < 1) + { + subsampling = 1; + } + if (subsampling > pdImage.getWidth() || subsampling > pdImage.getHeight()) + { + // For very small images it is possible that the subsampling would imply 0 size. 
+ // To avoid problems, the subsampling is set to no less than the smallest dimension. + subsampling = Math.min(pdImage.getWidth(), pdImage.getHeight()); + } + return subsampling; + } + private void drawBufferedImage(BufferedImage image, AffineTransform at) throws IOException { graphics.setComposite(getGraphicsState().getNonStrokingJavaComposite()); setClip(); + AffineTransform imageTransform = new AffineTransform(at); + int width = image.getWidth(); + int height = image.getHeight(); + imageTransform.scale(1.0 / width, -1.0 / height); + imageTransform.translate(0, -height); + PDSoftMask softMask = getGraphicsState().getSoftMask(); if( softMask != null ) { - AffineTransform imageTransform = new AffineTransform(at); - imageTransform.scale(1, -1); - imageTransform.translate(0, -1); - Paint awtPaint = new TexturePaint(image, - new Rectangle2D.Double(imageTransform.getTranslateX(), imageTransform.getTranslateY(), - imageTransform.getScaleX(), imageTransform.getScaleY())); + Rectangle2D rectangle = new Rectangle2D.Float(0, 0, width, height); + Paint awtPaint = new TexturePaint(image, rectangle); awtPaint = applySoftMaskToPaint(awtPaint, softMask); graphics.setPaint(awtPaint); - Rectangle2D unitRect = new Rectangle2D.Float(0, 0, 1, 1); - graphics.fill(at.createTransformedShape(unitRect)); + + AffineTransform originalTransform = graphics.getTransform(); + graphics.transform(imageTransform); + graphics.fill(rectangle); + graphics.setTransform(originalTransform); } else { @@ -815,12 +1294,46 @@ private void drawBufferedImage(BufferedImage image, AffineTransform at) throws I image = applyTransferFunction(image, transfer); } - int width = image.getWidth(null); - int height = image.getHeight(null); - AffineTransform imageTransform = new AffineTransform(at); - imageTransform.scale(1.0 / width, -1.0 / height); - imageTransform.translate(0, -height); - graphics.drawImage(image, imageTransform, null); + // PDFBOX-4516, PDFBOX-4527, PDFBOX-4815, PDFBOX-4886, PDFBOX-4863: + // 
graphics.drawImage() has terrible quality when scaling down, even when + // RenderingHints.VALUE_INTERPOLATION_BICUBIC, VALUE_ALPHA_INTERPOLATION_QUALITY, + // VALUE_COLOR_RENDER_QUALITY and VALUE_RENDER_QUALITY are all set. + // A workaround is to get a pre-scaled image with Image.getScaledInstance() + // and then draw that one. To reduce differences in testing + // (partly because the method needs integer parameters), only smaller scalings + // will trigger the workaround. Because of the slowness we only do it if the user + // expects quality rendering and interpolation. + Matrix imageTransformMatrix = new Matrix(imageTransform); + AffineTransform graphicsTransformA = graphics.getTransform(); + Matrix graphicsTransformMatrix = new Matrix(graphicsTransformA); + float scaleX = Math.abs(imageTransformMatrix.getScalingFactorX() * graphicsTransformMatrix.getScalingFactorX()); + float scaleY = Math.abs(imageTransformMatrix.getScalingFactorY() * graphicsTransformMatrix.getScalingFactorY()); + + if ((scaleX < imageDownscalingOptimizationThreshold || scaleY < imageDownscalingOptimizationThreshold) && + RenderingHints.VALUE_RENDER_QUALITY.equals(graphics.getRenderingHint(RenderingHints.KEY_RENDERING)) && + RenderingHints.VALUE_INTERPOLATION_BICUBIC.equals(graphics.getRenderingHint(RenderingHints.KEY_INTERPOLATION))) + { + int w = Math.round(image.getWidth() * scaleX); + int h = Math.round(image.getHeight() * scaleY); + if (w < 1 || h < 1) + { + graphics.drawImage(image, imageTransform, null); + return; + } + Image imageToDraw = image.getScaledInstance(w, h, Image.SCALE_SMOOTH); + // remove the scale (extracted from w and h, to have it from the rounded values + // hoping to reverse the rounding: without this, we get an horizontal line + // when rendering PDFJS-8860-Pattern-Size1.pdf at 100% ) + imageTransform.scale(1f / w * image.getWidth(), 1f / h * image.getHeight()); + imageTransform.preConcatenate(graphicsTransformA); + graphics.setTransform(new AffineTransform()); + 
graphics.drawImage(imageToDraw, imageTransform, null); + graphics.setTransform(graphicsTransformA); + } + else + { + graphics.drawImage(image, imageTransform, null); + } } } @@ -838,8 +1351,12 @@ private BufferedImage applyTransferFunction(BufferedImage image, COSBase transfe // prepare transfer functions (either one per color or one for all) // and maps (actually arrays[256] to be faster) to avoid calculating values several times - Integer rMap[], gMap[], bMap[]; - PDFunction rf, gf, bf; + Integer[] rMap; + Integer[] gMap; + Integer[] bMap; + PDFunction rf; + PDFunction gf; + PDFunction bf; if (transfer instanceof COSArray) { COSArray ar = (COSArray) transfer; @@ -861,7 +1378,7 @@ private BufferedImage applyTransferFunction(BufferedImage image, COSBase transfe } // apply the transfer function to each color, but keep alpha - float input[] = new float[1]; + float[] input = new float[1]; for (int x = 0; x < image.getWidth(); ++x) { for (int y = 0; y < image.getHeight(); ++y) @@ -870,7 +1387,9 @@ private BufferedImage applyTransferFunction(BufferedImage image, COSBase transfe int ri = (rgb >> 16) & 0xFF; int gi = (rgb >> 8) & 0xFF; int bi = rgb & 0xFF; - int ro, go, bo; + int ro; + int go; + int bo; if (rMap[ri] != null) { ro = rMap[ri]; @@ -911,23 +1430,66 @@ private BufferedImage applyTransferFunction(BufferedImage image, COSBase transfe public void shadingFill(COSName shadingName) throws IOException { PDShading shading = getResources().getShading(shadingName); + if (shading == null) + { + LOG.error("shading " + shadingName + " does not exist in resources dictionary"); + return; + } Matrix ctm = getGraphicsState().getCurrentTransformationMatrix(); Paint paint = shading.toPaint(ctm); + paint = applySoftMaskToPaint(paint, getGraphicsState().getSoftMask()); graphics.setComposite(getGraphicsState().getNonStrokingJavaComposite()); graphics.setPaint(paint); graphics.setClip(null); lastClip = null; - graphics.fill(getGraphicsState().getCurrentClippingPath()); + + // get 
the transformed BBox and intersect with current clipping path + // need to do it here and not in shading getRaster() because it may have been rotated + PDRectangle bbox = shading.getBBox(); + Area area; + if (bbox != null) + { + area = new Area(bbox.transform(ctm)); + area.intersect(getGraphicsState().getCurrentClippingPath()); + } + else + { + Rectangle2D bounds = shading.getBounds(new AffineTransform(), ctm); + if (bounds != null) + { + bounds.add(new Point2D.Double(Math.floor(bounds.getMinX() - 1), + Math.floor(bounds.getMinY() - 1))); + bounds.add(new Point2D.Double(Math.ceil(bounds.getMaxX() + 1), + Math.ceil(bounds.getMaxY() + 1))); + area = new Area(bounds); + area.intersect(getGraphicsState().getCurrentClippingPath()); + } + else + { + area = getGraphicsState().getCurrentClippingPath(); + } + } + if (isContentRendered()) + { + graphics.fill(area); + } } @Override public void showAnnotation(PDAnnotation annotation) throws IOException { lastClip = null; - //TODO support more annotation flags (Invisible, NoZoom, NoRotate) - // Example for NoZoom can be found in p5 of PDFBOX-2348 - int deviceType = graphics.getDeviceConfiguration().getDevice().getType(); + int deviceType = -1; + GraphicsConfiguration graphicsConfiguration = graphics.getDeviceConfiguration(); + if (graphicsConfiguration != null) + { + GraphicsDevice graphicsDevice = graphicsConfiguration.getDevice(); + if (graphicsDevice != null) + { + deviceType = graphicsDevice.getType(); + } + } if (deviceType == GraphicsDevice.TYPE_PRINTER && !annotation.isPrinted()) { return; @@ -940,179 +1502,77 @@ public void showAnnotation(PDAnnotation annotation) throws IOException { return; } - super.showAnnotation(annotation); - - if (annotation.getAppearance() == null) + if (annotation.isInvisible() && annotation instanceof PDAnnotationUnknown) { - if (annotation instanceof PDAnnotationLink) - { - drawAnnotationLinkBorder((PDAnnotationLink) annotation); - } - - if (annotation instanceof PDAnnotationMarkup && 
annotation.getSubtype().equals(PDAnnotationMarkup.SUB_TYPE_INK)) - { - drawAnnotationInk((PDAnnotationMarkup) annotation); - } + // "If set, do not display the annotation if it does not belong to one + // of the standard annotation types and no annotation handler is available." + return; } - } + //TODO support NoZoom, example can be found in p5 of PDFBOX-2348 - private static class AnnotationBorder - { - private float[] dashArray = null; - private boolean underline = false; - private float width = 0; - private PDColor color; - } - - // return border info. BorderStyle must be provided as parameter because - // method is not available in the base class - private AnnotationBorder getAnnotationBorder(PDAnnotation annotation, - PDBorderStyleDictionary borderStyle) - { - AnnotationBorder ab = new AnnotationBorder(); - COSArray border = annotation.getBorder(); - if (borderStyle == null) + if (isHiddenOCG(annotation.getOptionalContent())) { - if (border.get(2) instanceof COSNumber) - { - ab.width = ((COSNumber) border.getObject(2)).floatValue(); - } - if (border.size() > 3) - { - COSBase base3 = border.getObject(3); - if (base3 instanceof COSArray) - { - ab.dashArray = ((COSArray) base3).toFloatArray(); - } - } + return; } - else + + PDAppearanceDictionary appearance = annotation.getAppearance(); + if (appearance == null || appearance.getNormalAppearance() == null) { - ab.width = borderStyle.getWidth(); - if (borderStyle.getStyle().equals(PDBorderStyleDictionary.STYLE_DASHED)) - { - ab.dashArray = borderStyle.getDashStyle().getDashArray(); - } - if (borderStyle.getStyle().equals(PDBorderStyleDictionary.STYLE_UNDERLINE)) - { - ab.underline = true; - } + annotation.constructAppearances(renderer.document); } - ab.color = annotation.getColor(); - if (ab.color == null) + + if (annotation.isNoRotate() && getCurrentPage().getRotation() != 0) { - // spec is unclear, but black seems to be the right thing to do - ab.color = new PDColor(new float[] { 0 }, PDDeviceGray.INSTANCE); + 
PDRectangle rect = annotation.getRectangle(); + AffineTransform savedTransform = graphics.getTransform(); + // "The upper-left corner of the annotation remains at the same point in + // default user space; the annotation pivots around that point." + graphics.rotate(Math.toRadians(getCurrentPage().getRotation()), + rect.getLowerLeftX(), rect.getUpperRightY()); + super.showAnnotation(annotation); + graphics.setTransform(savedTransform); } - if (ab.dashArray != null) + else { - boolean allZero = true; - for (float f : ab.dashArray) - { - if (f != 0) - { - allZero = false; - break; - } - } - if (allZero) - { - ab.dashArray = null; - } + super.showAnnotation(annotation); } - return ab; } - private void drawAnnotationLinkBorder(PDAnnotationLink link) throws IOException + /** + * {@inheritDoc} + */ + @Override + public void showForm(PDFormXObject form) throws IOException { - AnnotationBorder ab = getAnnotationBorder(link, link.getBorderStyle()); - if (ab.width == 0) + if (isHiddenOCG(form.getOptionalContent())) { return; } - PDRectangle rectangle = link.getRectangle(); - Stroke oldStroke = graphics.getStroke(); - graphics.setPaint(getPaint(ab.color)); - BasicStroke stroke = new BasicStroke(ab.width, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER, 10, ab.dashArray, 0); - graphics.setStroke(stroke); - graphics.setClip(null); - if (ab.underline) - { - graphics.drawLine((int) rectangle.getLowerLeftX(), (int) rectangle.getLowerLeftY(), - (int) (rectangle.getLowerLeftX() + rectangle.getWidth()), (int) rectangle.getLowerLeftY()); - } - else + if (isContentRendered()) { - graphics.drawRect((int) rectangle.getLowerLeftX(), (int) rectangle.getLowerLeftY(), - (int) rectangle.getWidth(), (int) rectangle.getHeight()); + super.showForm(form); } - graphics.setStroke(oldStroke); } - private void drawAnnotationInk(PDAnnotationMarkup inkAnnotation) throws IOException + @Override + public void showTransparencyGroup(PDTransparencyGroup form) throws IOException { - if 
(!inkAnnotation.getCOSObject().containsKey(COSName.INKLIST)) + if (isHiddenOCG(form.getOptionalContent())) { return; } - //TODO there should be an InkAnnotation class with a getInkList method - COSBase base = inkAnnotation.getCOSObject().getDictionaryObject(COSName.INKLIST); - if (!(base instanceof COSArray)) + if (!isContentRendered()) { return; } - // PDF spec does not mention /Border for ink annotations, but it is used if /BS is not available - AnnotationBorder ab = getAnnotationBorder(inkAnnotation, inkAnnotation.getBorderStyle()); - if (ab.width == 0) + TransparencyGroup group = + new TransparencyGroup(form, false, getGraphicsState().getCurrentTransformationMatrix(), null); + BufferedImage image = group.getImage(); + if (image == null) { + // image is empty, don't bother return; } - graphics.setPaint(getPaint(ab.color)); - Stroke oldStroke = graphics.getStroke(); - BasicStroke stroke = - new BasicStroke(ab.width, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER, 10, ab.dashArray, 0); - graphics.setStroke(stroke); - graphics.setClip(null); - COSArray pathsArray = (COSArray) base; - for (COSBase baseElement : (Iterable) pathsArray.toList()) - { - if (!(baseElement instanceof COSArray)) - { - continue; - } - COSArray pathArray = (COSArray) baseElement; - int nPoints = pathArray.size() / 2; - - // "When drawn, the points shall be connected by straight lines or curves - // in an implementation-dependent way" - we do lines. 
- GeneralPath path = new GeneralPath(); - for (int i = 0; i < nPoints; ++i) - { - COSBase bx = pathArray.getObject(i * 2); - COSBase by = pathArray.getObject(i * 2 + 1); - if (bx instanceof COSNumber && by instanceof COSNumber) - { - float x = ((COSNumber) bx).floatValue(); - float y = ((COSNumber) by).floatValue(); - if (i == 0) - { - path.moveTo(x, y); - } - else - { - path.lineTo(x, y); - } - } - } - graphics.draw(path); - } - graphics.setStroke(oldStroke); - } - - @Override - public void showTransparencyGroup(PDTransparencyGroup form) throws IOException - { - TransparencyGroup group = new TransparencyGroup(form, false); graphics.setComposite(getGraphicsState().getNonStrokingJavaComposite()); setClip(); @@ -1120,29 +1580,60 @@ public void showTransparencyGroup(PDTransparencyGroup form) throws IOException // both the DPI xform and the CTM were already applied to the group, so all we do // here is draw it directly onto the Graphics2D device at the appropriate position PDRectangle bbox = group.getBBox(); - AffineTransform prev = graphics.getTransform(); - float x = bbox.getLowerLeftX(); - float y = pageSize.getHeight() - bbox.getLowerLeftY() - bbox.getHeight(); - graphics.setTransform(AffineTransform.getTranslateInstance(x * xform.getScaleX(), - y * xform.getScaleY())); + AffineTransform savedTransform = graphics.getTransform(); + + Matrix m = new Matrix(xform); + float xScale = Math.abs(m.getScalingFactorX()); + float yScale = Math.abs(m.getScalingFactorY()); + + AffineTransform transform = new AffineTransform(xform); + transform.scale(1.0 / xScale, 1.0 / yScale); + graphics.setTransform(transform); + + // adjust bbox (x,y) position at the initial scale + cropbox + float x = bbox.getLowerLeftX() - pageSize.getLowerLeftX(); + float y = pageSize.getUpperRightY() - bbox.getUpperRightY(); + + if (flipTG) + { + graphics.translate(0, image.getHeight()); + graphics.scale(1, -1); + } + else + { + graphics.translate(x * xScale, y * yScale); + } PDSoftMask softMask = 
getGraphicsState().getSoftMask(); if (softMask != null) { - BufferedImage image = group.getImage(); Paint awtPaint = new TexturePaint(image, new Rectangle2D.Float(0, 0, image.getWidth(), image.getHeight())); - awtPaint = applySoftMaskToPaint(awtPaint, softMask); // todo: PDFBOX-994 problem here? + awtPaint = applySoftMaskToPaint(awtPaint, softMask); graphics.setPaint(awtPaint); - graphics.fill(new Rectangle2D.Float(0, 0, bbox.getWidth() * (float)xform.getScaleX(), - bbox.getHeight() * (float)xform.getScaleY())); + if (isContentRendered()) + { + graphics.fill( + new Rectangle2D.Float(0, 0, bbox.getWidth() * xScale, bbox.getHeight() * yScale)); + } } else { - graphics.drawImage(group.getImage(), null, null); + if (isContentRendered()) + { + try + { + graphics.drawImage(image, null, null); + } + catch (InternalError ie) + { + LOG.error("Exception drawing image, see JDK-6689349, " + + "try rendering into a BufferedImage instead", ie); + } + } } - graphics.setTransform(prev); + graphics.setTransform(savedTransform); } /** @@ -1155,19 +1646,33 @@ private final class TransparencyGroup private final int minX; private final int minY; + private final int maxX; + private final int maxY; private final int width; private final int height; + private final float scaleX; + private final float scaleY; /** * Creates a buffered image for a transparency group result. + * + * @param form the transparency group of the form or soft mask. + * @param isSoftMask true if this is a soft mask. + * @param ctm the relevant current transformation matrix. For soft masks, this is the CTM at + * the time the soft mask is set (not at the time the soft mask is used for fill/stroke!), + * for forms, this is the CTM at the time the form is invoked. + * @param backdropColor the color according to the /bc entry to be used for luminosity soft + * masks. 
+ * @throws IOException */ - private TransparencyGroup(PDTransparencyGroup form, boolean isSoftMask) throws IOException + private TransparencyGroup(PDTransparencyGroup form, boolean isSoftMask, Matrix ctm, + PDColor backdropColor) throws IOException { - Graphics2D g2dOriginal = graphics; - Area lastClipOriginal = lastClip; + Graphics2D savedGraphics = graphics; + Area savedLastClip = lastClip; + Shape savedInitialClip = initialClip; // get the CTM x Form Matrix transform - Matrix ctm = getGraphicsState().getCurrentTransformationMatrix(); Matrix transform = Matrix.concatenate(ctm, form.getMatrix()); // transform the bbox @@ -1177,35 +1682,122 @@ private TransparencyGroup(PDTransparencyGroup form, boolean isSoftMask) throws I Area clip = (Area)getGraphicsState().getCurrentClippingPath().clone(); clip.intersect(new Area(transformedBox)); Rectangle2D clipRect = clip.getBounds2D(); + Matrix m = new Matrix(xform); + scaleX = Math.abs(m.getScalingFactorX()); + scaleY = Math.abs(m.getScalingFactorY()); + if (clipRect.isEmpty()) + { + image = null; + bbox = null; + minX = 0; + minY = 0; + maxX = 0; + maxY = 0; + width = 0; + height = 0; + return; + } this.bbox = new PDRectangle((float)clipRect.getX(), (float)clipRect.getY(), (float)clipRect.getWidth(), (float)clipRect.getHeight()); // apply the underlying Graphics2D device's DPI transform - Shape deviceClip = xform.createTransformedShape(clip); - Rectangle2D bounds = deviceClip.getBounds2D(); + AffineTransform dpiTransform = AffineTransform.getScaleInstance(scaleX, scaleY); + Rectangle2D bounds = dpiTransform.createTransformedShape(clip.getBounds2D()).getBounds2D(); minX = (int) Math.floor(bounds.getMinX()); minY = (int) Math.floor(bounds.getMinY()); - int maxX = (int) Math.floor(bounds.getMaxX()) + 1; - int maxY = (int) Math.floor(bounds.getMaxY()) + 1; + maxX = (int) Math.floor(bounds.getMaxX()) + 1; + maxY = (int) Math.floor(bounds.getMaxY()) + 1; width = maxX - minX; height = maxY - minY; - image = new 
BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); // FIXME - color space + // FIXME - color space + if (isGray(form.getGroup().getColorSpace(form.getResources()))) + { + image = create2ByteGrayAlphaImage(width, height); + } + else + { + image = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); + } + + boolean needsBackdrop = !isSoftMask && !form.getGroup().isIsolated() && + hasBlendMode(form, new HashSet()); + BufferedImage backdropImage = null; + // Position of this group in parent group's coordinates + int backdropX = 0; + int backdropY = 0; + if (needsBackdrop) + { + if (transparencyGroupStack.isEmpty()) + { + // Use the current page as the parent group. + backdropImage = renderer.getPageImage(); + if (backdropImage == null) + { + needsBackdrop = false; + } + else + { + backdropX = minX; + backdropY = backdropImage.getHeight() - maxY; + } + } + else + { + TransparencyGroup parentGroup = transparencyGroupStack.peek(); + backdropImage = parentGroup.image; + backdropX = minX - parentGroup.minX; + backdropY = parentGroup.maxY - maxY; + } + } + Graphics2D g = image.createGraphics(); + if (needsBackdrop) + { + // backdropImage must be included in group image but not in group alpha. + g.drawImage(backdropImage, 0, 0, width, height, + backdropX, backdropY, backdropX + width, backdropY + height, null); + g = new GroupGraphics(image, g); + } + if (isSoftMask && backdropColor != null) + { + // "If the subtype is Luminosity, the transparency group XObject G shall be + // composited with a fully opaque backdrop whose colour is everywhere defined + // by the soft-mask dictionary's BC entry." 
+ g.setBackground(new Color(backdropColor.toRGB())); + g.clearRect(0, 0, width, height); + } // flip y-axis - g.translate(0, height); + g.translate(0, image.getHeight()); g.scale(1, -1); + boolean savedFlipTG = flipTG; + flipTG = false; + // apply device transform (DPI) - g.transform(xform); + // the initial translation is ignored, because we're not writing into the initial graphics device + g.transform(dpiTransform); + + AffineTransform xformOriginal = xform; + xform = AffineTransform.getScaleInstance(scaleX, scaleY); + PDRectangle pageSizeOriginal = pageSize; + pageSize = new PDRectangle(minX / scaleX, + minY / scaleY, + (float) bounds.getWidth() / scaleX, + (float) bounds.getHeight() / scaleY); + int clipWindingRuleOriginal = clipWindingRule; + clipWindingRule = -1; + GeneralPath linePathOriginal = linePath; + linePath = new GeneralPath(); // adjust the origin g.translate(-clipRect.getX(), -clipRect.getY()); graphics = g; + setRenderingHints(); try { if (isSoftMask) @@ -1214,17 +1806,78 @@ private TransparencyGroup(PDTransparencyGroup form, boolean isSoftMask) throws I } else { + transparencyGroupStack.push(this); processTransparencyGroup(form); + if (!transparencyGroupStack.isEmpty()) + { + transparencyGroupStack.pop(); + } } } finally { - lastClip = lastClipOriginal; + flipTG = savedFlipTG; + lastClip = savedLastClip; graphics.dispose(); - graphics = g2dOriginal; + graphics = savedGraphics; + initialClip = savedInitialClip; + clipWindingRule = clipWindingRuleOriginal; + linePath = linePathOriginal; + pageSize = pageSizeOriginal; + xform = xformOriginal; + + if (needsBackdrop) + { + ((GroupGraphics) g).removeBackdrop(backdropImage, backdropX, backdropY); + } } } + // http://stackoverflow.com/a/21181943/535646 + private BufferedImage create2ByteGrayAlphaImage(int width, int height) + { + // gray + alpha + int[] bandOffsets = new int[] {1, 0}; + int bands = bandOffsets.length; + + // Color Model used for raw GRAY + ALPHA + final ColorModel CM_GRAY_ALPHA + = new 
ComponentColorModel( + ColorSpace.getInstance(ColorSpace.CS_GRAY), + true, false, Transparency.TRANSLUCENT, DataBuffer.TYPE_BYTE); + + // Init data buffer of type byte + DataBuffer buffer = new DataBufferByte(width * height * bands); + + // Wrap the data buffer in a raster + WritableRaster raster = + Raster.createInterleavedRaster(buffer, width, height, + width * bands, bands, bandOffsets, new Point(0, 0)); + + // Create a custom BufferedImage with the raster and a suitable color model + return new BufferedImage(CM_GRAY_ALPHA, raster, false, null); + } + + private boolean isGray(PDColorSpace colorSpace) + { + if (colorSpace instanceof PDDeviceGray) + { + return true; + } + if (colorSpace instanceof PDICCBased) + { + try + { + return ((PDICCBased) colorSpace).getAlternateColorSpace() instanceof PDDeviceGray; + } + catch (IOException ex) + { + return false; + } + } + return false; + } + public BufferedImage getImage() { return image; @@ -1235,19 +1888,212 @@ public PDRectangle getBBox() return bbox; } - public Raster getAlphaRaster() + public Rectangle2D getBounds() + { + Point2D size = new Point2D.Double(pageSize.getWidth(), pageSize.getHeight()); + // apply the underlying Graphics2D device's DPI transform and y-axis flip + AffineTransform dpiTransform = AffineTransform.getScaleInstance(scaleX, scaleY); + size = dpiTransform.transform(size, size); + // Flip y + return new Rectangle2D.Double(minX - pageSize.getLowerLeftX() * scaleX, + size.getY() - minY - height + pageSize.getLowerLeftY() * scaleY, + width, height); + } + } + + private boolean hasBlendMode(PDTransparencyGroup group, Set groupsDone) + { + if (groupsDone.contains(group.getCOSObject())) { - return image.getAlphaRaster(); + // The group was already processed. Avoid endless recursion. 
+ return false; } + groupsDone.add(group.getCOSObject()); - public Raster getLuminosityRaster() + PDResources resources = group.getResources(); + if (resources == null) { - BufferedImage gray = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY); - Graphics g = gray.getGraphics(); - g.drawImage(image, 0, 0, null); - g.dispose(); + return false; + } + for (COSName name : resources.getExtGStateNames()) + { + PDExtendedGraphicsState extGState = resources.getExtGState(name); + if (extGState == null) + { + continue; + } + BlendMode blendMode = extGState.getBlendMode(); + if (blendMode != BlendMode.NORMAL) + { + return true; + } + } + + // Recursively process nested transparency groups + for (COSName name : resources.getXObjectNames()) + { + PDXObject xObject; + try + { + xObject = resources.getXObject(name); + } + catch (IOException ex) + { + continue; + } + if (xObject instanceof PDTransparencyGroup && + hasBlendMode((PDTransparencyGroup)xObject, groupsDone)) + { + return true; + } + } + + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public void beginMarkedContentSequence(COSName tag, COSDictionary properties) + { + if (nestedHiddenOCGCount > 0) + { + nestedHiddenOCGCount++; + return; + } + if (tag == null || getPage().getResources() == null) + { + return; + } + if (isHiddenOCG(getPage().getResources().getProperties(tag))) + { + nestedHiddenOCGCount = 1; + } + } + + /** + * {@inheritDoc} + */ + @Override + public void endMarkedContentSequence() + { + if (nestedHiddenOCGCount > 0) + { + nestedHiddenOCGCount--; + } + } + + private boolean isContentRendered() + { + return nestedHiddenOCGCount <= 0; + } + + private boolean isHiddenOCG(PDPropertyList propertyList) + { + if (propertyList instanceof PDOptionalContentGroup) + { + PDOptionalContentGroup group = (PDOptionalContentGroup) propertyList; + RenderState printState = group.getRenderState(destination); + if (printState == null) + { + if (!getRenderer().isGroupEnabled(group)) + { + return 
true; + } + } + else if (RenderState.OFF.equals(printState)) + { + return true; + } + } + else if (propertyList instanceof PDOptionalContentMembershipDictionary) + { + return isHiddenOCMD((PDOptionalContentMembershipDictionary) propertyList); + } + return false; + } - return gray.getRaster(); + private boolean isHiddenOCMD(PDOptionalContentMembershipDictionary ocmd) + { + if (ocmd.getCOSObject().getCOSArray(COSName.VE) != null) + { + // support seems to be optional, and is approximated by /P and /OCGS + LOG.info("/VE entry ignored in Optional Content Membership Dictionary"); + } + List visibles = new ArrayList(); + for (PDPropertyList prop : ocmd.getOCGs()) + { + visibles.add(!isHiddenOCG(prop)); + } + COSName visibilityPolicy = ocmd.getVisibilityPolicy(); + // visible if any of the entries in OCGs are OFF + if (COSName.ANY_OFF.equals(visibilityPolicy)) + { + for (boolean visible : visibles) + { + if (!visible) + { + return false; + } + } + return true; + } + // visible only if all of the entries in OCGs are ON + if (COSName.ALL_ON.equals(visibilityPolicy)) + { + for (boolean visible : visibles) + { + if (!visible) + { + return true; + } + } + return false; + } + // visible only if all of the entries in OCGs are OFF + if (COSName.ALL_OFF.equals(visibilityPolicy)) + { + for (boolean visible : visibles) + { + if (visible) + { + return true; + } + } + return false; + } + // visible if any of the entries in OCGs are ON + // AnyOn is default + for (boolean visible : visibles) + { + if (visible) + { + return false; + } + } + return true; + } + + private static int getJavaVersion() + { + // strategy from lucene-solr/lucene/core/src/java/org/apache/lucene/util/Constants.java + String version = System.getProperty("java.specification.version"); + final StringTokenizer st = new StringTokenizer(version, "."); + try + { + int major = Integer.parseInt(st.nextToken()); + int minor = 0; + if (st.hasMoreTokens()) + { + minor = Integer.parseInt(st.nextToken()); + } + return major == 
1 ? minor : major; + } + catch (NumberFormatException nfe) + { + // maybe some new numbering scheme in the 22nd century + return 0; } } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawerParameters.java b/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawerParameters.java index e1ccb8099b3..19b7dc39e8c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawerParameters.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawerParameters.java @@ -17,6 +17,8 @@ package org.apache.pdfbox.rendering; +import java.awt.RenderingHints; + import org.apache.pdfbox.pdmodel.PDPage; /** @@ -30,14 +32,24 @@ public final class PageDrawerParameters { private final PDFRenderer renderer; private final PDPage page; + private final boolean subsamplingAllowed; + private final RenderDestination destination; + private final RenderingHints renderingHints; + private final float imageDownscalingOptimizationThreshold; /** * Package-private constructor. */ - PageDrawerParameters(PDFRenderer renderer, PDPage page) + PageDrawerParameters(PDFRenderer renderer, PDPage page, boolean subsamplingAllowed, + RenderDestination destination, RenderingHints renderingHints, + float imageDownscalingOptimizationThreshold) { this.renderer = renderer; this.page = page; + this.subsamplingAllowed = subsamplingAllowed; + this.destination = destination; + this.renderingHints = renderingHints; + this.imageDownscalingOptimizationThreshold = imageDownscalingOptimizationThreshold; } /** @@ -55,4 +67,38 @@ PDFRenderer getRenderer() { return renderer; } + + /** + * Returns whether to allow subsampling of images. + */ + public boolean isSubsamplingAllowed() + { + return subsamplingAllowed; + } + + /** + * @return the destination + */ + public RenderDestination getDestination() + { + return this.destination; + } + + /** + * @return the rendering hints. 
+ */ + public RenderingHints getRenderingHints() + { + return renderingHints; + } + + /** + * + * @return the imageDownscalingOptimizationThreshold + */ + + public float getImageDownscalingOptimizationThreshold() + { + return imageDownscalingOptimizationThreshold; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/rendering/RenderDestination.java b/pdfbox/src/main/java/org/apache/pdfbox/rendering/RenderDestination.java new file mode 100644 index 00000000000..455d393cf58 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/rendering/RenderDestination.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.rendering; + +/** + * Optional content groups are visible depending on the render purpose. 
+ */ +public enum RenderDestination +{ + /** graphics export */ + EXPORT, + /** viewing */ + VIEW, + /** printing */ + PRINT +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/rendering/SoftMask.java b/pdfbox/src/main/java/org/apache/pdfbox/rendering/SoftMask.java new file mode 100644 index 00000000000..5930705d78c --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/rendering/SoftMask.java @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.rendering; + +import java.awt.Color; +import java.awt.Paint; +import java.awt.PaintContext; +import java.awt.Rectangle; +import java.awt.RenderingHints; +import java.awt.geom.AffineTransform; +import java.awt.geom.Rectangle2D; +import java.awt.image.BufferedImage; +import java.awt.image.ColorModel; +import java.awt.image.Raster; +import java.awt.image.WritableRaster; +import java.io.IOException; +import org.apache.pdfbox.pdmodel.common.function.PDFunction; +import org.apache.pdfbox.pdmodel.common.function.PDFunctionTypeIdentity; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; + +/** + * A Paint which applies a soft mask to an underlying Paint. 
+ * + * @author Petr Slaby + * @author John Hewson + * @author Matthias Bläsing + * @author Tilman Hausherr + */ +class SoftMask implements Paint +{ + private static final ColorModel ARGB_COLOR_MODEL = + new BufferedImage(1, 1, BufferedImage.TYPE_INT_ARGB).getColorModel(); + + private final Paint paint; + private final BufferedImage mask; + private final Rectangle2D bboxDevice; + private int bc = 0; + private final PDFunction transferFunction; + + /** + * Creates a new soft mask paint. + * + * @param paint underlying paint. + * @param mask soft mask + * @param bboxDevice bbox of the soft mask in the underlying Graphics2D device space + * @param backdropColor the color to be used outside the transparency group’s bounding box; if + * null, black will be used. + * @param transferFunction the transfer function, may be null. + */ + SoftMask(Paint paint, BufferedImage mask, Rectangle2D bboxDevice, PDColor backdropColor, PDFunction transferFunction) + { + this.paint = paint; + this.mask = mask; + this.bboxDevice = bboxDevice; + if (transferFunction instanceof PDFunctionTypeIdentity) + { + this.transferFunction = null; + } + else + { + this.transferFunction = transferFunction; + } + if (backdropColor != null) + { + try + { + Color color = new Color(backdropColor.toRGB()); + // http://stackoverflow.com/a/25463098/535646 + bc = (299 * color.getRed() + 587 * color.getGreen() + 114 * color.getBlue()) / 1000; + } + catch (IOException ex) + { + // keep default + } + } + } + + @Override + public PaintContext createContext(ColorModel cm, Rectangle deviceBounds, + Rectangle2D userBounds, AffineTransform xform, + RenderingHints hints) + { + PaintContext ctx = paint.createContext(cm, deviceBounds, userBounds, xform, hints); + return new SoftPaintContext(ctx); + } + + @Override + public int getTransparency() + { + return TRANSLUCENT; + } + + private class SoftPaintContext implements PaintContext + { + private final PaintContext context; + + SoftPaintContext(PaintContext context) + { + 
this.context = context; + } + + @Override + public ColorModel getColorModel() + { + return ARGB_COLOR_MODEL; + } + + @Override + public Raster getRaster(int x1, int y1, int w, int h) + { + Raster raster = context.getRaster(x1, y1, w, h); + ColorModel rasterCM = context.getColorModel(); + float[] input = null; + Float[] map = null; + + if (transferFunction != null) + { + map = new Float[256]; + input = new float[1]; + } + + // buffer + WritableRaster output = getColorModel().createCompatibleWritableRaster(w, h); + + // the soft mask has its own bbox + x1 = x1 - (int)bboxDevice.getX(); + y1 = y1 - (int)bboxDevice.getY(); + + int[] gray = new int[4]; + Object pixelInput = null; + int[] pixelOutput = new int[4]; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + pixelInput = raster.getDataElements(x, y, pixelInput); + + pixelOutput[0] = rasterCM.getRed(pixelInput); + pixelOutput[1] = rasterCM.getGreen(pixelInput); + pixelOutput[2] = rasterCM.getBlue(pixelInput); + pixelOutput[3] = rasterCM.getAlpha(pixelInput); + + // get the alpha value from the gray mask, if within mask bounds + gray[0] = 0; + if (x1 + x >= 0 && y1 + y >= 0 && x1 + x < mask.getWidth() && y1 + y < mask.getHeight()) + { + mask.getRaster().getPixel(x1 + x, y1 + y, gray); + int g = gray[0]; + if (transferFunction != null) + { + // apply transfer function + try + { + if (map[g] != null) + { + // was calculated before + pixelOutput[3] = Math.round(pixelOutput[3] * map[g]); + } + else + { + // calculate and store in map + input[0] = g / 255f; + float f = transferFunction.eval(input)[0]; + map[g] = f; + pixelOutput[3] = Math.round(pixelOutput[3] * f); + } + } + catch (IOException ex) + { + // ignore exception, treat as outside + pixelOutput[3] = Math.round(pixelOutput[3] * (bc / 255f)); + } + } + else + { + pixelOutput[3] = Math.round(pixelOutput[3] * (g / 255f)); + } + } + else + { + pixelOutput[3] = Math.round(pixelOutput[3] * (bc / 255f)); + } + output.setPixel(x, y, pixelOutput); + } + 
} + + return output; + } + + @Override + public void dispose() + { + context.dispose(); + } + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/rendering/TTFGlyph2D.java b/pdfbox/src/main/java/org/apache/pdfbox/rendering/TTFGlyph2D.java index 6ab4d2c79f5..b8c8afe794b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/rendering/TTFGlyph2D.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/rendering/TTFGlyph2D.java @@ -116,6 +116,13 @@ private int getGIDForCharacterCode(int code) throws IOException */ public GeneralPath getPathForGID(int gid, int code) throws IOException { + if (gid == 0 && !isCIDFont && code == 10 && font.isStandard14()) + { + // PDFBOX-4001 return empty path for line feed on std14 + // need to catch this early because all "bad" glyphs have gid 0 + LOG.warn("No glyph for code " + code + " in font " + font.getName()); + return new GeneralPath(); + } GeneralPath glyphPath = glyphs.get(gid); if (glyphPath == null) { @@ -125,7 +132,7 @@ public GeneralPath getPathForGID(int gid, int code) throws IOException { int cid = ((PDType0Font) font).codeToCID(code); String cidHex = String.format("%04x", cid); - LOG.warn("No glyph for " + code + " (CID " + cidHex + ") in font " + + LOG.warn("No glyph for code " + code + " (CID " + cidHex + ") in font " + font.getName()); } else diff --git a/pdfbox/src/main/java/org/apache/pdfbox/rendering/TilingPaint.java b/pdfbox/src/main/java/org/apache/pdfbox/rendering/TilingPaint.java index f890ca7d192..dd2861f5cc8 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/rendering/TilingPaint.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/rendering/TilingPaint.java @@ -23,18 +23,15 @@ import java.awt.RenderingHints; import java.awt.TexturePaint; import java.awt.Transparency; -import java.awt.color.ColorSpace; import java.awt.geom.AffineTransform; import java.awt.geom.Rectangle2D; import java.awt.image.BufferedImage; import java.awt.image.ColorModel; -import java.awt.image.ComponentColorModel; -import 
java.awt.image.DataBuffer; -import java.awt.image.WritableRaster; import java.io.IOException; import java.math.BigDecimal; import java.math.RoundingMode; - +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; @@ -48,43 +45,62 @@ */ class TilingPaint implements Paint { - private final PDTilingPattern pattern; - private final TexturePaint paint; - private final PageDrawer drawer; + private static final Log LOG = LogFactory.getLog(TilingPaint.class); + private final Paint paint; + private final Matrix patternMatrix; + private static final int MAXEDGE; + private static final String DEFAULTMAXEDGE = "3000"; + + static + { + String s = System.getProperty("pdfbox.rendering.tilingpaint.maxedge", DEFAULTMAXEDGE); + int val; + try + { + val = Integer.parseInt(s); + } + catch (NumberFormatException ex) + { + LOG.error("Default will be used", ex); + val = Integer.parseInt(DEFAULTMAXEDGE); + } + MAXEDGE = val; + } /** - * Creates a new colored tiling Paint. + * Creates a new colored tiling Paint, i.e. one that has its own colors. * * @param drawer renderer to render the page * @param pattern tiling pattern dictionary + * @param xform device scale transform * - * @throws java.io.IOException if something goes wrong while drawing the - * pattern + * @throws java.io.IOException if something goes wrong while drawing the pattern */ TilingPaint(PageDrawer drawer, PDTilingPattern pattern, AffineTransform xform) throws IOException { - this.drawer = drawer; - this.pattern = pattern; - this.paint = new TexturePaint(getImage(null, null, xform), getAnchorRect()); + this(drawer, pattern, null, null, xform); } /** - * Creates a new uncolored tiling Paint. + * Creates a new tiling Paint. 
The parameters color and colorSpace must be null for a colored + * tiling Paint (because it has its own colors), and non null for an uncolored tiling Paint. * * @param drawer renderer to render the page * @param pattern tiling pattern dictionary * @param colorSpace color space for this tiling * @param color color for this tiling + * @param xform device scale transform * * @throws java.io.IOException if something goes wrong while drawing the pattern */ TilingPaint(PageDrawer drawer, PDTilingPattern pattern, PDColorSpace colorSpace, PDColor color, AffineTransform xform) throws IOException { - this.drawer = drawer; - this.pattern = pattern; - this.paint = new TexturePaint(getImage(colorSpace, color, xform), getAnchorRect()); + // pattern space -> user space + patternMatrix = Matrix.concatenate(drawer.getInitialMatrix(), pattern.getMatrix()); + Rectangle2D anchorRect = getAnchorRect(pattern); + paint = new TexturePaint(getImage(drawer, pattern, colorSpace, color, xform, anchorRect), anchorRect); } /** @@ -96,9 +112,6 @@ public PaintContext createContext(ColorModel cm, Rectangle deviceBounds, Rectang { AffineTransform xformPattern = (AffineTransform)xform.clone(); - // pattern space -> user space - Matrix patternMatrix = Matrix.concatenate(drawer.getInitialMatrix(), pattern.getMatrix()); - // applies the pattern matrix with scaling removed AffineTransform patternNoScale = patternMatrix.createAffineTransform(); patternNoScale.scale(1 / patternMatrix.getScalingFactorX(), @@ -111,28 +124,23 @@ public PaintContext createContext(ColorModel cm, Rectangle deviceBounds, Rectang /** * Returns the pattern image in parent stream coordinates. 
*/ - private BufferedImage getImage(PDColorSpace colorSpace, PDColor color, - AffineTransform xform) throws IOException + private BufferedImage getImage(PageDrawer drawer, PDTilingPattern pattern, PDColorSpace colorSpace, + PDColor color, AffineTransform xform, Rectangle2D anchorRect) throws IOException { - ColorSpace outputCS = ColorSpace.getInstance(ColorSpace.CS_sRGB); - ColorModel cm = new ComponentColorModel(outputCS, true, false, - Transparency.TRANSLUCENT, DataBuffer.TYPE_BYTE); - - Rectangle2D anchor = getAnchorRect(); - float width = (float)Math.abs(anchor.getWidth()); - float height = (float)Math.abs(anchor.getHeight()); + float width = (float) Math.abs(anchorRect.getWidth()); + float height = (float) Math.abs(anchorRect.getHeight()); // device scale transform (i.e. DPI) (see PDFBOX-1466.pdf) Matrix xformMatrix = new Matrix(xform); - width *= xformMatrix.getScalingFactorX(); - height *= xformMatrix.getScalingFactorY(); + float xScale = Math.abs(xformMatrix.getScalingFactorX()); + float yScale = Math.abs(xformMatrix.getScalingFactorY()); + width *= xScale; + height *= yScale; int rasterWidth = Math.max(1, ceiling(width)); int rasterHeight = Math.max(1, ceiling(height)); - // create raster - WritableRaster raster = cm.createCompatibleWritableRaster(rasterWidth, rasterHeight); - BufferedImage image = new BufferedImage(cm, raster, false, null); + BufferedImage image = new BufferedImage(rasterWidth, rasterHeight, BufferedImage.TYPE_INT_ARGB); Graphics2D graphics = image.createGraphics(); @@ -151,24 +159,22 @@ private BufferedImage getImage(PDColorSpace colorSpace, PDColor color, } // device scale transform (i.e. 
DPI) - graphics.scale(xformMatrix.getScalingFactorX(), xformMatrix.getScalingFactorY()); - - // pattern space -> user space - Matrix patternMatrix = Matrix.concatenate(drawer.getInitialMatrix(), pattern.getMatrix()); + graphics.scale(xScale, yScale); // apply only the scaling from the pattern transform, doing scaling here improves the // image quality and prevents large scale-down factors from creating huge tiling cells. - patternMatrix = Matrix.getScaleInstance( + Matrix newPatternMatrix; + newPatternMatrix = Matrix.getScaleInstance( Math.abs(patternMatrix.getScalingFactorX()), Math.abs(patternMatrix.getScalingFactorY())); // move origin to (0,0) - patternMatrix.concatenate( - Matrix.getTranslateInstance(-pattern.getBBox().getLowerLeftX(), - -pattern.getBBox().getLowerLeftY())); + PDRectangle bbox = pattern.getBBox(); + newPatternMatrix.concatenate( + Matrix.getTranslateInstance(-bbox.getLowerLeftX(), -bbox.getLowerLeftY())); // render using PageDrawer - drawer.drawTilingPattern(graphics, pattern, colorSpace, color, patternMatrix); + drawer.drawTilingPattern(graphics, pattern, colorSpace, color, newPatternMatrix); graphics.dispose(); return image; @@ -180,7 +186,7 @@ private BufferedImage getImage(PDColorSpace colorSpace, PDColor color, */ private static int ceiling(double num) { - BigDecimal decimal = new BigDecimal(num); + BigDecimal decimal = BigDecimal.valueOf(num); decimal = decimal.setScale(5, RoundingMode.CEILING); // 5 decimal places of accuracy return decimal.intValue(); } @@ -194,30 +200,49 @@ public int getTransparency() /** * Returns the anchor rectangle, which includes the XStep/YStep and scaling. 
*/ - private Rectangle2D getAnchorRect() + private Rectangle2D getAnchorRect(PDTilingPattern pattern) throws IOException { + PDRectangle bbox = pattern.getBBox(); + if (bbox == null) + { + throw new IOException("Pattern /BBox is missing"); + } float xStep = pattern.getXStep(); if (xStep == 0) { - xStep = pattern.getBBox().getWidth(); + LOG.warn("/XStep is 0, using pattern /BBox width"); + xStep = bbox.getWidth(); } float yStep = pattern.getYStep(); if (yStep == 0) { - yStep = pattern.getBBox().getHeight(); + LOG.warn("/YStep is 0, using pattern /BBox height"); + yStep = bbox.getHeight(); } - // pattern space -> user space - Matrix patternMatrix = Matrix.concatenate(drawer.getInitialMatrix(), pattern.getMatrix()); - float xScale = patternMatrix.getScalingFactorX(); float yScale = patternMatrix.getScalingFactorY(); + float width = xStep * xScale; + float height = yStep * yScale; + + if (Math.abs(width * height) > MAXEDGE * MAXEDGE) + { + // PDFBOX-3653: prevent huge sizes + LOG.info("Pattern surface is too large, will be clipped"); + LOG.info("width: " + width + ", height: " + height); + LOG.info("XStep: " + xStep + ", YStep: " + yStep); + LOG.info("bbox: " + bbox); + LOG.info("pattern matrix: " + pattern.getMatrix()); + LOG.info("concatenated matrix: " + patternMatrix); + width = Math.min(MAXEDGE, Math.abs(width)) * Math.signum(width); + height = Math.min(MAXEDGE, Math.abs(height)) * Math.signum(height); + //TODO better solution needed + } // returns the anchor rect with scaling applied - PDRectangle anchor = pattern.getBBox(); - return new Rectangle2D.Float(anchor.getLowerLeftX() * xScale, - anchor.getLowerLeftY() * yScale, - xStep * xScale, yStep * yScale); + return new Rectangle2D.Float(bbox.getLowerLeftX() * xScale, + bbox.getLowerLeftY() * yScale, + width, height); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/rendering/TilingPaintFactory.java b/pdfbox/src/main/java/org/apache/pdfbox/rendering/TilingPaintFactory.java new file mode 100644 index 
00000000000..1542879b2c9 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/rendering/TilingPaintFactory.java @@ -0,0 +1,162 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.rendering; + +import java.awt.Paint; +import java.awt.geom.AffineTransform; +import java.io.IOException; +import java.lang.ref.WeakReference; +import java.util.Map; +import java.util.WeakHashMap; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern; +import org.apache.pdfbox.util.Matrix; + +/** + * Factory class to cache TilingPaint generation. 
+ * + * @author Tilman Hausherr + */ +class TilingPaintFactory +{ + private final PageDrawer drawer; + private final Map> weakCache + = new WeakHashMap>(); + + TilingPaintFactory(PageDrawer drawer) + { + this.drawer = drawer; + } + + Paint create(PDTilingPattern pattern, PDColorSpace colorSpace, + PDColor color, AffineTransform xform) throws IOException + { + Paint paint = null; + TilingPaintParameter tilingPaintParameter + = new TilingPaintParameter(drawer.getInitialMatrix(), pattern.getCOSObject(), colorSpace, color, xform); + WeakReference weakRef = weakCache.get(tilingPaintParameter); + if (weakRef != null) + { + // PDFBOX-4058: additional WeakReference makes gc work better + paint = weakRef.get(); + } + if (paint == null) + { + paint = new TilingPaint(drawer, pattern, colorSpace, color, xform); + weakCache.put(tilingPaintParameter, new WeakReference(paint)); + } + return paint; + } + + // class to characterize a TilingPaint object. It is important that TilingPaint does not + // keep any objects from this class, so that the weak cache works. + private static class TilingPaintParameter + { + private final Matrix matrix; + private final COSDictionary patternDict; + private final PDColorSpace colorSpace; + private final PDColor color; + private final AffineTransform xform; + + private TilingPaintParameter(Matrix matrix, COSDictionary patternDict, PDColorSpace colorSpace, + PDColor color, AffineTransform xform) + { + this.matrix = matrix.clone(); + this.patternDict = patternDict; + this.colorSpace = colorSpace; + this.color = color; + this.xform = xform; + } + + // this may not catch all equals, but at least those related to one resource dictionary. + // it isn't needed to investigate further because matrix or transform would be different anyway. 
+ @Override + public boolean equals(Object obj) + { + if (this == obj) + { + return true; + } + if (obj == null) + { + return false; + } + if (getClass() != obj.getClass()) + { + return false; + } + final TilingPaintParameter other = (TilingPaintParameter) obj; + if (this.matrix != other.matrix && (this.matrix == null || !this.matrix.equals(other.matrix))) + { + return false; + } + if (this.patternDict != other.patternDict && (this.patternDict == null || !this.patternDict.equals(other.patternDict))) + { + return false; + } + if (this.colorSpace != other.colorSpace && (this.colorSpace == null || !this.colorSpace.equals(other.colorSpace))) + { + return false; + } + if (this.color == null && other.color != null) + { + return false; + } + if (this.color != null && other.color == null) + { + return false; + } + if (this.color != null && this.color.getColorSpace() != other.color.getColorSpace()) + { + return false; + } + try + { + if (this.color != null && other.color != null && + this.color != other.color && this.color.toRGB() != other.color.toRGB()) + { + return false; + } + } + catch (IOException ex) + { + return false; + } + return !(this.xform != other.xform && (this.xform == null || !this.xform.equals(other.xform))); + } + + @Override + public int hashCode() + { + int hash = 7; + hash = 23 * hash + (this.matrix != null ? this.matrix.hashCode() : 0); + hash = 23 * hash + (this.patternDict != null ? this.patternDict.hashCode() : 0); + hash = 23 * hash + (this.colorSpace != null ? this.colorSpace.hashCode() : 0); + hash = 23 * hash + (this.color != null ? this.color.hashCode() : 0); + hash = 23 * hash + (this.xform != null ? 
this.xform.hashCode() : 0); + return hash; + } + + @Override + public String toString() + { + return "TilingPaintParameter{" + "matrix=" + matrix + ", pattern=" + patternDict + + ", colorSpace=" + colorSpace + ", color=" + color + ", xform=" + xform + '}'; + } + } +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/rendering/Type1Glyph2D.java b/pdfbox/src/main/java/org/apache/pdfbox/rendering/Type1Glyph2D.java index fd1035a22cb..bfd74676b80 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/rendering/Type1Glyph2D.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/rendering/Type1Glyph2D.java @@ -19,6 +19,7 @@ import java.awt.geom.GeneralPath; import java.io.IOException; import java.util.HashMap; +import java.util.Locale; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -57,7 +58,25 @@ public GeneralPath getPathForCharacterCode(int code) String name = font.getEncoding().getName(code); if (!font.hasGlyph(name)) { - LOG.warn("No glyph for " + code + " (" + name + ") in font " + font.getName()); + LOG.warn("No glyph for code " + code + " (" + name + ") in font " + font.getName()); + if (code == 10 && font.isStandard14()) + { + // PDFBOX-4001 return empty path for line feed on std14 + path = new GeneralPath(); + cache.put(code, path); + return path; + } + + // try unicode name + String unicodes = font.getGlyphList().toUnicode(name); + if (unicodes != null && unicodes.length() == 1) + { + String uniName = getUniNameOfCodePoint(unicodes.codePointAt(0)); + if (font.hasGlyph(uniName)) + { + name = uniName; + } + } } // todo: can this happen? should it be encapsulated? 
@@ -85,4 +104,21 @@ public void dispose() { cache.clear(); } + + // copied from UniUtil + private static String getUniNameOfCodePoint(int codePoint) + { + String hex = Integer.toString(codePoint, 16).toUpperCase(Locale.US); + switch (hex.length()) + { + case 1: + return "uni000" + hex; + case 2: + return "uni00" + hex; + case 3: + return "uni0" + hex; + default: + return "uni" + hex; + } + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStreamEngine.java b/pdfbox/src/main/java/org/apache/pdfbox/text/LegacyPDFStreamEngine.java similarity index 77% rename from pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStreamEngine.java rename to pdfbox/src/main/java/org/apache/pdfbox/text/LegacyPDFStreamEngine.java index ac4135238c6..eb8b6c020b3 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStreamEngine.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/text/LegacyPDFStreamEngine.java @@ -17,8 +17,16 @@ package org.apache.pdfbox.text; import java.io.InputStream; +import java.io.IOException; +import java.util.Map; +import java.util.WeakHashMap; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; + +import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.fontbox.util.BoundingBox; + import org.apache.pdfbox.contentstream.PDFStreamEngine; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.font.encoding.GlyphList; @@ -31,11 +39,6 @@ import org.apache.pdfbox.pdmodel.font.PDType0Font; import org.apache.pdfbox.pdmodel.font.PDType3Font; import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState; - -import java.io.IOException; - -import org.apache.fontbox.ttf.TrueTypeFont; -import org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.util.Matrix; import org.apache.pdfbox.util.Vector; import org.apache.pdfbox.contentstream.operator.DrawObject; @@ -60,28 +63,33 @@ import org.apache.pdfbox.contentstream.operator.text.SetTextRise; import 
org.apache.pdfbox.contentstream.operator.text.SetWordSpacing; import org.apache.pdfbox.contentstream.operator.text.ShowText; +import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.pdmodel.font.PDFontDescriptor; /** - * PDFStreamEngine subclass for advanced processing of text via TextPosition. + * LEGACY text calculations which are known to be incorrect but are depended on by PDFTextStripper. + * + * This class exists only so that we don't break the code of users who have their own subclasses of + * PDFTextStripper. It replaces the mostly empty implementation of showGlyph() in PDFStreamEngine + * with a heuristic implementation which is backwards compatible. * - * @see org.apache.pdfbox.text.TextPosition - * @author Ben Litchfield - * @author John Hewson + * DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper. + * THIS CODE IS DELIBERATELY INCORRECT, USE PDFStreamEngine INSTEAD. */ -class PDFTextStreamEngine extends PDFStreamEngine +class LegacyPDFStreamEngine extends PDFStreamEngine { - private static final Log LOG = LogFactory.getLog(PDFTextStreamEngine.class); + private static final Log LOG = LogFactory.getLog(LegacyPDFStreamEngine.class); private int pageRotation; private PDRectangle pageSize; private Matrix translateMatrix; private final GlyphList glyphList; + private final Map fontHeightMap = new WeakHashMap(); /** * Constructor. 
*/ - PDFTextStreamEngine() throws IOException + LegacyPDFStreamEngine() throws IOException { addOperator(new BeginText()); addOperator(new Concatenate()); @@ -107,13 +115,13 @@ class PDFTextStreamEngine extends PDFStreamEngine addOperator(new ShowTextLineAndSpace()); // load additional glyph list for Unicode mapping - String path = "org/apache/pdfbox/resources/glyphlist/additional.txt"; - InputStream input = GlyphList.class.getClassLoader().getResourceAsStream(path); + String path = "/org/apache/pdfbox/resources/glyphlist/additional.txt"; + InputStream input = GlyphList.class.getResourceAsStream(path); glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input); } /** - * This will initialise and process the contents of the stream. + * This will initialize and process the contents of the stream. * * @param page the page to process * @throws java.io.IOException if there is an error accessing the stream. @@ -123,7 +131,7 @@ public void processPage(PDPage page) throws IOException { this.pageRotation = page.getRotation(); this.pageSize = page.getCropBox(); - + if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0) { translateMatrix = null; @@ -132,20 +140,26 @@ public void processPage(PDPage page) throws IOException { // translation matrix for cropbox translateMatrix = Matrix.getTranslateInstance(-pageSize.getLowerLeftX(), -pageSize.getLowerLeftY()); - } + } super.processPage(page); } /** - * This method was originally written by Ben Litchfield for PDFStreamEngine. + * Called when a glyph is to be processed. The heuristic calculations here were originally + * written by Ben Litchfield for PDFStreamEngine. 
*/ @Override - protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, - Vector displacement) throws IOException + protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, + String unicode, + Vector displacement) + throws IOException { // // legacy calculations which were previously in PDFStreamEngine // + // DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper. + // THIS CODE IS DELIBERATELY INCORRECT + // PDGraphicsState state = getGraphicsState(); Matrix ctm = state.getCurrentTransformationMatrix(); @@ -153,38 +167,6 @@ protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, Stri float horizontalScaling = state.getTextState().getHorizontalScaling() / 100f; Matrix textMatrix = getTextMatrix(); - BoundingBox bbox = font.getBoundingBox(); - if (bbox.getLowerLeftY() < Short.MIN_VALUE) - { - // PDFBOX-2158 and PDFBOX-3130 - // files by Salmat eSolutions / ClibPDF Library - bbox.setLowerLeftY(- (bbox.getLowerLeftY() + 65536)); - } - // 1/2 the bbox is used as the height todo: why? - float glyphHeight = bbox.getHeight() / 2; - - // sometimes the bbox has very high values, but CapHeight is OK - PDFontDescriptor fontDescriptor = font.getFontDescriptor(); - if (fontDescriptor != null) - { - float capHeight = fontDescriptor.getCapHeight(); - if (capHeight != 0 && capHeight < glyphHeight) - { - glyphHeight = capHeight; - } - } - - // transformPoint from glyph space -> text space - float height; - if (font instanceof PDType3Font) - { - height = font.getFontMatrix().transformPoint(0, glyphHeight).y; - } - else - { - height = glyphHeight / 1000; - } - float displacementX = displacement.getX(); // the sorting algorithm is based on the width of the character. 
As the displacement // for vertical characters doesn't provide any suitable value for it, we have to @@ -196,7 +178,7 @@ protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, Stri TrueTypeFont ttf = null; if (font instanceof PDTrueTypeFont) { - ttf = ((PDTrueTypeFont)font).getTrueTypeFont(); + ttf = ((PDTrueTypeFont)font).getTrueTypeFont(); } else if (font instanceof PDType0Font) { @@ -211,6 +193,14 @@ else if (font instanceof PDType0Font) displacementX *= 1000f / ttf.getUnitsPerEm(); } } + + // + // legacy calculations which were previously in PDFStreamEngine + // + // DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper. + // THIS CODE IS DELIBERATELY INCORRECT + // + // (modified) combined displacement, this is calculated *without* taking the character // spacing and word spacing into account, due to legacy code in TextStripper float tx = displacementX * fontSize * horizontalScaling; @@ -226,7 +216,13 @@ else if (font instanceof PDType0Font) // (modified) width and height calculations float dxDisplay = nextX - textRenderingMatrix.getTranslateX(); - float dyDisplay = height * textRenderingMatrix.getScalingFactorY(); + Float fontHeight = fontHeightMap.get(font.getCOSObject()); + if (fontHeight == null) + { + fontHeight = computeFontHeight(font); + fontHeightMap.put(font.getCOSObject(), fontHeight); + } + float dyDisplay = fontHeight * textRenderingMatrix.getScalingFactorY(); // // start of the original method @@ -270,17 +266,17 @@ else if (font instanceof PDType0Font) float spaceWidthDisplay = spaceWidthText * textRenderingMatrix.getScalingFactorX(); // use our additional glyph list for Unicode mapping - unicode = font.toUnicode(code, glyphList); + String unicodeMapping = font.toUnicode(code, glyphList); // when there is no Unicode mapping available, Acrobat simply coerces the character code // into Unicode, so we do the same. 
Subclasses of PDFStreamEngine don't necessarily want // this, which is why we leave it until this point in PDFTextStreamEngine. - if (unicode == null) + if (unicodeMapping == null) { if (font instanceof PDSimpleFont) { char c = (char) code; - unicode = new String(new char[] { c }); + unicodeMapping = new String(new char[] { c }); } else { @@ -305,11 +301,67 @@ else if (font instanceof PDType0Font) processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY, - dyDisplay, dxDisplay, - spaceWidthDisplay, unicode, new int[] { code } , font, fontSize, + Math.abs(dyDisplay), dxDisplay, + Math.abs(spaceWidthDisplay), unicodeMapping, new int[] { code }, font, + fontSize, (int)(fontSize * textMatrix.getScalingFactorX()))); } + /** + * Compute the font height. Override this if you want to use own calculations. + * + * @param font the font. + * @return the font height. + * + * @throws IOException if there is an error while getting the font bounding box. + */ + protected float computeFontHeight(PDFont font) throws IOException + { + BoundingBox bbox = font.getBoundingBox(); + if (bbox.getLowerLeftY() < Short.MIN_VALUE) + { + // PDFBOX-2158 and PDFBOX-3130 + // files by Salmat eSolutions / ClibPDF Library + bbox.setLowerLeftY(- (bbox.getLowerLeftY() + 65536)); + } + // 1/2 the bbox is used as the height todo: why? 
+ float glyphHeight = bbox.getHeight() / 2; + + // sometimes the bbox has very high values, but CapHeight is OK + PDFontDescriptor fontDescriptor = font.getFontDescriptor(); + if (fontDescriptor != null) + { + float capHeight = fontDescriptor.getCapHeight(); + if (Float.compare(capHeight, 0) != 0 && + (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = capHeight; + } + // PDFBOX-3464, PDFBOX-4480, PDFBOX-4553: + // sometimes even CapHeight has very high value, but Ascent and Descent are ok + float ascent = fontDescriptor.getAscent(); + float descent = fontDescriptor.getDescent(); + if (capHeight > ascent && ascent > 0 && descent < 0 && + ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = (ascent - descent) / 2; + } + } + + // transformPoint from glyph space -> text space + float height; + if (font instanceof PDType3Font) + { + height = font.getFontMatrix().transformPoint(0, glyphHeight).y; + } + else + { + height = glyphHeight / 1000; + } + + return height; + } + /** * A method provided as an event interface to allow a subclass to perform some specific * functionality when text needs to be processed. 
diff --git a/pdfbox/src/main/java/org/apache/pdfbox/text/PDFMarkedContentExtractor.java b/pdfbox/src/main/java/org/apache/pdfbox/text/PDFMarkedContentExtractor.java index dfcdb8cff31..4be400b14eb 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/text/PDFMarkedContentExtractor.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/text/PDFMarkedContentExtractor.java @@ -17,11 +17,12 @@ package org.apache.pdfbox.text; import java.io.IOException; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Stack; +import java.util.Deque; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; @@ -37,11 +38,11 @@ * * @author Johannes Koch */ -public class PDFMarkedContentExtractor extends PDFTextStreamEngine +public class PDFMarkedContentExtractor extends LegacyPDFStreamEngine { - private final boolean suppressDuplicateOverlappingText = true; + private boolean suppressDuplicateOverlappingText = true; private final List markedContents = new ArrayList(); - private final Stack currentMarkedContents = new Stack(); + private final Deque currentMarkedContents = new ArrayDeque(); private final Map> characterListMapping = new HashMap>(); /** @@ -67,6 +68,28 @@ public PDFMarkedContentExtractor(String encoding) throws IOException // todo: MP - Marked Content Point with Properties } + /** + * @return the suppressDuplicateOverlappingText setting. + */ + public boolean isSuppressDuplicateOverlappingText() + { + return suppressDuplicateOverlappingText; + } + + /** + * By default the class will attempt to remove text that overlaps each other. Word paints the + * same character several times in order to make it look bold. By setting this to false all text + * will be extracted, which means that certain sections will be duplicated, but better + * performance will be noticed. + * + * @param suppressDuplicateOverlappingText The suppressDuplicateOverlappingText setting to set. 
+ */ + public void setSuppressDuplicateOverlappingText(boolean suppressDuplicateOverlappingText) + { + this.suppressDuplicateOverlappingText = suppressDuplicateOverlappingText; + } + + /** * This will determine of two floating point numbers are within a specified variance. * @@ -79,6 +102,7 @@ private boolean within( float first, float second, float variance ) return second > first - variance && second < first + variance; } + @Override public void beginMarkedContentSequence(COSName tag, COSDictionary properties) { PDMarkedContent markedContent = PDMarkedContent.create(tag, properties); @@ -98,6 +122,7 @@ public void beginMarkedContentSequence(COSName tag, COSDictionary properties) this.currentMarkedContents.push(markedContent); } + @Override public void endMarkedContentSequence() { if (!this.currentMarkedContents.isEmpty()) @@ -153,10 +178,9 @@ protected void processTextPosition( TextPosition text ) float tolerance = (text.getWidth()/textCharacter.length())/3.0f; for (TextPosition sameTextCharacter : sameTextCharacters) { - TextPosition character = sameTextCharacter; - String charCharacter = character.getUnicode(); - float charX = character.getX(); - float charY = character.getY(); + String charCharacter = sameTextCharacter.getUnicode(); + float charX = sameTextCharacter.getX(); + float charY = sameTextCharacter.getY(); //only want to suppress if( charCharacter != null && //charCharacter.equals( textCharacter ) && @@ -182,7 +206,7 @@ protected void processTextPosition( TextPosition text ) /* In the wild, some PDF encoded documents put diacritics (accents on * top of characters) into a separate Tj element. When displaying them - * graphically, the two chunks get overlayed. With text output though, + * graphically, the two chunks get overlaid. With text output though, * we need to do the overlay. This code recombines the diacritic with * its associated character if the two are consecutive. 
*/ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java b/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java index 8727edc80fd..52751af9a60 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java @@ -16,6 +16,7 @@ */ package org.apache.pdfbox.text; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -58,7 +59,7 @@ * * @author Ben Litchfield */ -public class PDFTextStripper extends PDFTextStreamEngine +public class PDFTextStripper extends LegacyPDFStreamEngine { private static float defaultIndentThreshold = 2.0f; private static float defaultDropThreshold = 2.5f; @@ -214,8 +215,13 @@ public PDFTextStripper() throws IOException } /** - * This will return the text of a document. See writeText.
    + * This will return the text of a document. See writeText.
    * NOTE: The document must not be encrypted when coming into this method. + * + *

    IMPORTANT: By default, text extraction is done in the same sequence as the text in the PDF page content stream. + * PDF is a graphic format, not a text format, and unlike HTML, it has no requirements that text one on page + * be rendered in a certain order. The order is the one that was determined by the software that created the + * PDF. To get text sorted from left to right and top to botton, use {@link #setSortByPosition(boolean)}. * * @param doc The document to get the text from. * @return The text of the PDF document. @@ -236,10 +242,7 @@ private void resetEngine() { charactersByArticle.clear(); } - if (characterListMapping != null) - { - characterListMapping.clear(); - } + characterListMapping.clear(); } /** @@ -399,7 +402,7 @@ private void fillBeadRectangles(PDPage page) beadRectangles = new ArrayList(); for (PDThreadBead bead : page.getThreadBeads()) { - if (bead == null) + if (bead == null || bead.getRectangle() == null) { // can't skip, because of null entry handling in processTextPosition() beadRectangles.add(null); @@ -538,8 +541,6 @@ protected void writePage() throws IOException Collections.sort(textList, comparator); } } - - Iterator textIter = textList.iterator(); startArticle(); startOfArticle = true; @@ -549,7 +550,7 @@ protected void writePage() throws IOException // the line from presentation form to logical form (if needed). List line = new ArrayList(); - textIter = textList.iterator(); // start from the beginning again + Iterator textIter = textList.iterator(); // PDF files don't always store spaces. We will need to guess where we should add // spaces based on the distances between TextPositions. Historically, this was done // based on the size of the space character provided by the font. 
In general, this @@ -640,14 +641,7 @@ protected void writePage() throws IOException float expectedStartOfNextWordX = EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE; if (endOfLastTextX != END_OF_LAST_TEXT_X_RESET_VALUE) { - if (deltaCharWidth > deltaSpace) - { - expectedStartOfNextWordX = endOfLastTextX + deltaSpace; - } - else - { - expectedStartOfNextWordX = endOfLastTextX + deltaCharWidth; - } + expectedStartOfNextWordX = endOfLastTextX + Math.min(deltaSpace, deltaCharWidth); } if (lastPosition != null) @@ -680,13 +674,25 @@ protected void writePage() throws IOException } // test if our TextPosition starts after a new word would be expected to start if (expectedStartOfNextWordX != EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE - && expectedStartOfNextWordX < positionX && - // only bother adding a space if the last character was not a space - lastPosition.getTextPosition().getUnicode() != null - && !lastPosition.getTextPosition().getUnicode().endsWith(" ")) + && expectedStartOfNextWordX < positionX + // only bother adding a word separator if the last character was not a word separator + && (wordSeparator.isEmpty() || // + (lastPosition.getTextPosition().getUnicode() != null + && !lastPosition.getTextPosition().getUnicode() + .endsWith(wordSeparator)))) { line.add(LineItem.getWordSeparator()); } + // if there is at least the equivalent of one space + // between the last character and the current one, + // reset the max line height as the font size may have completely changed + if (Math.abs(position.getX() + - lastPosition.getTextPosition().getX()) > (wordSpacing + deltaSpace)) + { + maxYForLine = MAX_Y_FOR_LINE_RESET_VALUE; + maxHeightForLine = MAX_HEIGHT_FOR_LINE_RESET_VALUE; + minYTopForLine = MIN_Y_TOP_FOR_LINE_RESET_VALUE; + } } if (positionY >= maxYForLine) { @@ -738,7 +744,7 @@ private boolean overlap(float y1, float height1, float y2, float height2) /** * Write the line separator value to the output stream. 
* - * @throws IOException If there is a problem writing out the lineseparator to the document. + * @throws IOException If there is a problem writing out the line separator to the document. */ protected void writeLineSeparator() throws IOException { @@ -748,7 +754,7 @@ protected void writeLineSeparator() throws IOException /** * Write the word separator value to the output stream. * - * @throws IOException If there is a problem writing out the wordseparator to the document. + * @throws IOException If there is a problem writing out the word separator to the document. */ protected void writeWordSeparator() throws IOException { @@ -933,7 +939,7 @@ else if (notFoundButFirstAboveArticleDivisionIndex != -1) // In the wild, some PDF encoded documents put diacritics (accents on // top of characters) into a separate Tj element. When displaying them - // graphically, the two chunks get overlayed. With text output though, + // graphically, the two chunks get overlaid. With text output though, // we need to do the overlay. This code recombines the diacritic with // its associated character if the two are consecutive. if (textList.isEmpty()) @@ -1199,9 +1205,9 @@ public boolean getSortByPosition() /** * The order of the text tokens in a PDF file may not be in the same as they appear visually on the screen. For * example, a PDF writer may write out all text by font, so all bold or larger text, then make a second pass and - * write out the normal text.
    - * The default is to not sort by position.
    - *
    + * write out the normal text.
    + * The default is to not sort by position.
    + *
    * A PDF writer could choose to write each character in a different order. By default PDFBox does not sort * the text tokens before processing them due to performance reasons. * @@ -1815,7 +1821,7 @@ private String handleDirection(String word) if ((level & 1) != 0) { - for (; --end >= start;) + while (--end >= start) { char character = word.charAt(end); if (Character.isMirrored(word.codePointAt(end))) @@ -1848,8 +1854,8 @@ private String handleDirection(String word) static { - String path = "org/apache/pdfbox/resources/text/BidiMirroring.txt"; - InputStream input = PDFTextStripper.class.getClassLoader().getResourceAsStream(path); + String path = "/org/apache/pdfbox/resources/text/BidiMirroring.txt"; + InputStream input = new BufferedInputStream(PDFTextStripper.class.getResourceAsStream(path)); try { parseBidiFile(input); @@ -1870,7 +1876,7 @@ private String handleDirection(String word) LOG.error("Could not close BidiMirroring.txt ", e); } } - }; + } /** * This method parses the bidi file provided as inputstream. @@ -1919,7 +1925,7 @@ private static void parseBidiFile(InputStream inputStream) throws IOException } /** - * Used within {@link #normalize(List, boolean, boolean)} to create a single {@link WordWithTextPositions} entry. + * Used within {@link #normalize(List)} to create a single {@link WordWithTextPositions} entry. */ private WordWithTextPositions createWord(String word, List wordPositions) { @@ -1953,7 +1959,7 @@ private String normalizeWord(String word) { builder = new StringBuilder(strLength * 2); } - builder.append(word.substring(p, q)); + builder.append(word, p, q); // Some fonts map U+FDF2 differently than the Unicode spec. // They add an extra U+0627 character to compensate. // This removes the extra character for those fonts. 
@@ -1977,13 +1983,13 @@ private String normalizeWord(String word) } else { - builder.append(word.substring(p, q)); + builder.append(word, p, q); return handleDirection(builder.toString()); } } /** - * Used within {@link #normalize(List, boolean, boolean)} to handle a {@link TextPosition}. + * Used within {@link #normalize(List)} to handle a {@link TextPosition}. * * @return The StringBuilder that must be used when calling this method. */ diff --git a/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripperByArea.java b/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripperByArea.java index 8b669a638c2..579cc814211 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripperByArea.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripperByArea.java @@ -21,7 +21,6 @@ import java.io.StringWriter; import java.util.ArrayList; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.pdfbox.pdmodel.PDPage; @@ -45,7 +44,6 @@ public class PDFTextStripperByArea extends PDFTextStripper */ public PDFTextStripperByArea() throws IOException { - super(); super.setShouldSeparateByBeads(false); } @@ -64,7 +62,8 @@ public final void setShouldSeparateByBeads(boolean aShouldSeparateByBeads) * Add a new region to group text by. * * @param regionName The name of the region. - * @param rect The rectangle area to retrieve the text from. + * @param rect The rectangle area to retrieve the text from. The y-coordinates are java + * coordinates (y == 0 is top), not PDF coordinates (y == 0 is bottom). 
*/ public void addRegion( String regionName, Rectangle2D rect ) { @@ -113,14 +112,11 @@ public String getTextForRegion( String regionName ) */ public void extractRegions( PDPage page ) throws IOException { - Iterator regionIter = regions.iterator(); - while( regionIter.hasNext() ) + for (String regionName : regions) { setStartPage(getCurrentPageNo()); setEndPage(getCurrentPageNo()); - //reset the stored text for the region so this class - //can be reused. - String regionName = regionIter.next(); + // reset the stored text for the region so this class can be reused. ArrayList> regionCharactersByArticle = new ArrayList>(); regionCharactersByArticle.add( new ArrayList() ); regionCharacterList.put( regionName, regionCharactersByArticle ); @@ -138,17 +134,15 @@ public void extractRegions( PDPage page ) throws IOException * {@inheritDoc} */ @Override - protected void processTextPosition( TextPosition text ) + protected void processTextPosition(TextPosition text) { - Iterator regionIter = regionArea.keySet().iterator(); - while( regionIter.hasNext() ) + for (Map.Entry regionAreaEntry : regionArea.entrySet()) { - String region = regionIter.next(); - Rectangle2D rect = regionArea.get( region ); - if( rect.contains( text.getX(), text.getY() ) ) + Rectangle2D rect = regionAreaEntry.getValue(); + if (rect.contains(text.getX(), text.getY())) { - charactersByArticle = regionCharacterList.get( region ); - super.processTextPosition( text ); + charactersByArticle = regionCharacterList.get(regionAreaEntry.getKey()); + super.processTextPosition(text); } } } @@ -162,10 +156,8 @@ protected void processTextPosition( TextPosition text ) @Override protected void writePage() throws IOException { - Iterator regionIter = regionArea.keySet().iterator(); - while( regionIter.hasNext() ) + for (String region : regionArea.keySet()) { - String region = regionIter.next(); charactersByArticle = regionCharacterList.get( region ); output = regionText.get( region ); super.writePage(); diff --git 
a/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java b/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java index 5296304a2ba..4eaadbff5f2 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java @@ -17,6 +17,7 @@ package org.apache.pdfbox.text; import java.text.Normalizer; +import java.util.Arrays; import java.util.HashMap; import java.util.Map; import org.apache.commons.logging.Log; @@ -35,48 +36,6 @@ public final class TextPosition private static final Map DIACRITICS = createDiacritics(); - // Adds non-decomposing diacritics to the hash with their related combining character. - // These are values that the unicode spec claims are equivalent but are not mapped in the form - // NFKC normalization method. Determined by going through the Combining Diacritical Marks - // section of the Unicode spec and identifying which characters are not mapped to by the - // normalization. - private static Map createDiacritics() - { - Map map = new HashMap(31); - map.put(0x0060, "\u0300"); - map.put(0x02CB, "\u0300"); - map.put(0x0027, "\u0301"); - map.put(0x02B9, "\u0301"); - map.put(0x02CA, "\u0301"); - map.put(0x005e, "\u0302"); - map.put(0x02C6, "\u0302"); - map.put(0x007E, "\u0303"); - map.put(0x02C9, "\u0304"); - map.put(0x00B0, "\u030A"); - map.put(0x02BA, "\u030B"); - map.put(0x02C7, "\u030C"); - map.put(0x02C8, "\u030D"); - map.put(0x0022, "\u030E"); - map.put(0x02BB, "\u0312"); - map.put(0x02BC, "\u0313"); - map.put(0x0486, "\u0313"); - map.put(0x055A, "\u0313"); - map.put(0x02BD, "\u0314"); - map.put(0x0485, "\u0314"); - map.put(0x0559, "\u0314"); - map.put(0x02D4, "\u031D"); - map.put(0x02D5, "\u031E"); - map.put(0x02D6, "\u031F"); - map.put(0x02D7, "\u0320"); - map.put(0x02B2, "\u0321"); - map.put(0x02CC, "\u0329"); - map.put(0x02B7, "\u032B"); - map.put(0x02CD, "\u0331"); - map.put(0x005F, "\u0332"); - map.put(0x204E, "\u0359"); - return map; - } - // text matrix 
for the start of the text object, coordinates are in display units // and have not been adjusted private final Matrix textMatrix; @@ -108,9 +67,9 @@ private static Map createDiacritics() * Constructor. * * @param pageRotation rotation of the page that the text is located in - * @param pageWidth rotation of the page that the text is located in - * @param pageHeight rotation of the page that the text is located in - * @param textMatrix TextMatrix for start of text (in display units) + * @param pageWidth width of the page that the text is located in + * @param pageHeight height of the page that the text is located in + * @param textMatrix text rendering matrix for start of text (in display units) * @param endX x coordinate of the end position * @param endY y coordinate of the end position * @param maxHeight Maximum height of text (in display units) @@ -120,7 +79,7 @@ private static Map createDiacritics() * @param charCodes An array of the internal PDF character codes for the glyphs in this text. * @param font The current font for this text position. * @param fontSize The new font size. - * @param fontSizeInPt The font size in pt units. + * @param fontSizeInPt The font size in pt units (see {@link #getFontSizeInPt()} for details). 
*/ public TextPosition(int pageRotation, float pageWidth, float pageHeight, Matrix textMatrix, float endX, float endY, float maxHeight, float individualWidth, @@ -132,8 +91,7 @@ public TextPosition(int pageRotation, float pageWidth, float pageHeight, Matrix this.endX = endX; this.endY = endY; - int rotationAngle = pageRotation; - this.rotation = rotationAngle; + this.rotation = pageRotation; this.maxHeight = maxHeight; this.pageHeight = pageHeight; @@ -147,19 +105,63 @@ public TextPosition(int pageRotation, float pageWidth, float pageHeight, Matrix this.fontSize = fontSize; this.fontSizePt = fontSizeInPt; - x = getXRot(rotationAngle); - if (rotationAngle == 0 || rotationAngle == 180) + x = getXRot(rotation); + if (rotation == 0 || rotation == 180) { - y = this.pageHeight - getYLowerLeftRot(rotationAngle); + y = this.pageHeight - getYLowerLeftRot(rotation); } else { - y = this.pageWidth - getYLowerLeftRot(rotationAngle); + y = this.pageWidth - getYLowerLeftRot(rotation); } } + // Adds non-decomposing diacritics to the hash with their related combining character. + // These are values that the unicode spec claims are equivalent but are not mapped in the form + // NFKC normalization method. Determined by going through the Combining Diacritical Marks + // section of the Unicode spec and identifying which characters are not mapped to by the + // normalization. 
+ private static Map createDiacritics() + { + Map map = new HashMap(31); + map.put(0x0060, "\u0300"); + map.put(0x02CB, "\u0300"); + map.put(0x0027, "\u0301"); + map.put(0x02B9, "\u0301"); + map.put(0x02CA, "\u0301"); + map.put(0x005e, "\u0302"); + map.put(0x02C6, "\u0302"); + map.put(0x007E, "\u0303"); + map.put(0x02C9, "\u0304"); + map.put(0x00B0, "\u030A"); + map.put(0x02BA, "\u030B"); + map.put(0x02C7, "\u030C"); + map.put(0x02C8, "\u030D"); + map.put(0x0022, "\u030E"); + map.put(0x02BB, "\u0312"); + map.put(0x02BC, "\u0313"); + map.put(0x0486, "\u0313"); + map.put(0x055A, "\u0313"); + map.put(0x02BD, "\u0314"); + map.put(0x0485, "\u0314"); + map.put(0x0559, "\u0314"); + map.put(0x02D4, "\u031D"); + map.put(0x02D5, "\u031E"); + map.put(0x02D6, "\u031F"); + map.put(0x02D7, "\u0320"); + map.put(0x02B2, "\u0321"); + map.put(0x02CC, "\u0329"); + map.put(0x02B7, "\u032B"); + map.put(0x02CD, "\u0331"); + map.put(0x005F, "\u0332"); + map.put(0x204E, "\u0359"); + return map; + } + /** - * Return the string of characters stored in this object. + * Return the string of characters stored in this object. The length can be different than the + * CharacterCodes length e.g. if ligatures are used ("fi", "fl", "ffl") where one glyph + * represents several unicode characters. * * @return The string on the screen. */ @@ -179,7 +181,10 @@ public int[] getCharacterCodes() } /** - * Return the text matrix stored in this object. + * The matrix containing the starting text position and scaling. Despite the name, it is not the + * text matrix set by the "Tm" operator, it is really the effective text rendering matrix (which + * is dependent on the current transformation matrix (set by the "cm" operator), the text matrix + * (set by the "Tm" operator), the font size (set by the "Tf" operator) and the page cropbox). 
* * @return The Matrix containing the starting text position */ @@ -189,8 +194,16 @@ public Matrix getTextMatrix() } /** - * Return the direction/orientation of the string in this object based on its text matrix. - * @return The direction of the text (0, 90, 180, or 270) + * Return the direction/orientation of the string in this object based on its text matrix. Only + * angles of 0, 90, 180, or 270 are supported. To get other angles, use this code: + *

    +     * TextPosition text = ...
    +     * Matrix m = text.getTextMatrix().clone();
    +     * m.concatenate(text.getFont().getFontMatrix());
    +     * int angle = (int) Math.round(Math.toDegrees(Math.atan2(m.getShearY(), m.getScaleY())));
    +     * 
    + * + * @return The direction of the text (0, 90, 180, or 270). */ public float getDir() { @@ -263,7 +276,12 @@ else if (rotation == 270) /** * This will get the page rotation adjusted x position of the character. - * This is adjusted based on page rotation so that the upper left is 0,0. + * This is adjusted based on page rotation so that the upper left is 0,0 which is + * unlike PDF coordinates, which start at the bottom left. See also + * this answer by Michael Klink for + * further details and + * PDFBOX-4597 for a sample + * file. * * @return The x coordinate of the character. */ @@ -276,6 +294,13 @@ public float getX() * This will get the text direction adjusted x position of the character. * This is adjusted based on text direction so that the first character * in that direction is in the upper left at 0,0. + * This method ignores the page rotation but takes the text rotation (see + * {@link #getDir() getDir()}) and adjusts the coordinates to awt. This is useful when doing + * text extraction, to compare the glyph positions when imagining these to be horizontal. See also + * this answer by Michael Klink for + * further details and + * PDFBOX-4597 for a sample + * file. * * @return The x coordinate of the text. */ @@ -313,8 +338,13 @@ else if (rotation == 270) } /** - * This will get the y position of the text, adjusted so that 0,0 is upper left and it is - * adjusted based on the page rotation. + * This will get the page rotation adjusted x position of the character. + * This is adjusted based on page rotation so that the upper left is 0,0 which is + * unlike PDF coordinates, which start at the bottom left. See also + * this answer by Michael Klink for + * further details and + * PDFBOX-4597 for a sample + * file. * * @return The adjusted y coordinate of the character. */ @@ -326,6 +356,13 @@ public float getY() /** * This will get the y position of the text, adjusted so that 0,0 is upper left and it is * adjusted based on the text direction. 
+ * This method ignores the page rotation but takes the + * text rotation and adjusts the coordinates to awt. This is useful when doing text extraction, + * to compare the glyph positions when imagining these to be horizontal. See also + * this answer by Michael Klink for + * further details and + * PDFBOX-4597 for a sample + * file. * * @return The adjusted y coordinate of the character. */ @@ -403,7 +440,10 @@ public float getHeightDir() } /** - * This will get the font size that this object is suppose to be drawn at. + * This will get the font size that has been set with the "Tf" operator (Set text font and + * size). When the text is rendered, it may appear bigger or smaller depending on the current + * transformation matrix (set by the "cm" operator) and the text matrix (set by the "Tm" + * operator). * * @return The font size. */ @@ -413,8 +453,11 @@ public float getFontSize() } /** - * This will get the font size in pt. To get this size we have to multiply the pdf-fontsize - * and the scaling from the textmatrix + * This will get the font size in pt. To get this size we have to multiply the font size from + * {@link #getFontSize() getFontSize()} with the text matrix (set by the "Tm" operator) + * horizontal scaling factor and truncate the result to integer. The actual rendering may appear + * bigger or smaller depending on the current transformation matrix (set by the "cm" operator). + * To get the size in rendering, use {@link #getXScale() getXScale()}. * * @return The font size in pt. */ @@ -445,7 +488,11 @@ public float getWidthOfSpace() } /** - * @return Returns the xScale. + * This will get the X scaling factor. This is dependent on the current transformation matrix + * (set by the "cm" operator), the text matrix (set by the "Tm" operator) and the font size (set + * by the "Tf" operator). + * + * @return The X scaling factor. */ public float getXScale() { @@ -453,7 +500,11 @@ public float getXScale() } /** - * @return Returns the yScale. 
+ * This will get the Y scaling factor. This is dependent on the current transformation matrix + * (set by the "cm" operator), the text matrix (set by the "Tm" operator) and the font size (set + * by the "Tf" operator). + * + * @return The Y scaling factor. */ public float getYScale() { @@ -463,7 +514,7 @@ public float getYScale() /** * Get the widths of each individual character. * - * @return An array that is the same length as the length of the string. + * @return An array that has the same length as the CharacterCodes array. */ public float[] getIndividualWidths() { @@ -582,20 +633,20 @@ public void mergeDiacritic(TextPosition diacritic) } // diacritic completely covers this character and therefore we assume that this is the // character the diacritic belongs to - else if (diacXStart < currCharXStart && diacXEnd > currCharXEnd) + else if (diacXStart < currCharXStart) { insertDiacritic(i, diacritic); wasAdded = true; } // otherwise, The diacritic modifies this character because its completely // contained by the character width - else if (diacXStart >= currCharXStart && diacXEnd <= currCharXEnd) + else if (diacXEnd <= currCharXEnd) { insertDiacritic(i, diacritic); wasAdded = true; } // last character in the TextPosition so we add diacritic to the end - else if (diacXStart >= currCharXStart && diacXEnd > currCharXEnd && i == strLen - 1) + else if (i == strLen - 1) { insertDiacritic(i, diacritic); wasAdded = true; @@ -616,7 +667,7 @@ else if (diacXStart >= currCharXStart && diacXEnd > currCharXEnd && i == strLen private void insertDiacritic(int i, TextPosition diacritic) { StringBuilder sb = new StringBuilder(); - sb.append(unicode.substring(0, i)); + sb.append(unicode, 0, i); float[] widths2 = new float[widths.length + 1]; System.arraycopy(widths, 0, widths2, 0, i); @@ -629,7 +680,7 @@ private void insertDiacritic(int i, TextPosition diacritic) widths2[i + 1] = 0; // get the rest of the string - sb.append(unicode.substring(i + 1, unicode.length())); + 
sb.append(unicode.substring(i + 1)); System.arraycopy(widths, i + 1, widths2, i + 2, widths.length - i - 1); unicode = sb.toString(); @@ -669,6 +720,14 @@ public boolean isDiacritic() { return false; } + if ("ー".equals(text)) + { + // PDFBOX-3833: ー is not a real diacritic like ¨ or ˆ, it just changes the + // pronunciation of the previous sound, and is printed after the previous glyph + // http://www.japanesewithanime.com/2017/04/prolonged-sound-mark.html + // Ignoring it as diacritic avoids trouble if it slightly overlaps with the next glyph. + return false; + } int type = Character.getType(text.charAt(0)); return type == Character.NON_SPACING_MARK || type == Character.MODIFIER_SYMBOL || @@ -686,4 +745,150 @@ public String toString() { return getUnicode(); } -} + + /** + * This will get the x coordinate of the end position. This is the unadjusted value passed into + * the constructor. + * + * @return The unadjusted x coordinate of the end position + */ + public float getEndX() + { + return endX; + } + + /** + * This will get the y coordinate of the end position. This is the unadjusted value passed into + * the constructor. + * + * @return The unadjusted y coordinate of the end position + */ + public float getEndY() + { + return endY; + } + + /** + * This will get the rotation of the page that the text is located in. This is the unadjusted + * value passed into the constructor. + * + * @return The unadjusted rotation of the page that the text is located in + */ + public int getRotation() + { + return rotation; + } + + /** + * This will get the height of the page that the text is located in. This is the unadjusted + * value passed into the constructor. + * + * @return The unadjusted height of the page that the text is located in + */ + public float getPageHeight() + { + return pageHeight; + } + + /** + * This will get the width of the page that the text is located in. This is the unadjusted value + * passed into the constructor. 
+ * + * @return The unadjusted width of the page that the text is located in + */ + public float getPageWidth() + { + return pageWidth; + } + + @Override + public boolean equals(Object o) + { + if (this == o) + { + return true; + } + if (!(o instanceof TextPosition)) + { + return false; + } + + TextPosition that = (TextPosition) o; + + if (Float.compare(that.endX, endX) != 0) + { + return false; + } + if (Float.compare(that.endY, endY) != 0) + { + return false; + } + if (Float.compare(that.maxHeight, maxHeight) != 0) + { + return false; + } + if (rotation != that.rotation) + { + return false; + } + if (Float.compare(that.x, x) != 0) + { + return false; + } + if (Float.compare(that.y, y) != 0) + { + return false; + } + if (Float.compare(that.pageHeight, pageHeight) != 0) + { + return false; + } + if (Float.compare(that.pageWidth, pageWidth) != 0) + { + return false; + } + if (Float.compare(that.widthOfSpace, widthOfSpace) != 0) + { + return false; + } + if (Float.compare(that.fontSize, fontSize) != 0) + { + return false; + } + if (fontSizePt != that.fontSizePt) + { + return false; + } + if (textMatrix != null ? !textMatrix.equals(that.textMatrix) : that.textMatrix != null) + { + return false; + } + if (!Arrays.equals(charCodes, that.charCodes)) + { + return false; + } + return font != null ? font.equals(that.font) : that.font == null; + + // If changing this method, do not compare mutable fields (PDFBOX-4701) + } + + @Override + public int hashCode() + { + int result = textMatrix != null ? 
textMatrix.hashCode() : 0; + result = 31 * result + Float.floatToIntBits(endX); + result = 31 * result + Float.floatToIntBits(endY); + result = 31 * result + Float.floatToIntBits(maxHeight); + result = 31 * result + rotation; + result = 31 * result + Float.floatToIntBits(x); + result = 31 * result + Float.floatToIntBits(y); + result = 31 * result + Float.floatToIntBits(pageHeight); + result = 31 * result + Float.floatToIntBits(pageWidth); + result = 31 * result + Float.floatToIntBits(widthOfSpace); + result = 31 * result + Arrays.hashCode(charCodes); + result = 31 * result + (font != null ? font.hashCode() : 0); + result = 31 * result + Float.floatToIntBits(fontSize); + result = 31 * result + fontSizePt; + return result; + } +} \ No newline at end of file diff --git a/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java b/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java index 890afb12b6d..afbc1112cd4 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/text/TextPositionComparator.java @@ -32,13 +32,10 @@ public class TextPositionComparator implements Comparator public int compare(TextPosition pos1, TextPosition pos2) { // only compare text that is in the same direction - if (pos1.getDir() < pos2.getDir()) + int cmp1 = Float.compare(pos1.getDir(), pos2.getDir()); + if (cmp1 != 0) { - return -1; - } - else if (pos1.getDir() > pos2.getDir()) - { - return 1; + return cmp1; } // get the text direction adjusted coordinates @@ -59,22 +56,11 @@ else if (pos1.getDir() > pos2.getDir()) pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom || pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom) { - if (x1 < x2) - { - return -1; - } - else if (x1 > x2) - { - return 1; - } - else - { - return 0; - } + return Float.compare(x1, x2); } else if (pos1YBottom < pos2YBottom) { - return - 1; + return -1; } else { diff --git 
a/pdfbox/src/main/java/org/apache/pdfbox/util/Charsets.java b/pdfbox/src/main/java/org/apache/pdfbox/util/Charsets.java index 637e580214e..c11037ebc64 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/util/Charsets.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/util/Charsets.java @@ -39,7 +39,10 @@ private Charsets() {} /*** ISO-8859-1 charset */ public static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); - + + /*** Windows-1252 charset */ + public static final Charset WINDOWS_1252 = Charset.forName("Windows-1252"); + /*** UTF-8 charset */ public static final Charset UTF_8 = Charset.forName("UTF-8"); } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java b/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java index 3bc91882da2..c7847206136 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java @@ -142,7 +142,7 @@ private DateConverter() // "yyyymmdd hh:mm:ss", // "yyyymmdd", // "yyyymmddX''00''", // covers 24 cases - // (orignally the above ended with '+00''00'''; + // (originally the above ended with '+00''00'''; // the first apostrophe quoted the plus, // '' mapped to a single ', and the ''' was invalid) }; @@ -165,7 +165,7 @@ public static String toString(Calendar cal) } String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET) + cal.get(Calendar.DST_OFFSET), "'"); - return String.format("D:" + return String.format(Locale.US, "D:" + "%1$4tY%1$2tm%1$2td" // yyyyMMdd + "%1$2tH%1$2tM%1$2tS" // HHmmss + "%2$s" // time zone @@ -186,7 +186,7 @@ public static String toISO8601(Calendar cal) { String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET) + cal.get(Calendar.DST_OFFSET), ":"); - return String.format( + return String.format(Locale.US, "%1$4tY" // yyyy + "-%1$2tm" // -mm (%tm adds one to cal month value) + "-%1$2td" // -dd (%tm adds one to cal month value) @@ -197,12 +197,23 @@ public static String toISO8601(Calendar 
cal) } /* - * Constrain a timezone offset to the range [-11:59 thru +11:59]. + * Constrain a timezone offset to the range [-14:00 thru +14:00]. * by adding or subtracting multiples of a full day. */ private static int restrainTZoffset(long proposedOffset) { + if (proposedOffset <= 14 * MILLIS_PER_HOUR && proposedOffset >= -14 * MILLIS_PER_HOUR) + { + // https://www.w3.org/TR/xmlschema-2/#dateTime-timezones + // Timezones between 14:00 and -14:00 are valid + return (int) proposedOffset; + } + // Constrain a timezone offset to the range [-11:59 thru +12:00]. proposedOffset = ((proposedOffset + HALF_DAY) % DAY + DAY) % DAY; + if (proposedOffset == 0) + { + return HALF_DAY; + } // 0 <= proposedOffset < DAY proposedOffset = (proposedOffset - HALF_DAY) % HALF_DAY; // -HALF_DAY < proposedOffset < HALF_DAY @@ -332,8 +343,9 @@ private static int parseTimeField(String text, ParsePosition where, int maxlen, */ private static char skipOptionals(String text, ParsePosition where, String optionals) { - char retval = ' ', currch; - while (text != null && where.getIndex() < text.length() && + char retval = ' '; + char currch; + while (where.getIndex() < text.length() && optionals.indexOf((currch = text.charAt(where.getIndex()))) >= 0) { retval = (currch != ' ') ? currch : retval; @@ -432,7 +444,7 @@ static boolean parseTZoffset(String text, GregorianCalendar cal, int hrSign = (sign == '-' ? -1 : 1); tz.setRawOffset(restrainTZoffset(hrSign * (tzHours * MILLIS_PER_HOUR + tzMin * (long) MILLIS_PER_MINUTE))); - tz.setID("unknown"); + updateZoneId(tz); } else if ( ! hadGMT) { @@ -455,7 +467,43 @@ else if ( ! hadGMT) initialWhere.setIndex(where.getIndex()); return true; } - + + /** + * Update the zone ID based on the raw offset. This is either GMT, GMT+hh:mm or GMT-hh:mm, where + * n is between 1 and 14. The highest negative hour is -14, the highest positive hour is 12. + * Zones that don't fit in this schema are set to zone ID "unknown". + * + * @param tz the time zone to update. 
+ */ + private static void updateZoneId(TimeZone tz) + { + int offset = tz.getRawOffset(); + char pm = '+'; + if (offset < 0) + { + pm = '-'; + offset = -offset; + } + int hh = offset / 3600000; + int mm = offset % 3600000 / 60000; + if (offset == 0) + { + tz.setID("GMT"); + } + else if (pm == '+' && hh <= 12) + { + tz.setID(String.format(Locale.US, "GMT+%02d:%02d", hh, mm)); + } + else if (pm == '-' && hh <= 14) + { + tz.setID(String.format(Locale.US, "GMT-%02d:%02d", hh, mm)); + } + else + { + tz.setID("unknown"); + } + } + /* * Parses a big-endian date: year month day hour min sec. * The year must be four digits. Other fields may be adjacent @@ -493,7 +541,7 @@ private static GregorianCalendar parseBigEndianDate(String text, char nextC = skipOptionals(text, where, "."); if (nextC == '.') { - // fractions of a second: skip upto 19 digits + // fractions of a second: skip up to 19 digits parseTimeField(text, where, 19, 0); } @@ -567,7 +615,7 @@ private static GregorianCalendar parseSimpleDate(String text, String[] fmts, */ private static Calendar parseDate(String text, ParsePosition initialWhere) { - if (text == null || text.isEmpty()) + if (text == null || text.isEmpty() || "D:".equals(text.trim())) { return null; } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java b/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java index 238c28d38b5..944197ba4ba 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java @@ -17,6 +17,14 @@ package org.apache.pdfbox.util; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + /** * Utility functions for hex encoding. 
* @@ -24,6 +32,17 @@ */ public final class Hex { + private static final Log LOG = LogFactory.getLog(Hex.class); + + /** + * for hex conversion. + * + * https://stackoverflow.com/questions/2817752/java-code-to-convert-byte-to-hexadecimal + * + */ + private static final byte[] HEX_BYTES = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; + private static final char[] HEX_CHARS = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; + private Hex() {} /** @@ -31,7 +50,21 @@ private Hex() {} */ public static String getString(byte b) { - return Integer.toHexString(0x100 | b & 0xff).substring(1).toUpperCase(); + char[] chars = new char[]{HEX_CHARS[getHighNibble(b)], HEX_CHARS[getLowNibble(b)]}; + return new String(chars); + } + + /** + * Returns a hex string of the given byte array. + */ + public static String getString(byte[] bytes) + { + StringBuilder string = new StringBuilder(bytes.length * 2); + for (byte b : bytes) + { + string.append(HEX_CHARS[getHighNibble(b)]).append(HEX_CHARS[getLowNibble(b)]); + } + return string.toString(); } /** @@ -39,6 +72,227 @@ public static String getString(byte b) */ public static byte[] getBytes(byte b) { - return getString(b).getBytes(Charsets.US_ASCII); + return new byte[]{HEX_BYTES[getHighNibble(b)], HEX_BYTES[getLowNibble(b)]}; + } + + /** + * Returns the bytes corresponding to the ASCII hex encoding of the given bytes. + */ + public static byte[] getBytes(byte[] bytes) + { + byte[] asciiBytes = new byte[bytes.length*2]; + for(int i=0; i< bytes.length; i++) + { + asciiBytes[i*2] = HEX_BYTES[getHighNibble(bytes[i])]; + asciiBytes[i*2+1] = HEX_BYTES[getLowNibble(bytes[i])]; + } + return asciiBytes; + } + + /** + * Returns the characters corresponding to the ASCII hex encoding of the given short. 
+ */ + public static char[] getChars(short num) + { + char[] hex = new char[4]; + hex[0] = HEX_CHARS[(num >> 12) & 0x0F]; + hex[1] = HEX_CHARS[(num >> 8) & 0x0F]; + hex[2] = HEX_CHARS[(num >> 4) & 0x0F]; + hex[3] = HEX_CHARS[num & 0x0F]; + return hex; + } + + /** + * Takes the characters in the given string, convert it to bytes in UTF16-BE format + * and build a char array that corresponds to the ASCII hex encoding of the resulting + * bytes. + * + * Example: + *
    +     *   getCharsUTF16BE("ab") == new char[]{'0','0','6','1','0','0','6','2'}
    +     * 
    + * + * @param text The string to convert + * @return The string converted to hex + */ + public static char[] getCharsUTF16BE(String text) + { + // Note that the internal representation of string in Java is already UTF-16. Therefore + // we do not need to use an encoder to convert the string to its byte representation. + char[] hex = new char[text.length()*4]; + + for (int stringIdx = 0, charIdx = 0; stringIdx < text.length(); stringIdx++) + { + char c = text.charAt(stringIdx); + hex[charIdx++] = HEX_CHARS[(c >> 12) & 0x0F]; + hex[charIdx++] = HEX_CHARS[(c >> 8) & 0x0F]; + hex[charIdx++] = HEX_CHARS[(c >> 4) & 0x0F]; + hex[charIdx++] = HEX_CHARS[c & 0x0F]; + } + + return hex; + } + + /** + * Writes the given byte as hex value to the given output stream. + * @param b the byte to be written + * @param output the output stream to be written to + * @throws IOException exception if anything went wrong + */ + public static void writeHexByte(byte b, OutputStream output) throws IOException + { + output.write(HEX_BYTES[getHighNibble(b)]); + output.write(HEX_BYTES[getLowNibble(b)]); + } + + /** + * Writes the given byte array as hex value to the given output stream. + * @param bytes the byte array to be written + * @param output the output stream to be written to + * @throws IOException exception if anything went wrong + */ + public static void writeHexBytes(byte[] bytes, OutputStream output) throws IOException + { + for (byte b : bytes) + { + writeHexByte(b, output); + } + } + + /** + * Get the high nibble of the given byte. + * + * @param b the given byte + * @return the high nibble + */ + private static int getHighNibble(byte b) + { + return (b & 0xF0) >> 4; + } + + /** + * Get the low nibble of the given byte. + * + * @param b the given byte + * @return the low nibble + */ + private static int getLowNibble(byte b) + { + return b & 0x0F; + } + + /** + * Decode a base64 String. + * + * @param base64Value a base64 encoded String. 
+ * + * @return the decoded String as a byte array. + * + * @throws IllegalArgumentException if this isn't a base64 encoded string. + */ + public static byte[] decodeBase64(String base64Value) + { + // https://stackoverflow.com/questions/469695/decode-base64-data-in-java + try + { + // jdk8 and higher? java.util.Base64.getDecoder().decode() + Class b64Class = Class.forName("java.util.Base64"); + Method getDecoderMethod = b64Class.getMethod("getDecoder"); + Object base64Decoder = getDecoderMethod.invoke(b64Class); + Method decodeMethod = base64Decoder.getClass().getMethod("decode", String.class); + return (byte[]) decodeMethod.invoke(base64Decoder, base64Value.replaceAll("\\s", "")); + } + catch (ClassNotFoundException ex) + { + LOG.debug(ex); + } + catch (IllegalAccessException ex) + { + LOG.debug(ex); + } + catch (IllegalArgumentException ex) + { + LOG.debug(ex); + } + catch (NoSuchMethodException ex) + { + LOG.debug(ex); + } + catch (SecurityException ex) + { + LOG.debug(ex); + } + catch (InvocationTargetException ex) + { + LOG.debug(ex); + } + try + { + // up to java7? javax.xml.bind.DatatypeConverter.parseBase64Binary() + Class datatypeConverterClass = Class.forName("javax.xml.bind.DatatypeConverter"); + Method parseBase64BinaryMethod = datatypeConverterClass.getMethod("parseBase64Binary", String.class); + return (byte[]) parseBase64BinaryMethod.invoke(null, base64Value); + } + catch (ClassNotFoundException ex) + { + LOG.debug(ex); + } + catch (IllegalAccessException ex) + { + LOG.debug(ex); + } + catch (IllegalArgumentException ex) + { + LOG.debug(ex); + } + catch (NoSuchMethodException ex) + { + LOG.debug(ex); + } + catch (SecurityException ex) + { + LOG.debug(ex); + } + catch (InvocationTargetException ex) + { + LOG.debug(ex); + } + LOG.error("Can't decode base64 value, try adding javax.xml.bind:jaxb-api to your build"); + return new byte[0]; + } + + /** + * Decodes a hex String into a byte array. + * + * @param s A String with ASCII hex. 
+ * @return decoded byte array. + * @throws IOException + */ + public static byte[] decodeHex(String s) throws IOException + { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int i = 0; + while (i < s.length() - 1) + { + if (s.charAt(i) == '\n' || s.charAt(i) == '\r') + { + ++i; + } + else + { + String hexByte = s.substring(i, i + 2); + try + { + baos.write(Integer.parseInt(hexByte, 16)); // Byte.parseByte won't work with "9C" + } + catch (NumberFormatException ex) + { + LOG.error("Can't parse " + hexByte + ", aborting decode", ex); + break; + } + i += 2; + } + } + return baos.toByteArray(); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java b/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java index a1d5669d087..351d58eedc9 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java @@ -22,6 +22,8 @@ import java.awt.geom.AffineTransform; import java.awt.geom.Point2D; +import java.util.Arrays; +import org.apache.pdfbox.cos.COSBase; /** * This class will be used for matrix manipulation. @@ -30,45 +32,76 @@ */ public final class Matrix implements Cloneable { - static final float[] DEFAULT_SINGLE = - { - 1,0,0, // a b 0 sx hy 0 note: hx and hy are reversed vs. the PDF spec as we use - 0,1,0, // c d 0 = hx sy 0 AffineTransform's definition x and y shear - 0,0,1 // tx ty 1 tx ty 1 - }; - - private final float[] single; + public static final int SIZE = 9; + private float[] single; + private static final float MAX_FLOAT_VALUE = 3.4028235E38f; /** - * Constructor. + * Constructor. This produces an identity matrix. 
*/ public Matrix() { - single = new float[DEFAULT_SINGLE.length]; - System.arraycopy(DEFAULT_SINGLE, 0, single, 0, DEFAULT_SINGLE.length); + // a b 0 + // c d 0 + // tx ty 1 + // note: hx and hy are reversed vs.the PDF spec as we use AffineTransform's definition x and y shear + // sx hy 0 + // hx sy 0 + // tx ty 1 + single = new float[] { 1, 0, 0, 0, 1, 0, 0, 0, 1 }; + } + + /** + * Constructor. This produces a matrix with the given array as data. + * The source array is not copied or cloned. + */ + private Matrix(float[] src) + { + single = src; } /** - * Creates a matrix from a 6-element COS array. + * Creates a matrix from a 6-element (a b c d e f) COS array. + * + * @param array source array, elements must be or extend COSNumber */ public Matrix(COSArray array) { - single = new float[DEFAULT_SINGLE.length]; - single[0] = ((COSNumber)array.get(0)).floatValue(); - single[1] = ((COSNumber)array.get(1)).floatValue(); - single[3] = ((COSNumber)array.get(2)).floatValue(); - single[4] = ((COSNumber)array.get(3)).floatValue(); - single[6] = ((COSNumber)array.get(4)).floatValue(); - single[7] = ((COSNumber)array.get(5)).floatValue(); + single = new float[SIZE]; + single[0] = ((COSNumber)array.getObject(0)).floatValue(); + single[1] = ((COSNumber)array.getObject(1)).floatValue(); + single[3] = ((COSNumber)array.getObject(2)).floatValue(); + single[4] = ((COSNumber)array.getObject(3)).floatValue(); + single[6] = ((COSNumber)array.getObject(4)).floatValue(); + single[7] = ((COSNumber)array.getObject(5)).floatValue(); single[8] = 1; } /** - * Creates a matrix with the given 6 elements. + * Creates a transformation matrix with the given 6 elements. Transformation matrices are + * discussed in 8.3.3, "Common Transformations" and 8.3.4, "Transformation Matrices" of the PDF + * specification. For simple purposes (rotate, scale, translate) it is recommended to use the + * static methods below. 
+ * + * Produces the following matrix: + * a b 0 + * c d 0 + * e f 1 + * + * @see Matrix#getRotateInstance(double, float, float) + * @see Matrix#getScaleInstance(float, float) + * @see Matrix#getTranslateInstance(float, float) + * + * @param a the X coordinate scaling element (m00) of the 3x3 matrix + * @param b the Y coordinate shearing element (m10) of the 3x3 matrix + * @param c the X coordinate shearing element (m01) of the 3x3 matrix + * @param d the Y coordinate scaling element (m11) of the 3x3 matrix + * @param e the X coordinate translation element (m02) of the 3x3 matrix + * @param f the Y coordinate translation element (m12) of the 3x3 matrix */ public Matrix(float a, float b, float c, float d, float e, float f) { - single = new float[DEFAULT_SINGLE.length]; + single = new float[SIZE]; single[0] = a; single[1] = b; single[3] = c; @@ -80,17 +113,53 @@ public Matrix(float a, float b, float c, float d, float e, float f) /** * Creates a matrix with the same elements as the given AffineTransform. + * @param at matrix elements will be initialize with the values from this affine transformation, as follows: + * + * scaleX shearY 0 + * shearX scaleY 0 + * transX transY 1 + * */ public Matrix(AffineTransform at) { - single = new float[DEFAULT_SINGLE.length]; - System.arraycopy(DEFAULT_SINGLE, 0, single, 0, DEFAULT_SINGLE.length); + single = new float[SIZE]; single[0] = (float)at.getScaleX(); single[1] = (float)at.getShearY(); single[3] = (float)at.getShearX(); single[4] = (float)at.getScaleY(); single[6] = (float)at.getTranslateX(); single[7] = (float)at.getTranslateY(); + single[8] = 1; + } + + /** + * Convenience method to be used when creating a matrix from unverified data. If the parameter + * is a COSArray with at least six numbers, a Matrix object is created from the first six + * numbers and returned. If not, then the identity Matrix is returned. + * + * @param base a COS object, preferably a COSArray with six numbers. + * + * @return a Matrix object. 
+ */ + public static Matrix createMatrix(COSBase base) + { + if (!(base instanceof COSArray)) + { + return new Matrix(); + } + COSArray array = (COSArray) base; + if (array.size() < 6) + { + return new Matrix(); + } + for (int i = 0; i < 6; ++i) + { + if (!(array.getObject(i) instanceof COSNumber)) + { + return new Matrix(); + } + } + return new Matrix(array); } /** @@ -102,7 +171,10 @@ public Matrix(AffineTransform at) @Deprecated public void reset() { - System.arraycopy(DEFAULT_SINGLE, 0, single, 0, DEFAULT_SINGLE.length); + Arrays.fill(single, 0); + single[0] = 1; + single[4] = 1; + single[8] = 1; } /** @@ -219,20 +291,18 @@ public void concatenate(Matrix matrix) */ public void translate(Vector vector) { - Matrix m = Matrix.getTranslateInstance(vector.getX(), vector.getY()); - concatenate(m); + concatenate(Matrix.getTranslateInstance(vector.getX(), vector.getY())); } /** - * Translates this matrix by the given ammount. + * Translates this matrix by the given amount. * * @param tx x-translation * @param ty y-translation */ public void translate(float tx, float ty) { - Matrix m = Matrix.getTranslateInstance(tx, ty); - concatenate(m); + concatenate(Matrix.getTranslateInstance(tx, ty)); } /** @@ -243,8 +313,7 @@ public void translate(float tx, float ty) */ public void scale(float sx, float sy) { - Matrix m = Matrix.getScaleInstance(sx, sy); - concatenate(m); + concatenate(Matrix.getScaleInstance(sx, sy)); } /** @@ -254,103 +323,81 @@ public void scale(float sx, float sy) */ public void rotate(double theta) { - Matrix m = Matrix.getRotateInstance(theta, 0, 0); - concatenate(m); + concatenate(Matrix.getRotateInstance(theta, 0, 0)); } /** - * This will take the current matrix and multiply it with a matrix that is passed in. + * This method multiplies this Matrix with the specified other Matrix, storing the product in a new instance. It is + * allowed to have (other == this). * - * @param b The matrix to multiply by. 
- * - * @return The result of the two multiplied matrices. + * @param other the second operand Matrix in the multiplication; required + * @return the product of the two matrices. */ - public Matrix multiply( Matrix b ) + public Matrix multiply(Matrix other) { - return this.multiply(b, new Matrix()); + return multiply(other, new Matrix()); } /** - * This method multiplies this Matrix with the specified other Matrix, storing the product in the specified - * result Matrix. By reusing Matrix instances like this, multiplication chains can be executed without having - * to create many temporary Matrix objects. - *

    - * It is allowed to have (other == this) or (result == this) or indeed (other == result) but if this is done, - * the backing float[] matrix values may be copied in order to ensure a correct product. + * This method multiplies this Matrix with the specified other Matrix, storing the product in the specified result + * Matrix. It is allowed to have (other == this) or (result == this) or indeed (other == result). + * + * See {@link #multiply(Matrix)} if you need a version with a single operator. * - * @param other the second operand Matrix in the multiplication - * @param result the Matrix instance into which the result should be stored. If result is null, a new Matrix - * instance is created. - * @return the product of the two matrices. + * @param other the second operand Matrix in the multiplication; required + * @param result the Matrix instance into which the result should be stored. If result is null, a new Matrix instance is + * created. + * @return the result. + * */ + @Deprecated public Matrix multiply( Matrix other, Matrix result ) { + float[] c = result != null && result != other && result != this ? result.single + : new float[SIZE]; + + multiplyArrays(single, other.single, c); + + if (!Matrix.isFinite(c[0]) // + || !Matrix.isFinite(c[1]) // + || !Matrix.isFinite(c[2]) // + || !Matrix.isFinite(c[3]) // + || !Matrix.isFinite(c[4]) // + || !Matrix.isFinite(c[5]) // + || !Matrix.isFinite(c[6]) // + || !Matrix.isFinite(c[7]) // + || !Matrix.isFinite(c[8])) + throw new IllegalArgumentException("Multiplying two matrices produces illegal values"); + if (result == null) { - result = new Matrix(); + return new Matrix(c); } - - if (other != null && other.single != null) + else { - // the operands - float[] thisOperand = this.single; - float[] otherOperand = other.single; - - // We're multiplying 2 sets of floats together to produce a third, but we allow - // any of these float[] instances to be the same objects. 
- // There is the possibility then to overwrite one of the operands with result values - // and therefore corrupt the result. - - // If either of these operands are the same float[] instance as the result, then - // they need to be copied. - - if (this == result) - { - final float[] thisOrigVals = new float[this.single.length]; - System.arraycopy(this.single, 0, thisOrigVals, 0, this.single.length); - - thisOperand = thisOrigVals; - } - if (other == result) - { - final float[] otherOrigVals = new float[other.single.length]; - System.arraycopy(other.single, 0, otherOrigVals, 0, other.single.length); - - otherOperand = otherOrigVals; - } - - result.single[0] = thisOperand[0] * otherOperand[0] - + thisOperand[1] * otherOperand[3] - + thisOperand[2] * otherOperand[6]; - result.single[1] = thisOperand[0] * otherOperand[1] - + thisOperand[1] * otherOperand[4] - + thisOperand[2] * otherOperand[7]; - result.single[2] = thisOperand[0] * otherOperand[2] - + thisOperand[1] * otherOperand[5] - + thisOperand[2] * otherOperand[8]; - result.single[3] = thisOperand[3] * otherOperand[0] - + thisOperand[4] * otherOperand[3] - + thisOperand[5] * otherOperand[6]; - result.single[4] = thisOperand[3] * otherOperand[1] - + thisOperand[4] * otherOperand[4] - + thisOperand[5] * otherOperand[7]; - result.single[5] = thisOperand[3] * otherOperand[2] - + thisOperand[4] * otherOperand[5] - + thisOperand[5] * otherOperand[8]; - result.single[6] = thisOperand[6] * otherOperand[0] - + thisOperand[7] * otherOperand[3] - + thisOperand[8] * otherOperand[6]; - result.single[7] = thisOperand[6] * otherOperand[1] - + thisOperand[7] * otherOperand[4] - + thisOperand[8] * otherOperand[7]; - result.single[8] = thisOperand[6] * otherOperand[2] - + thisOperand[7] * otherOperand[5] - + thisOperand[8] * otherOperand[8]; + result.single = c; + return result; } + } - return result; + private static boolean isFinite(float f) + { + // this is faster than the combination of "isNaN" and "isInfinite" and 
Float.isFinite isn't available in java 6 + return Math.abs(f) <= MAX_FLOAT_VALUE; } + private void multiplyArrays(float[] a, float[] b, float[] c) + { + c[0] = a[0] * b[0] + a[1] * b[3] + a[2] * b[6]; + c[1] = a[0] * b[1] + a[1] * b[4] + a[2] * b[7]; + c[2] = a[0] * b[2] + a[1] * b[5] + a[2] * b[8]; + c[3] = a[3] * b[0] + a[4] * b[3] + a[5] * b[6]; + c[4] = a[3] * b[1] + a[4] * b[4] + a[5] * b[7]; + c[5] = a[3] * b[2] + a[4] * b[5] + a[5] * b[8]; + c[6] = a[6] * b[0] + a[7] * b[3] + a[8] * b[6]; + c[7] = a[6] * b[1] + a[7] * b[4] + a[8] * b[7]; + c[8] = a[6] * b[2] + a[7] * b[5] + a[8] * b[8]; + } /** * Transforms the given point by this matrix. * @@ -389,7 +436,7 @@ public Point2D.Float transformPoint(float x, float y) /** * Transforms the given point by this matrix. * - * @param vector @2D vector + * @param vector 2D vector */ public Vector transform(Vector vector) { @@ -422,16 +469,18 @@ public Matrix extractScaling() /** * Convenience method to create a scaled instance. * - * @param sx The xscale operator. - * @param sy The yscale operator. + * Produces the following matrix: + * x 0 0 + * 0 y 0 + * 0 0 1 + * + * @param x The xscale operator. + * @param y The yscale operator. * @return A new matrix with just the x/y scaling */ - public static Matrix getScaleInstance(float sx, float sy) + public static Matrix getScaleInstance(float x, float y) { - Matrix matrix = new Matrix(); - matrix.single[0] = sx; - matrix.single[4] = sy; - return matrix; + return new Matrix(x, 0, 0, y, 0, 0); } /** @@ -452,30 +501,34 @@ public Matrix extractTranslating() /** * Convenience method to create a translating instance. * - * @param tx The x translating operator. - * @param ty The y translating operator. + * Produces the following matrix: + * 1 0 0 + * 0 1 0 + * x y 1 + * + * @param x The x translating operator. + * @param y The y translating operator. * @return A new matrix with just the x/y translating. * @deprecated Use {@link #getTranslateInstance} instead. 
*/ @Deprecated - public static Matrix getTranslatingInstance(float tx, float ty) + public static Matrix getTranslatingInstance(float x, float y) { - return getTranslateInstance(tx, ty); + return new Matrix(1, 0, 0, 1, x, y); } /** * Convenience method to create a translating instance. * - * @param tx The x translating operator. - * @param ty The y translating operator. + * Produces the following matrix: 1 0 0 0 1 0 x y 1 + * + * @param x The x translating operator. + * @param y The y translating operator. * @return A new matrix with just the x/y translating. */ - public static Matrix getTranslateInstance(float tx, float ty) + public static Matrix getTranslateInstance(float x, float y) { - Matrix matrix = new Matrix(); - matrix.single[6] = tx; - matrix.single[7] = ty; - return matrix; + return new Matrix(1, 0, 0, 1, x, y); } /** @@ -491,14 +544,7 @@ public static Matrix getRotateInstance(double theta, float tx, float ty) float cosTheta = (float)Math.cos(theta); float sinTheta = (float)Math.sin(theta); - Matrix matrix = new Matrix(); - matrix.single[0] = cosTheta; - matrix.single[1] = sinTheta; - matrix.single[3] = -sinTheta; - matrix.single[4] = cosTheta; - matrix.single[6] = tx; - matrix.single[7] = ty; - return matrix; + return new Matrix(cosTheta, sinTheta, -sinTheta, cosTheta, tx, ty); } /** @@ -509,9 +555,7 @@ public static Matrix getRotateInstance(double theta, float tx, float ty) */ public static Matrix concatenate(Matrix a, Matrix b) { - Matrix copy = a.clone(); - copy.concatenate(b); - return copy; + return b.multiply(a); } /** @@ -521,9 +565,7 @@ public static Matrix concatenate(Matrix a, Matrix b) @Override public Matrix clone() { - Matrix clone = new Matrix(); - System.arraycopy( single, 0, clone.single, 0, 9 ); - return clone; + return new Matrix(single.clone()); } /** @@ -533,8 +575,6 @@ public Matrix clone() */ public float getScalingFactorX() { - float xScale = single[0]; - /** * BM: if the trm is rotated, the calculation is a little more complicated 
* @@ -552,12 +592,12 @@ public float getScalingFactorX() * sqrt(x2) = * abs(x) */ - if( !(single[1]==0.0f && single[3]==0.0f) ) + if (single[1] != 0.0f) { - xScale = (float)Math.sqrt(Math.pow(single[0], 2)+ + return (float) Math.sqrt(Math.pow(single[0], 2) + Math.pow(single[1], 2)); } - return xScale; + return single[0]; } /** @@ -567,17 +607,18 @@ public float getScalingFactorX() */ public float getScalingFactorY() { - float yScale = single[4]; - if( !(single[1]==0.0f && single[3]==0.0f) ) + if (single[3] != 0.0f) { - yScale = (float)Math.sqrt(Math.pow(single[3], 2)+ + return (float) Math.sqrt(Math.pow(single[3], 2) + Math.pow(single[4], 2)); } - return yScale; + return single[4]; } /** * Returns the x-scaling element of this matrix. + * + * @see #getScalingFactorX() */ public float getScaleX() { @@ -602,6 +643,8 @@ public float getShearX() /** * Returns the y-scaling element of this matrix. + * + * @see #getScalingFactorY() */ public float getScaleY() { @@ -649,7 +692,10 @@ public float getYPosition() } /** - * Returns a COS array which represents this matrix. + * Returns a COS array which represent the geometric relevant + * components of the matrix. The last column of the matrix is ignored, + * only the first two columns are returned. This is analog to the + * Matrix(COSArray) constructor. 
*/ public COSArray toCOSArray() { @@ -666,14 +712,36 @@ public COSArray toCOSArray() @Override public String toString() { - StringBuffer sb = new StringBuffer( "" ); - sb.append("["); - sb.append(single[0] + ","); - sb.append(single[1] + ","); - sb.append(single[3] + ","); - sb.append(single[4] + ","); - sb.append(single[6] + ","); - sb.append(single[7] + "]"); - return sb.toString(); + return "[" + + single[0] + "," + + single[1] + "," + + single[3] + "," + + single[4] + "," + + single[6] + "," + + single[7] + "]"; + } + + @Override + public int hashCode() + { + return Arrays.hashCode(single); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) + { + return true; + } + if (obj == null) + { + return false; + } + if (getClass() != obj.getClass()) + { + return false; + } + return Arrays.equals(this.single, ((Matrix) obj).single); } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/util/NumberFormatUtil.java b/pdfbox/src/main/java/org/apache/pdfbox/util/NumberFormatUtil.java new file mode 100644 index 00000000000..e9f33dd118a --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/util/NumberFormatUtil.java @@ -0,0 +1,174 @@ +/* + * Copyright 2016 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.util; + +/** + * This class contains methods to format numbers. 
+ * + * @author Michael Doswald + */ +public class NumberFormatUtil +{ + /** + * Maximum number of fraction digits supported by the format methods + */ + private static final int MAX_FRACTION_DIGITS = 5; + + /** + * Contains the power of ten values for fast lookup in the format methods + */ + private static final long[] POWER_OF_TENS; + private static final int[] POWER_OF_TENS_INT; + + static + { + POWER_OF_TENS = new long[19]; + POWER_OF_TENS[0] = 1; + + for (int exp = 1; exp < POWER_OF_TENS.length; exp++) + { + POWER_OF_TENS[exp] = POWER_OF_TENS[exp - 1] * 10; + } + + POWER_OF_TENS_INT = new int[10]; + POWER_OF_TENS_INT[0] = 1; + + for (int exp = 1; exp < POWER_OF_TENS_INT.length; exp++) + { + POWER_OF_TENS_INT[exp] = POWER_OF_TENS_INT[exp - 1] * 10; + } + } + + private NumberFormatUtil() + { + } + + /** + * Fast variant to format a floating point value to a ASCII-string. The format will fail if the + * value is greater than {@link Long#MAX_VALUE}, smaller or equal to {@link Long#MIN_VALUE}, is + * {@link Float#NaN}, infinite or the number of requested fraction digits is greater than + * {@link #MAX_FRACTION_DIGITS}. + * + * When the number contains more fractional digits than {@code maxFractionDigits} the value will + * be rounded. Rounding is done to the nearest possible value, with the tie breaking rule of + * rounding away from zero. 
+ * + * @param value The float value to format + * @param maxFractionDigits The maximum number of fraction digits used + * @param asciiBuffer The output buffer to write the formatted value to + * + * @return The number of bytes used in the buffer or {@code -1} if formatting failed + */ + public static int formatFloatFast(float value, int maxFractionDigits, byte[] asciiBuffer) + { + if (Float.isNaN(value) || + Float.isInfinite(value) || + value > Long.MAX_VALUE || + value <= Long.MIN_VALUE || + maxFractionDigits > MAX_FRACTION_DIGITS) + { + return -1; + } + + int offset = 0; + long integerPart = (long) value; + + //handle sign + if (value < 0) + { + asciiBuffer[offset++] = '-'; + integerPart = -integerPart; + } + + //extract fraction part + long fractionPart = (long) ((Math.abs((double)value) - integerPart) * POWER_OF_TENS[maxFractionDigits] + 0.5d); + + //Check for rounding to next integer + if (fractionPart >= POWER_OF_TENS[maxFractionDigits]) { + integerPart++; + fractionPart -= POWER_OF_TENS[maxFractionDigits]; + } + + //format integer part + offset = formatPositiveNumber(integerPart, getExponent(integerPart), false, asciiBuffer, offset); + + if (fractionPart > 0 && maxFractionDigits > 0) + { + asciiBuffer[offset++] = '.'; + offset = formatPositiveNumber(fractionPart, maxFractionDigits - 1, true, asciiBuffer, offset); + } + + return offset; + } + + /** + * Formats a positive integer number starting with the digit at {@code 10^exp}. + * + * @param number The number to format + * @param exp The start digit + * @param omitTrailingZeros Whether the formatting should stop if only trailing zeros are left. + * This is needed e.g. when formatting fractions of a number. 
+ * @param asciiBuffer The buffer to write the ASCII digits to + * @param startOffset The start offset into the buffer to start writing + * + * @return The offset into the buffer which contains the first byte that was not filled by the + * method + */ + private static int formatPositiveNumber(long number, int exp, boolean omitTrailingZeros, byte[] asciiBuffer, int startOffset) + { + int offset = startOffset; + long remaining = number; + + while (remaining > Integer.MAX_VALUE && (!omitTrailingZeros || remaining > 0)) + { + long digit = remaining / POWER_OF_TENS[exp]; + remaining -= (digit * POWER_OF_TENS[exp]); + + asciiBuffer[offset++] = (byte) ('0' + digit); + exp--; + } + + //If the remaining fits into an integer, use int arithmetic as it is faster + int remainingInt = (int) remaining; + while (exp >= 0 && (!omitTrailingZeros || remainingInt > 0)) + { + int digit = remainingInt / POWER_OF_TENS_INT[exp]; + remainingInt -= (digit * POWER_OF_TENS_INT[exp]); + + asciiBuffer[offset++] = (byte) ('0' + digit); + exp--; + } + + return offset; + } + + /** + * Returns the highest exponent of 10 where {@code 10^exp < number} for numbers > 0 + */ + private static int getExponent(long number) + { + for (int exp = 0; exp < (POWER_OF_TENS.length - 1); exp++) + { + if (number < POWER_OF_TENS[exp + 1]) + { + return exp; + } + } + + return POWER_OF_TENS.length - 1; + } + +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java b/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java index 7b3527354cb..c9f4926948b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/util/QuickSort.java @@ -46,8 +46,9 @@ public int compare(Comparable object1, Comparable object2) /** * Sorts the given list using the given comparator. * + * @param type of the objects to be sorted. 
* @param list list to be sorted - * @param cmp comparator used to compare the object swithin the list + * @param cmp comparator used to compare the objects within the list */ public static void sort(List list, Comparator cmp) { @@ -62,6 +63,7 @@ public static void sort(List list, Comparator cmp) /** * Sorts the given list using compareTo as comparator. * + * @param type of the objects to be sorted. * @param list list to be sorted */ public static void sort(List list) diff --git a/pdfbox/src/main/java/org/apache/pdfbox/util/SmallMap.java b/pdfbox/src/main/java/org/apache/pdfbox/util/SmallMap.java new file mode 100644 index 00000000000..e3b1512a009 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/util/SmallMap.java @@ -0,0 +1,388 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.util; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Map implementation with a smallest possible memory usage. + * It should only be used for maps with small number of items + * (e.g. <30) since most operations have an O(n) complexity. 
+ * Thus it should be used in cases with large number of map + * objects, each having only few items. + * + *

    null is not supported for keys or values.

    + */ +public class SmallMap implements Map +{ + /** + * stores key-value pair as 2 objects; key first; in case of empty map this might be null + */ + private Object[] mapArr; + + /** Creates empty map. */ + public SmallMap() + { + } + + /** Creates map filled with entries from provided map. */ + public SmallMap(Map initMap) + { + putAll(initMap); + } + + /** + * Returns index of key within map-array or -1 + * if key is not found (or key is null). + */ + private int findKey(Object key) + { + if (isEmpty() || (key==null)) + { + return -1; + } + + for ( int aIdx = 0; aIdx < mapArr.length; aIdx+=2 ) + { + if (key.equals(mapArr[aIdx])) + { + return aIdx; + } + } + + return -1; + } + + /** + * Returns index of value within map-array or -1 + * if value is not found (or value is null). + */ + private int findValue(Object value) + { + if (isEmpty() || (value==null)) + { + return -1; + } + + for ( int aIdx = 1; aIdx < mapArr.length; aIdx+=2 ) + { + if (value.equals(mapArr[aIdx])) + { + return aIdx; + } + } + + return -1; + } + + @Override + public int size() + { + return mapArr == null ? 0 : mapArr.length >> 1; + } + + @Override + public boolean isEmpty() + { + return (mapArr == null) || (mapArr.length == 0); + } + + @Override + public boolean containsKey(Object key) + { + return findKey(key) >= 0; + } + + @Override + public boolean containsValue(Object value) + { + return findValue(value) >= 0; + } + + @SuppressWarnings("unchecked") + @Override + public V get(Object key) + { + int kIdx = findKey(key); + + return kIdx < 0 ? 
null : (V) mapArr[kIdx+1]; + } + + @Override + public V put(K key, V value) + { + if ((key == null) || (value == null)) + { + throw new NullPointerException( "Key or value must not be null."); + } + + if (mapArr == null) + { + mapArr = new Object[] { key, value }; + return null; + } + else + { + int kIdx = findKey(key); + + if (kIdx < 0) + { + // key unknown + int oldLen = mapArr.length; + Object[] newMapArr = new Object[oldLen+2]; + System.arraycopy(mapArr, 0, newMapArr, 0, oldLen); + newMapArr[oldLen] = key; + newMapArr[oldLen+1] = value; + mapArr = newMapArr; + return null; + } + else + { + // key exists; replace value + @SuppressWarnings("unchecked") + V oldValue = (V) mapArr[kIdx+1]; + mapArr[kIdx+1] = value; + return oldValue; + } + } + } + + @Override + public V remove(Object key) + { + int kIdx = findKey(key); + + if (kIdx < 0) + { + // not found + return null; + } + + @SuppressWarnings("unchecked") + V oldValue = (V) mapArr[kIdx+1]; + int oldLen = mapArr.length; + + if (oldLen == 2) + { + // was last entry + mapArr = null; + } + else + { + Object[] newMapArr = new Object[oldLen-2]; + System.arraycopy(mapArr, 0, newMapArr, 0, kIdx); + System.arraycopy(mapArr, kIdx+2, newMapArr, kIdx, oldLen - kIdx - 2); + mapArr = newMapArr; + } + + return oldValue; + } + + @Override + public final void putAll(Map otherMap) + { + if ((mapArr == null) || (mapArr.length == 0)) + { + // existing map is empty + mapArr = new Object[otherMap.size() << 1]; + int aIdx = 0; + for (Entry entry : otherMap.entrySet()) + { + if ((entry.getKey() == null) || (entry.getValue() == null)) + { + throw new NullPointerException( "Key or value must not be null."); + } + + mapArr[aIdx++] = entry.getKey(); + mapArr[aIdx++] = entry.getValue(); + } + } + else + { + int oldLen = mapArr.length; + // first increase array size to hold all to put entries as if they have unknown keys + // reduce after adding all to the required size + Object[] newMapArr = new Object[oldLen+(otherMap.size() << 1)]; + 
System.arraycopy(mapArr, 0, newMapArr, 0, oldLen); + + int newIdx = oldLen; + for (Entry entry : otherMap.entrySet()) + { + if ((entry.getKey() == null) || (entry.getValue() == null)) + { + throw new NullPointerException( "Key or value must not be null."); + } + + int existKeyIdx = findKey(entry.getKey()); + + if (existKeyIdx >= 0) + { + // existing key + newMapArr[existKeyIdx+1] = entry.getValue(); + } + else + { + // new key + newMapArr[newIdx++] = entry.getKey(); + newMapArr[newIdx++] = entry.getValue(); + } + } + + if (newIdx < newMapArr.length) + { + Object[] reducedMapArr = new Object[newIdx]; + System.arraycopy(newMapArr, 0, reducedMapArr, 0, newIdx); + newMapArr = reducedMapArr; + } + + mapArr = newMapArr; + } + } + + @Override + public void clear() + { + mapArr = null; + } + + /** + * Returns a set view of the keys contained in this map. + * + *

    The current implementation does not allow changes to the + * returned key set (which would have to be reflected in the + * underlying map).

    + */ + @SuppressWarnings("unchecked") + @Override + public Set keySet() + { + if (isEmpty()) + { + return Collections.emptySet(); + } + + Set keys = new LinkedHashSet(); + for (int kIdx = 0; kIdx < mapArr.length; kIdx+=2) + { + keys.add((K)mapArr[kIdx]); + } + return Collections.unmodifiableSet( keys ); + } + + /** + * Returns a collection of the values contained in this map. + * + *

    The current implementation does not allow changes to the + * returned collection (which would have to be reflected in the + * underlying map).

    + */ + @SuppressWarnings("unchecked") + @Override + public Collection values() + { + if (isEmpty()) + { + return Collections.emptySet(); + } + + List values = new ArrayList(mapArr.length >> 1); + for (int vIdx = 1; vIdx < mapArr.length; vIdx+=2) + { + values.add((V)mapArr[vIdx]); + } + return Collections.unmodifiableList( values ); + } + + private class SmallMapEntry implements Entry + { + private final int keyIdx; + + SmallMapEntry(int keyInMapIdx) + { + keyIdx = keyInMapIdx; + } + + @SuppressWarnings("unchecked") + @Override + public K getKey() + { + return (K)mapArr[keyIdx]; + } + + @SuppressWarnings("unchecked") + @Override + public V getValue() + { + return (V)mapArr[keyIdx+1]; + } + + @Override + public V setValue(V value) + { + if (value == null) + { + throw new NullPointerException( "Key or value must not be null."); + } + + V oldValue = getValue(); + mapArr[keyIdx+1] = value; + return oldValue; + } + + @Override + public int hashCode() + { + return getKey().hashCode(); + } + + @Override + public boolean equals(Object obj) + { + if (!(obj instanceof SmallMap.SmallMapEntry)) + { + return false; + } + @SuppressWarnings("unchecked") + SmallMapEntry other = (SmallMapEntry) obj; + + return getKey().equals(other.getKey()) && getValue().equals(other.getValue()); + } + } + + @Override + public Set> entrySet() + { + if (isEmpty()) + { + return Collections.emptySet(); + } + + Set> entries = new LinkedHashSet>(); + for (int kIdx = 0; kIdx < mapArr.length; kIdx+=2) + { + entries.add(new SmallMapEntry(kIdx)); + } + return Collections.unmodifiableSet( entries ); + } + +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/util/Version.java b/pdfbox/src/main/java/org/apache/pdfbox/util/Version.java index 7647ee4dbe2..f29271baf0f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/util/Version.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/util/Version.java @@ -17,17 +17,20 @@ package org.apache.pdfbox.util; +import java.io.BufferedInputStream; import 
java.io.IOException; -import java.net.URL; +import java.io.InputStream; import java.util.Properties; +import org.apache.pdfbox.io.IOUtils; + /** * Exposes PDFBox version. */ public final class Version { private static final String PDFBOX_VERSION_PROPERTIES = - "org/apache/pdfbox/resources/version.properties"; + "/org/apache/pdfbox/resources/version.properties"; private Version() { @@ -39,20 +42,21 @@ private Version() */ public static String getVersion() { + InputStream is = null; try { - URL url = Version.class.getClassLoader().getResource(PDFBOX_VERSION_PROPERTIES); - if (url == null) - { - return null; - } + is = new BufferedInputStream(Version.class.getResourceAsStream(PDFBOX_VERSION_PROPERTIES)); Properties properties = new Properties(); - properties.load(url.openStream()); + properties.load(is); return properties.getProperty("pdfbox.version", null); } catch (IOException io) { return null; } + finally + { + IOUtils.closeQuietly(is); + } } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/util/XMLUtil.java b/pdfbox/src/main/java/org/apache/pdfbox/util/XMLUtil.java new file mode 100644 index 00000000000..8cb33a18dcb --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/util/XMLUtil.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.util; + +import java.io.InputStream; +import java.io.IOException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.FactoryConfigurationError; +import javax.xml.parsers.ParserConfigurationException; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.dom.Text; +import org.xml.sax.SAXException; + +/** + * This class with handle some simple XML operations. + * + * @author Ben Litchfield + */ +public final class XMLUtil +{ + /** + * Utility class, should not be instantiated. + * + */ + private XMLUtil() + { + } + + /** + * This will parse an XML stream and create a DOM document. + * + * @param is The stream to get the XML from. + * @return The DOM document. + * @throws IOException It there is an error creating the dom. + */ + public static Document parse(InputStream is) throws IOException + { + return parse(is, false); + } + + /** + * This will parse an XML stream and create a DOM document. + * + * @param is The stream to get the XML from. + * @param nsAware activates namespace awareness of the parser + * @return The DOM document. + * @throws IOException It there is an error creating the dom. 
+ */ + public static Document parse(InputStream is, boolean nsAware) throws IOException + { + try + { + DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); + builderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + builderFactory.setFeature("http://xml.org/sax/features/external-general-entities", + false); + builderFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", + false); + builderFactory.setFeature( + "http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + builderFactory.setXIncludeAware(false); + builderFactory.setExpandEntityReferences(false); + builderFactory.setNamespaceAware(nsAware); + DocumentBuilder builder = builderFactory.newDocumentBuilder(); + return builder.parse(is); + } + catch (FactoryConfigurationError e) + { + throw new IOException(e.getMessage(), e); + } + catch (ParserConfigurationException e) + { + throw new IOException(e.getMessage(), e); + } + catch (SAXException e) + { + throw new IOException(e.getMessage(), e); + } + } + + /** + * This will get the text value of an element. + * + * @param node The node to get the text value for. + * @return The text of the node. 
+ */ + public static String getNodeValue(Element node) + { + StringBuilder sb = new StringBuilder(); + NodeList children = node.getChildNodes(); + int numNodes = children.getLength(); + for (int i = 0; i < numNodes; i++) + { + Node next = children.item(i); + if (next instanceof Text) + { + sb.append(next.getNodeValue()); + } + } + return sb.toString(); + } + +} diff --git a/pdfbox/src/main/java/org/apache/pdfbox/util/filetypedetector/FileTypeDetector.java b/pdfbox/src/main/java/org/apache/pdfbox/util/filetypedetector/FileTypeDetector.java index 4242209fb63..bc83b889b01 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/util/filetypedetector/FileTypeDetector.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/util/filetypedetector/FileTypeDetector.java @@ -60,7 +60,8 @@ public final class FileTypeDetector root.addPath(FileType.PCX, new byte[]{0x0A, 0x05, 0x01}); root.addPath(FileType.RIFF, "RIFF".getBytes(Charsets.ISO_8859_1)); - root.addPath(FileType.ARW, "II".getBytes(Charsets.ISO_8859_1), new byte[]{0x2a, 0x00, 0x08, 0x00}); + // https://github.com/drewnoakes/metadata-extractor/issues/217 + //root.addPath(FileType.ARW, "II".getBytes(Charsets.ISO_8859_1), new byte[]{0x2a, 0x00, 0x08, 0x00}) root.addPath(FileType.CRW, "II".getBytes(Charsets.ISO_8859_1), new byte[]{0x1a, 0x00, 0x00, 0x00}, "HEAPCCDR".getBytes(Charsets.ISO_8859_1)); root.addPath(FileType.CR2, "II".getBytes(Charsets.ISO_8859_1), new byte[]{0x2a, 0x00, 0x10, 0x00, 0x00, 0x00, 0x43, 0x52}); root.addPath(FileType.NEF, "MM".getBytes(Charsets.ISO_8859_1), new byte[]{0x00, 0x2a, 0x00, 0x00, 0x00, (byte)0x80, 0x00}); @@ -70,7 +71,7 @@ public final class FileTypeDetector root.addPath(FileType.RW2, "II".getBytes(Charsets.ISO_8859_1), new byte[]{0x55, 0x00}); } - private FileTypeDetector() throws Exception + private FileTypeDetector() { } @@ -110,4 +111,9 @@ public static FileType detectFileType(final BufferedInputStream inputStream) thr //noinspection ConstantConditions return root.find(bytes); } + + public 
static FileType detectFileType(final byte[] fileBytes) throws IOException + { + return root.find(fileBytes); + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/util/package.html b/pdfbox/src/main/java/org/apache/pdfbox/util/package.html index 1cf7adde77c..2ddc77cefd2 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/util/package.html +++ b/pdfbox/src/main/java/org/apache/pdfbox/util/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/additional.txt b/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/additional.txt index 72ea294d9b5..8515d8465ac 100644 --- a/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/additional.txt +++ b/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/additional.txt @@ -66,10 +66,12 @@ bracketleftbig;005B bracketleftBig;005B bracketleftbigg;005B bracketleftBigg;005B +bracketleftmath;005B bracketrightBig;005D bracketrightbig;005D bracketrightbigg;005D bracketrightBigg;005D +bracketrightmath;005D ceilingleftbig;2308 ceilingleftBig;2308 ceilingleftBigg;2308 @@ -88,6 +90,8 @@ contintegraldisplay;222E contintegraltext;222E coproductdisplay;2210 coproducttext;2210 +epsilon1;03B5 +equalmath;003D floorleftBig;230A floorleftbig;230A floorleftbigg;230A @@ -112,10 +116,13 @@ parenleftBig;0028 parenleftbig;0028 parenleftBigg;0028 parenleftbigg;0028 +parenleftmath;0028 parenrightBig;0029 parenrightbig;0029 parenrightBigg;0029 parenrightbigg;0029 +parenrightmath;0029 +plusmath;002B prime;2032 productdisplay;220F producttext;220F diff --git a/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/zapfdingbats.txt b/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/zapfdingbats.txt index 8c307576305..69e12818710 100644 --- a/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/zapfdingbats.txt +++ 
b/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/zapfdingbats.txt @@ -244,4 +244,5 @@ a97;275B a98;275C a99;275D a9;2720 +space;0020 #END diff --git a/pdfbox/src/test/java/org/apache/pdfbox/cos/COSDictionaryTest.java b/pdfbox/src/test/java/org/apache/pdfbox/cos/COSDictionaryTest.java new file mode 100644 index 00000000000..78c499bc6bd --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/cos/COSDictionaryTest.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.cos; + +import org.junit.Assert; +import org.junit.Test; + +public class COSDictionaryTest +{ + @Test + public void testCOSDictionaryNotEqualsCOSStream() + { + COSDictionary cosDictionary = new COSDictionary(); + COSStream cosStream = new COSStream(); + cosDictionary.setItem(COSName.BE, COSName.BE); + cosDictionary.setInt(COSName.LENGTH, 0); + cosStream.setItem(COSName.BE, COSName.BE); + Assert.assertNotEquals("a COSDictionary shall not be equal to a COSStream with the same dictionary entries", cosDictionary, cosStream); + Assert.assertNotEquals("a COSStream shall not be equal to a COSDictionary with the same dictionary entries", cosStream, cosDictionary); + } +} \ No newline at end of file diff --git a/pdfbox/src/test/java/org/apache/pdfbox/cos/COSObjectKeyTest.java b/pdfbox/src/test/java/org/apache/pdfbox/cos/COSObjectKeyTest.java new file mode 100644 index 00000000000..3801a27c069 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/cos/COSObjectKeyTest.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.cos; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class COSObjectKeyTest +{ + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void compareToInputNotNullOutputZero() + { + // Arrange + final COSObjectKey objectUnderTest = new COSObjectKey(0L, 0); + final COSObjectKey other = new COSObjectKey(0L, 0); + + // Act + final int retval = objectUnderTest.compareTo(other); + + // Assert result + Assert.assertEquals(0, retval); + } + + @Test + public void compareToInputNotNullOutputPositive() + { + // Arrange + final COSObjectKey objectUnderTest = new COSObjectKey(0L, 0); + final COSObjectKey other = new COSObjectKey(-9223372036854775808L, 0); + + // Act + final int retval = objectUnderTest.compareTo(other); + + // Assert result + Assert.assertEquals(1, retval); + } + + @Test + public void checkHashCode() + { + // same object number 100 0 + Assert.assertEquals(new COSObjectKey(100, 0).hashCode(), + new COSObjectKey(100, 0).hashCode()); + + // different object numbers/same generation numbers 100 0 vs. 200 0 + Assert.assertNotEquals(new COSObjectKey(100, 0).hashCode(), + new COSObjectKey(200, 0).hashCode()); + + // different object numbers/different generation numbers/ sum of both numbers are equal 100 0 vs. 
99 1 + Assert.assertNotEquals(new COSObjectKey(100, 0).hashCode(), + new COSObjectKey(99, 1).hashCode()); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/cos/PDFDocEncodingTest.java b/pdfbox/src/test/java/org/apache/pdfbox/cos/PDFDocEncodingTest.java index 17acd45f273..b08a05e08dc 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/cos/PDFDocEncodingTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/cos/PDFDocEncodingTest.java @@ -17,11 +17,11 @@ package org.apache.pdfbox.cos; -import static org.junit.Assert.assertEquals; -import org.junit.Test; - +import java.io.IOException; import java.util.ArrayList; import java.util.List; +import static org.junit.Assert.assertEquals; +import org.junit.Test; /** * Test for PDFDocEncoding. @@ -89,5 +89,22 @@ public void testDeviations() assertEquals(cosString.getString(), deviation); } } -} + /** + * PDFBOX-3864: Test that chars smaller than 256 which are NOT part of PDFDocEncoding are + * handled correctly. + * + * @throws IOException + */ + @Test + public void testPDFBox3864() throws IOException + { + for (int i = 0; i < 256; i++) + { + String hex = String.format("FEFF%04X", i); + COSString cs1 = COSString.parseHex(hex); + COSString cs2 = new COSString(cs1.getString()); + assertEquals(cs1, cs2); + } + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSFloat.java b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSFloat.java index 39edc2e12e2..f090e1ee55f 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSFloat.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSFloat.java @@ -1,332 +1,402 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.pdfbox.cos; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.math.BigDecimal; -import java.util.Random; - -import junit.framework.Test; -import junit.framework.TestSuite; - -import org.apache.pdfbox.pdfwriter.COSWriter; - -/** - * Tests {@link COSFloat}. - */ -public class TestCOSFloat extends TestCOSNumber -{ - @Override - public void setUp() - { - try - { - testCOSBase = COSNumber.get("1.1"); - } - catch (IOException e) - { - fail("Failed to create a COSNumber in setUp()"); - } - } - - /** - * Base class to run looped tests with float numbers. - * - * To use it, derive a class and just implement runTest(). Then either call - * runTests for a series of random and pseudorandom tests, or runTest to - * test with corner values. 
- */ - abstract class BaseTester - { - private int low = -100000; - private int high = 300000; - private int step = 20000; - - public void setLoop(int low, int high, int step) - { - this.low = low; - this.high = high; - this.step = step; - } - - // deterministic and non-deterministic test - public void runTests() - { - // deterministic test - loop(123456); - - // non-deterministic test - loop(System.currentTimeMillis()); - } - - // look through a series of pseudorandom tests influenced by a seed - private void loop(long seed) - { - Random rnd = new Random(seed); - for (int i = low; i < high; i += step) - { - float num = i * rnd.nextFloat(); - try - { - runTest(num); - } - catch (AssertionError a) - { - fail("num = " + num + ", seed = " + seed); - } - } - } - - abstract void runTest(float num); - - } - - /** - * Tests equals() - ensures that the Object.equals() contract is obeyed. - * These are tested over a range of arbitrary values to ensure Consistency, - * Reflexivity, Symmetry, Transitivity and non-nullity. 
- */ - public void testEquals() - { - new BaseTester() - { - @Override - void runTest(float num) - { - COSFloat test1 = new COSFloat(num); - COSFloat test2 = new COSFloat(num); - COSFloat test3 = new COSFloat(num); - // Reflexive (x == x) - assertTrue(test1.equals(test1)); - // Symmetric is preserved ( x==y then y==x) - assertTrue(test2.equals(test3)); - assertTrue(test1.equals(test2)); - // Transitive (if x==y && y==z then x==z) - assertTrue(test1.equals(test2)); - assertTrue(test2.equals(test3)); - assertTrue(test1.equals(test3)); - - float nf = Float.intBitsToFloat(Float.floatToIntBits(num) + 1); - COSFloat test4 = new COSFloat(nf); - assertFalse(test4.equals(test1)); - } - }.runTests(); - } - - class HashCodeTester extends BaseTester - { - - @Override - void runTest(float num) - { - COSFloat test1 = new COSFloat(num); - COSFloat test2 = new COSFloat(num); - assertEquals(test1.hashCode(), test2.hashCode()); - - float nf = Float.intBitsToFloat(Float.floatToIntBits(num) + 1); - COSFloat test3 = new COSFloat(nf); - assertFalse(test3.hashCode() == test1.hashCode()); - } - } - - /** - * Tests hashCode() - ensures that the Object.hashCode() contract is obeyed - * over a range of arbitrary values. 
- */ - public void testHashCode() - { - new HashCodeTester().runTests(); - } - - class FloatValueTester extends BaseTester - { - - @Override - void runTest(float num) - { - COSFloat testFloat = new COSFloat(num); - assertEquals(num, testFloat.floatValue()); - } - - } - - @Override - public void testFloatValue() - { - new FloatValueTester().runTests(); - } - - class DoubleValueTester extends BaseTester - { - - @Override - void runTest(float num) - { - COSFloat testFloat = new COSFloat(num); - // compare the string representation instead of the numeric values - // as the cast from float to double adds some more fraction digits - assertEquals(Float.toString(num), Double.toString(testFloat.doubleValue())); - } - - } - - @Override - public void testDoubleValue() - { - new DoubleValueTester().runTests(); - } - - class IntValueTester extends BaseTester - { - - @Override - void runTest(float num) - { - COSFloat testFloat = new COSFloat(num); - assertEquals((int) num, testFloat.intValue()); - } - - } - - @Override - public void testIntValue() - { - new IntValueTester().runTests(); - } - - class LongValueTester extends BaseTester - { - - @Override - void runTest(float num) - { - COSFloat testFloat = new COSFloat(num); - assertEquals((long) num, testFloat.longValue()); - } - - } - - @Override - public void testLongValue() - { - new LongValueTester().runTests(); - } - - class AcceptTester extends BaseTester - { - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - COSWriter visitor = new COSWriter(outStream); - - @Override - void runTest(float num) - { - try - { - COSFloat cosFloat = new COSFloat(num); - cosFloat.accept(visitor); - assertEquals(floatToString(cosFloat.floatValue()), outStream.toString("ISO-8859-1")); - testByteArrays(floatToString(num).getBytes("ISO-8859-1"), outStream.toByteArray()); - outStream.reset(); - } - catch (IOException e) - { - fail("Failed to write " + num + " exception: " + e.getMessage()); - } - } - - } - - @Override - public void 
testAccept() - { - new AcceptTester().runTests(); - } - - class WritePDFTester extends BaseTester - { - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - - public WritePDFTester() - { - setLoop(-1000, 3000, 200); - } - - @Override - void runTest(float num) - { - try - { - COSFloat cosFloat = new COSFloat(num); - cosFloat.writePDF(outStream); - assertEquals(floatToString(cosFloat.floatValue()), outStream.toString("ISO-8859-1")); - assertEquals(floatToString(num), outStream.toString("ISO-8859-1")); - testByteArrays(floatToString(num).getBytes("ISO-8859-1"), outStream.toByteArray()); - outStream.reset(); - } - catch (IOException e) - { - fail("Failed to write " + num + " exception: " + e.getMessage()); - } - } - - } - - /** - * Tests writePDF() - this method takes an {@link OutputStream} and writes - * this object to it. - */ - public void testWritePDF() - { - WritePDFTester writePDFTester = new WritePDFTester(); - writePDFTester.runTests(); - - // test a corner case as described in PDFBOX-1778 - writePDFTester.runTest(0.000000000000000000000000000000001f); - } - - private String floatToString(float value) - { - // use a BigDecimal as intermediate state to avoid - // a floating point string representation of the float value - return removeTrailingNull(new BigDecimal(String.valueOf(value)).toPlainString()); - } - - private String removeTrailingNull(String value) - { - // remove fraction digit "0" only - if (value.indexOf('.') > -1 && !value.endsWith(".0")) - { - while (value.endsWith("0") && !value.endsWith(".0")) - { - value = value.substring(0,value.length()-1); - } - } - return value; - } - - /** - * This will get the suite of test that this class holds. - * - * @return All of the tests that this class holds. - */ - public static Test suite() - { - return new TestSuite(TestCOSFloat.class); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.cos; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.math.BigDecimal; +import java.util.Random; + +import junit.framework.Test; +import junit.framework.TestSuite; + +import org.apache.pdfbox.pdfwriter.COSWriter; +import org.junit.Assert; + +/** + * Tests {@link COSFloat}. + */ +public class TestCOSFloat extends TestCOSNumber +{ + @Override + public void setUp() + { + try + { + testCOSBase = COSNumber.get("1.1"); + } + catch (IOException e) + { + fail("Failed to create a COSNumber in setUp()"); + } + } + + /** + * Base class to run looped tests with float numbers. + * + * To use it, derive a class and just implement runTest(). Then either call + * runTests for a series of random and pseudorandom tests, or runTest to + * test with corner values. 
+ */ + abstract class BaseTester + { + private int low = -100000; + private int high = 300000; + private int step = 20000; + + public void setLoop(int low, int high, int step) + { + this.low = low; + this.high = high; + this.step = step; + } + + // deterministic and non-deterministic test + public void runTests() + { + // deterministic test + loop(123456); + + // non-deterministic test + loop(System.currentTimeMillis()); + } + + // loop through a series of pseudorandom tests influenced by a seed + private void loop(long seed) + { + Random rnd = new Random(seed); + for (int i = low; i < high; i += step) + { + float num = i * rnd.nextFloat(); + try + { + runTest(num); + } + catch (AssertionError a) + { + fail("num = " + num + ", seed = " + seed + ": " + a.getMessage()); + } + } + } + + abstract void runTest(float num); + + } + + /** + * Tests equals() - ensures that the Object.equals() contract is obeyed. + * These are tested over a range of arbitrary values to ensure Consistency, + * Reflexivity, Symmetry, Transitivity and non-nullity.
+ */ + public void testEquals() + { + new BaseTester() + { + @Override + void runTest(float num) + { + @SuppressWarnings({"java:S5863"}) // don't flag tests for reflexivity + + COSFloat test1 = new COSFloat(num); + COSFloat test2 = new COSFloat(num); + COSFloat test3 = new COSFloat(num); + // Reflexive (x == x) + Assert.assertEquals(test1, test1); + // Symmetric is preserved ( x==y then y==x) + Assert.assertEquals(test2, test1); + Assert.assertEquals(test1, test2); + // Transitive (if x==y && y==z then x==z) + Assert.assertEquals(test1, test2); + Assert.assertEquals(test2, test3); + Assert.assertEquals(test1, test3); + + float nf = Float.intBitsToFloat(Float.floatToIntBits(num) + 1); + COSFloat test4 = new COSFloat(nf); + Assert.assertNotEquals(test4, test1); + } + }.runTests(); + } + + class HashCodeTester extends BaseTester + { + + @Override + void runTest(float num) + { + COSFloat test1 = new COSFloat(num); + COSFloat test2 = new COSFloat(num); + Assert.assertEquals(test1.hashCode(), test2.hashCode()); + + float nf = Float.intBitsToFloat(Float.floatToIntBits(num) + 1); + COSFloat test3 = new COSFloat(nf); + Assert.assertNotEquals(test3.hashCode(), test1.hashCode()); + } + } + + /** + * Tests hashCode() - ensures that the Object.hashCode() contract is obeyed + * over a range of arbitrary values.
+ */ + public void testHashCode() + { + new HashCodeTester().runTests(); + } + + class FloatValueTester extends BaseTester + { + + @Override + void runTest(float num) + { + COSFloat testFloat = new COSFloat(num); + assertEquals(num, testFloat.floatValue()); + } + + } + + @Override + public void testFloatValue() + { + new FloatValueTester().runTests(); + } + + class DoubleValueTester extends BaseTester + { + + @Override + void runTest(float num) + { + COSFloat testFloat = new COSFloat(num); + // compare the string representation instead of the numeric values + // as the cast from float to double adds some more fraction digits + Assert.assertEquals(Float.toString(num), Double.toString(testFloat.doubleValue())); + } + + } + + @Override + public void testDoubleValue() + { + new DoubleValueTester().runTests(); + } + + class IntValueTester extends BaseTester + { + + @Override + void runTest(float num) + { + COSFloat testFloat = new COSFloat(num); + Assert.assertEquals((int) num, testFloat.intValue()); + } + + } + + public void testVerySmallValues() throws IOException + { + double smallValue = Float.MIN_VALUE / 10d; + + assertEquals("Test must be performed with a value smaller than Float.MIN_VALUE.", -1, + Double.compare(smallValue, Float.MIN_VALUE)); + + // 1.4012984643248171E-46 + String asString = String.valueOf(smallValue); + COSFloat cosFloat = new COSFloat(asString); + assertEquals(0.0f, cosFloat.floatValue()); + + // 0.00000000000000000000000000000000000000000000014012984643248171 + asString = new BigDecimal(asString).toPlainString(); + cosFloat = new COSFloat(asString); + assertEquals(0.0f, cosFloat.floatValue()); + + smallValue *= -1; + + // -1.4012984643248171E-46 + asString = String.valueOf(smallValue); + cosFloat = new COSFloat(asString); + assertEquals(0.0f, cosFloat.floatValue()); + + // -0.00000000000000000000000000000000000000000000014012984643248171 + asString = new BigDecimal(asString).toPlainString(); + cosFloat = new COSFloat(asString); + 
assertEquals(0.0f, cosFloat.floatValue()); + } + + public void testVeryLargeValues() throws IOException + { + double largeValue = Float.MAX_VALUE * 10d; + + assertEquals("Test must be performed with a value larger than Float.MAX_VALUE.", 1, + Double.compare(largeValue, Float.MAX_VALUE)); + + // scientific notation of a value beyond Float.MAX_VALUE + String asString = String.valueOf(largeValue); + COSFloat cosFloat = new COSFloat(asString); + assertEquals(Float.MAX_VALUE, cosFloat.floatValue()); + + // plain (non-exponent) notation of the same value + asString = new BigDecimal(asString).toPlainString(); + cosFloat = new COSFloat(asString); + assertEquals(Float.MAX_VALUE, cosFloat.floatValue()); + + largeValue *= -1; + + // scientific notation of a value below -Float.MAX_VALUE + asString = String.valueOf(largeValue); + cosFloat = new COSFloat(asString); + assertEquals(-Float.MAX_VALUE, cosFloat.floatValue()); + + // plain (non-exponent) notation of the same negative value + asString = new BigDecimal(asString).toPlainString(); + cosFloat = new COSFloat(asString); + assertEquals(-Float.MAX_VALUE, cosFloat.floatValue()); + } + + @Override + public void testIntValue() + { + new IntValueTester().runTests(); + } + + class LongValueTester extends BaseTester + { + + @Override + void runTest(float num) + { + COSFloat testFloat = new COSFloat(num); + Assert.assertEquals((long) num, testFloat.longValue()); + } + + } + + @Override + public void testLongValue() + { + new LongValueTester().runTests(); + } + + class AcceptTester extends BaseTester + { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + COSWriter visitor = new COSWriter(outStream); + + @Override + void runTest(float num) + { + try + { + COSFloat cosFloat = new COSFloat(num); + cosFloat.accept(visitor); + Assert.assertEquals(floatToString(cosFloat.floatValue()), outStream.toString("ISO-8859-1")); + testByteArrays(floatToString(num).getBytes("ISO-8859-1"), outStream.toByteArray()); + outStream.reset(); + } + catch (IOException e) + { +
fail("Failed to write " + num + " exception: " + e.getMessage()); + } + } + + } + + @Override + public void testAccept() + { + new AcceptTester().runTests(); + } + + class WritePDFTester extends BaseTester + { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + + WritePDFTester() + { + setLoop(-1000, 3000, 200); + } + + @Override + void runTest(float num) + { + try + { + COSFloat cosFloat = new COSFloat(num); + cosFloat.writePDF(outStream); + Assert.assertEquals(floatToString(cosFloat.floatValue()), outStream.toString("ISO-8859-1")); + Assert.assertEquals(floatToString(num), outStream.toString("ISO-8859-1")); + testByteArrays(floatToString(num).getBytes("ISO-8859-1"), outStream.toByteArray()); + outStream.reset(); + } + catch (IOException e) + { + fail("Failed to write " + num + " exception: " + e.getMessage()); + } + } + + } + + /** + * Tests writePDF() - this method takes an {@link java.io.OutputStream} and writes + * this object to it. + */ + public void testWritePDF() + { + WritePDFTester writePDFTester = new WritePDFTester(); + writePDFTester.runTests(); + + // test a corner case as described in PDFBOX-1778 + writePDFTester.runTest(0.000000000000000000000000000000001f); + } + + public void testDoubleNegative() throws IOException + { + // PDFBOX-4289 + COSFloat cosFloat = new COSFloat("--16.33"); + assertEquals(-16.33f, cosFloat.floatValue()); + } + + private String floatToString(float value) + { + // use a BigDecimal as intermediate state to avoid + // a floating point string representation of the float value + return removeTrailingNull(new BigDecimal(String.valueOf(value)).toPlainString()); + } + + private String removeTrailingNull(String value) + { + // remove fraction digit "0" only + if (value.indexOf('.') > -1 && !value.endsWith(".0")) + { + while (value.endsWith("0") && !value.endsWith(".0")) + { + value = value.substring(0,value.length()-1); + } + } + return value; + } + + /** + * This will get the suite of test that this class holds. 
+ * + * @return All of the tests that this class holds. + */ + public static Test suite() + { + return new TestSuite(TestCOSFloat.class); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSInteger.java b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSInteger.java index be34cfe331c..be3f798e957 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSInteger.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSInteger.java @@ -23,6 +23,7 @@ import junit.framework.TestSuite; import org.apache.pdfbox.pdfwriter.COSWriter; +import org.junit.Assert; /** * A test case for COSInteger @@ -57,21 +58,17 @@ public void testEquals() COSInteger test2 = COSInteger.get(i); COSInteger test3 = COSInteger.get(i); // Reflexive (x == x) - assertTrue(test1.equals(test1)); + Assert.assertEquals(test1, test1); // Symmetric is preserved ( x==y then y===x) - assertTrue(test2.equals(test1)); - assertTrue(test1.equals(test2)); + Assert.assertEquals(test2, test1); + Assert.assertEquals(test1, test2); // Transitive (if x==y && y==z then x===z) - assertTrue(test1.equals(test2)); - assertTrue(test2.equals(test3)); - assertTrue(test1.equals(test3)); - // Non-nullity - assertFalse(test1 == null); - assertFalse(test2 == null); - assertFalse(test3 == null); - + Assert.assertEquals(test1, test2); + Assert.assertEquals(test2, test3); + Assert.assertEquals(test1, test3); + COSInteger test4 = COSInteger.get(i + 1); - assertFalse(test4.equals(test1)); + Assert.assertNotEquals(test4, test1); } } @@ -85,10 +82,10 @@ public void testHashCode() { COSInteger test1 = COSInteger.get(i); COSInteger test2 = COSInteger.get(i); - assertEquals(test1.hashCode(), test2.hashCode()); + Assert.assertEquals(test1.hashCode(), test2.hashCode()); COSInteger test3 = COSInteger.get(i + 1); - assertFalse(test3.hashCode() == test1.hashCode()); + Assert.assertNotEquals(test3.hashCode(), test1.hashCode()); } } @@ -115,7 +112,7 @@ public void testIntValue() { for (int i = -1000; i < 3000; i += 200) { -
assertEquals(i, COSInteger.get(i).intValue()); + Assert.assertEquals(i, COSInteger.get(i).intValue()); } } @@ -124,7 +121,7 @@ public void testLongValue() { for (int i = -1000; i < 3000; i += 200) { - assertEquals((long) i, COSInteger.get(i).longValue()); + Assert.assertEquals((long) i, COSInteger.get(i).longValue()); } } @@ -152,7 +149,7 @@ public void testAccept() } /** - * Tests writePDF() - this method takes an {@link OutputStream} and writes this object to it. + * Tests writePDF() - this method takes an {@link java.io.OutputStream} and writes this object to it. */ public void testWritePDF() { @@ -184,4 +181,4 @@ public static Test suite() { return new TestSuite(TestCOSInteger.class); } -} \ No newline at end of file +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java new file mode 100644 index 00000000000..1bbd5b722fd --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java @@ -0,0 +1,53 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.cos; + +import static org.junit.Assert.assertNull; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.junit.Assert; +import org.junit.Test; + +public class TestCOSName +{ + /** + * PDFBOX-4076: Check that characters outside of US_ASCII are not replaced with "?". + * + * @throws IOException + */ + @Test + public void PDFBox4076() throws IOException + { + String special = "中国你好!"; + PDDocument document = new PDDocument(); + PDPage page = new PDPage(); + document.addPage(page); + document.getDocumentCatalog().getCOSObject().setString(COSName.getPDFName(special), special); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + document.save(baos); + document.close(); + document = PDDocument.load(baos.toByteArray()); + COSDictionary catalogDict = document.getDocumentCatalog().getCOSObject(); + Assert.assertTrue(catalogDict.containsKey(special)); + Assert.assertEquals(special, catalogDict.getString(special)); + document.close(); + } + +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSNumber.java b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSNumber.java index 86d4b084b0c..2bee3194fa1 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSNumber.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSNumber.java @@ -78,11 +78,56 @@ public void testGet() { // PASS } - + // PDFBOX-2569: some numbers start with "+" + assertEquals(COSNumber.get("1"), COSNumber.get("+1")); + assertEquals(COSNumber.get("123"), COSNumber.get("+123")); } catch (IOException e) { fail("Failed to convert a number " + e.getMessage()); } } + + /** + * PDFBOX-5176: large number, too big for a long leads to an COSInteger value which is marked as invalid. 
+ * + * @throws IOException + */ + public void testLargeNumber() throws IOException + { + // max value + COSNumber cosNumber = COSNumber.get(Long.toString(Long.MAX_VALUE)); + assertTrue(cosNumber instanceof COSInteger); + COSInteger cosInteger = (COSInteger) cosNumber; + assertTrue(cosInteger.isValid()); + // min value + cosNumber = COSNumber.get(Long.toString(Long.MIN_VALUE)); + assertTrue(cosNumber instanceof COSInteger); + cosInteger = (COSInteger) cosNumber; + assertTrue(cosInteger.isValid()); + + // out of range, max value + cosNumber = COSNumber.get("18446744073307448448"); + assertTrue(cosNumber instanceof COSInteger); + cosInteger = (COSInteger) cosNumber; + assertFalse(cosInteger.isValid()); + // out of range, min value + cosNumber = COSNumber.get("-18446744073307448448"); + assertTrue(cosNumber instanceof COSInteger); + cosInteger = (COSInteger) cosNumber; + assertFalse(cosInteger.isValid()); + } + + public void testInvalidNumber() + { + try + { + COSNumber.get("18446744073307F448448"); + fail("Was expecting an IOException"); + } + catch (IOException e) + { + } + } + } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSStream.java b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSStream.java index b882bb68dda..33b9f94f754 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSStream.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSStream.java @@ -129,6 +129,25 @@ public void testCompressedStream2Decode() throws IOException validateDecoded(stream, testString); } + /** + * Tests that encoding is done correctly even if the stream is closed twice. + * Closeable.close() allows streams to be closed multiple times. The second and subsequent + * close() calls should have no effect.
+ * + * @throws IOException + */ + public void testCompressedStreamDoubleClose() throws IOException + { + byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes("ASCII"); + byte[] testStringEncoded = encodeData(testString, COSName.FLATE_DECODE); + COSStream stream = new COSStream(); + OutputStream output = stream.createOutputStream(COSName.FLATE_DECODE); + output.write(testString); + output.close(); + output.close(); + validateEncoded(stream, testStringEncoded); + } + private byte[] encodeData(byte[] original, COSName filter) throws IOException { Filter encodingFilter = FilterFactory.INSTANCE.getFilter(filter); diff --git a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSString.java b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSString.java index 9273021257c..91e41970d95 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSString.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSString.java @@ -24,6 +24,7 @@ import junit.framework.TestSuite; import org.apache.pdfbox.pdfwriter.COSWriter; +import org.junit.Assert; /** * This will test all of the filters in the PDFBox system. @@ -100,7 +101,7 @@ private void writePDFTests(String expected, COSString testSubj) { fail("IOException: " + e.getMessage()); } - assertEquals(expected, outStream.toString()); + Assert.assertEquals(expected, outStream.toString()); } /** @@ -150,11 +151,11 @@ public void testGetHex() String expected = "Test subject for testing getHex"; COSString test1 = new COSString(expected); String hexForm = createHex(expected); - assertEquals(hexForm, test1.toHexString()); + Assert.assertEquals(hexForm, test1.toHexString()); COSString escCS = new COSString(ESC_CHAR_STRING); // Not sure whether the escaped characters should be escaped or not, presumably since // writePDF() gives you the proper formatted text, getHex() should ONLY convert to hex. 
- assertEquals(createHex(ESC_CHAR_STRING), escCS.toHexString()); + Assert.assertEquals(createHex(ESC_CHAR_STRING), escCS.toHexString()); } /** @@ -166,17 +167,17 @@ public void testGetString() { String testStr = "Test subject for getString()"; COSString test1 = new COSString(testStr); - assertEquals(testStr, test1.getString()); + Assert.assertEquals(testStr, test1.getString()); COSString hexStr = COSString.parseHex(createHex(testStr)); - assertEquals(testStr, hexStr.getString()); + Assert.assertEquals(testStr, hexStr.getString()); COSString escapedString = new COSString(ESC_CHAR_STRING); - assertEquals(ESC_CHAR_STRING, escapedString.getString()); + Assert.assertEquals(ESC_CHAR_STRING, escapedString.getString()); testStr = "Line1\nLine2\nLine3\n"; COSString lineFeedString = new COSString(testStr); - assertEquals(testStr, lineFeedString.getString()); + Assert.assertEquals(testStr, lineFeedString.getString()); } catch (IOException e) { @@ -215,9 +216,9 @@ public void testUnicode() throws IOException { String theString = "\u4e16"; COSString string = new COSString(theString); - assertTrue(string.getString().equals(theString)); + Assert.assertEquals(string.getString(), theString); - String textAscii = "This is some regular text. It should all be expressable in ASCII"; + String textAscii = "This is some regular text. It should all be expressible in ASCII"; /** En français où les choses sont accentués. En español, así */ String text8Bit = "En fran\u00e7ais o\u00f9 les choses sont accentu\u00e9s. 
En espa\u00f1ol, as\u00ed"; /** をクリックしてく */ @@ -225,29 +226,29 @@ public void testUnicode() throws IOException // Testing the getString method COSString stringAscii = new COSString( textAscii ); - assertEquals( stringAscii.getString(), textAscii ); + Assert.assertEquals( stringAscii.getString(), textAscii ); COSString string8Bit = new COSString( text8Bit ); - assertEquals( string8Bit.getString(), text8Bit ); + Assert.assertEquals( string8Bit.getString(), text8Bit ); COSString stringHighBits = new COSString( textHighBits ); - assertEquals( stringHighBits.getString(), textHighBits ); + Assert.assertEquals( stringHighBits.getString(), textHighBits ); // Testing the getBytes method // The first two strings should be stored as ISO-8859-1 because they only contain chars in the range 0..255 - assertEquals(textAscii, new String(stringAscii.getBytes(), "ISO-8859-1")); + Assert.assertEquals(textAscii, new String(stringAscii.getBytes(), "ISO-8859-1")); // likewise for the 8bit characters. - assertEquals(text8Bit, new String(string8Bit.getBytes(), "ISO-8859-1")); + Assert.assertEquals(text8Bit, new String(string8Bit.getBytes(), "ISO-8859-1")); // The japanese text contains high bits so must be stored as big endian UTF-16 - assertEquals(textHighBits, new String(stringHighBits.getBytes(), "UnicodeBig")); + Assert.assertEquals(textHighBits, new String(stringHighBits.getBytes(), "UnicodeBig")); // Test the writePDF method to ensure that the Strings are correct when written into PDF. 
ByteArrayOutputStream out = new ByteArrayOutputStream(); COSWriter.writeString(stringAscii, out); - assertEquals("(" + textAscii + ")", new String(out.toByteArray(), "ASCII")); + Assert.assertEquals("(" + textAscii + ")", new String(out.toByteArray(), "ASCII")); out.reset(); COSWriter.writeString(string8Bit, out); @@ -256,7 +257,7 @@ public void testUnicode() throws IOException { hex.append( Integer.toHexString(c).toUpperCase() ); } - assertEquals("<"+hex.toString()+">", new String(out.toByteArray(), "ASCII")); + Assert.assertEquals("<"+hex.toString()+">", new String(out.toByteArray(), "ASCII")); out.reset(); COSWriter.writeString(stringHighBits, out); @@ -266,7 +267,7 @@ public void testUnicode() throws IOException { hex.append( Integer.toHexString(c).toUpperCase() ); } - assertEquals("<"+hex.toString()+">", new String(out.toByteArray(), "ASCII")); + Assert.assertEquals("<"+hex.toString()+">", new String(out.toByteArray(), "ASCII")); } @Override @@ -276,11 +277,11 @@ public void testAccept() throws IOException ICOSVisitor visitor = new COSWriter(outStream); COSString testSubj = new COSString(ESC_CHAR_STRING); testSubj.accept(visitor); - assertEquals("(" + ESC_CHAR_STRING_PDF_FORMAT + ")", outStream.toString()); + Assert.assertEquals("(" + ESC_CHAR_STRING_PDF_FORMAT + ")", outStream.toString()); outStream.reset(); testSubj.setForceHexForm(true); testSubj.accept(visitor); - assertEquals("<" + createHex(ESC_CHAR_STRING) + ">", outStream.toString()); + Assert.assertEquals("<" + createHex(ESC_CHAR_STRING) + ">", outStream.toString()); } /** @@ -293,33 +294,27 @@ public void testEquals() { // Reflexive COSString x1 = new COSString("Test"); - assertTrue(x1.equals(x1)); + Assert.assertEquals(x1, x1); // Symmetry i.e. 
if x == y then y == x COSString y1 = new COSString("Test"); - assertTrue(x1.equals(y1)); - assertTrue(y1.equals(x1)); + Assert.assertEquals(x1, y1); + Assert.assertEquals(y1, x1); COSString x2 = new COSString("Test"); x2.setForceHexForm(true); // also if x != y then y != x - assertFalse(x1.equals(x2)); - assertFalse(x2.equals(x1)); + Assert.assertNotEquals(x1, x2); + Assert.assertNotEquals(x2, x1); // Transitive if x == y && y == z then x == z COSString z1 = new COSString("Test"); - assertTrue(x1.equals(y1)); - assertTrue(y1.equals(z1)); - assertTrue(x1.equals(z1)); + Assert.assertEquals(x1, y1); + Assert.assertEquals(y1, z1); + Assert.assertEquals(x1, z1); // Test the negative as well if x1 == y1 && y1 != x2 then x1 != x2 - assertTrue(x1.equals(y1)); - assertFalse(y1.equals(x2)); - assertFalse(x1.equals(x2)); - - // Non-nullity - assertFalse(x1 == null); - assertFalse(y1 == null); - assertFalse(z1 == null); - assertFalse(x2 == null); + Assert.assertEquals(x1, y1); + Assert.assertNotEquals(y1, x2); + Assert.assertNotEquals(x1, x2); } } @@ -330,27 +325,42 @@ public void testHashCode() { COSString str1 = new COSString("Test1"); COSString str2 = new COSString("Test2"); - assertFalse(str1.hashCode() == str2.hashCode()); + Assert.assertNotEquals(str1.hashCode(), str2.hashCode()); COSString str3 = new COSString("Test1"); - assertTrue(str1.hashCode() == str3.hashCode()); + Assert.assertEquals(str1.hashCode(), str3.hashCode()); str3.setForceHexForm(true); - assertFalse(str1.hashCode() == str3.hashCode()); + Assert.assertNotEquals(str1.hashCode(), str3.hashCode()); } /** * Test testCompareFromHexString() - tests that Strings created from hex * compare correctly (PDFBOX-2401) + * + * @throws java.io.IOException */ public void testCompareFromHexString() throws IOException { + @SuppressWarnings({"java:S5863"}) // don't flag tests for reflexivity + COSString test1 = COSString.parseHex("000000FF000000"); COSString test2 = COSString.parseHex("000000FF00FFFF"); - 
assertEquals(test1, test1); - assertEquals(test2, test2); - assertFalse(test1.toHexString().equals(test2.toHexString())); + Assert.assertEquals(test1, test1); + Assert.assertEquals(test2, test2); + Assert.assertNotEquals(test1.toHexString(), test2.toHexString()); assertFalse(Arrays.equals(test1.getBytes(), test2.getBytes())); - assertFalse(test1.equals(test2)); - assertFalse(test2.equals(test1)); - assertFalse(test1.getString().equals(test2.getString())); + Assert.assertNotEquals(test1, test2); + Assert.assertNotEquals(test2, test1); + Assert.assertNotEquals(test1.getString(), test2.getString()); + } + + /** + * PDFBOX-3881: Test that if String has only the BOM, that it be an empty string. + * + * @throws IOException + */ + public void testEmptyStringWithBOM() throws IOException + { + assertTrue(COSString.parseHex("FEFF").getString().isEmpty()); + assertTrue(COSString.parseHex("FFFE").getString().isEmpty()); } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/cos/package.html b/pdfbox/src/test/java/org/apache/pdfbox/cos/package.html index 27af3063009..5e42f5a153b 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/cos/package.html +++ b/pdfbox/src/test/java/org/apache/pdfbox/cos/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/test/java/org/apache/pdfbox/encryption/TestPublicKeyEncryption.java b/pdfbox/src/test/java/org/apache/pdfbox/encryption/TestPublicKeyEncryption.java index b6785a26fc9..7e8df35a1cc 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/encryption/TestPublicKeyEncryption.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/encryption/TestPublicKeyEncryption.java @@ -16,13 +16,14 @@ */ package org.apache.pdfbox.encryption; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.security.NoSuchAlgorithmException; import java.security.cert.CertificateFactory; import java.security.cert.X509Certificate; +import java.util.Arrays; +import java.util.Collection; import javax.crypto.Cipher; @@ -31,18 +32,27 @@ import org.apache.pdfbox.pdmodel.encryption.AccessPermission; import org.apache.pdfbox.pdmodel.encryption.PublicKeyProtectionPolicy; import org.apache.pdfbox.pdmodel.encryption.PublicKeyRecipient; +import org.apache.pdfbox.text.PDFTextStripper; -import junit.framework.TestCase; - +import org.junit.After; import org.junit.Assert; +import static org.junit.Assert.fail; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; /** - * Tests for public key encryption. + * Tests for public key encryption. These tests are not perfect - to be sure, encrypt a file by + * using a certificate exported from your digital id in Adobe Reader, and then open that file with + * Adobe Reader. Do this with every key length. 
* * @author Ben Litchfield */ -public class TestPublicKeyEncryption extends TestCase +@RunWith(Parameterized.class) +public class TestPublicKeyEncryption { + private final File testResultsDir = new File("target/test-output/crypto"); private AccessPermission permission1; private AccessPermission permission2; @@ -61,12 +71,33 @@ public class TestPublicKeyEncryption extends TestCase */ private PDDocument document; - + private String text; + private String producer; + + @Parameterized.Parameter + public int keyLength; + + /** + * Values for keyLength test parameter. + * + * @return + */ + @Parameterized.Parameters + public static Collection keyLengths() + { + return Arrays.asList(40, 128, 256); + } + + public TestPublicKeyEncryption() + { + testResultsDir.mkdirs(); + } + /** * {@inheritDoc} */ - @Override - protected void setUp() throws Exception + @Before + public void setUp() throws Exception { if (Cipher.getMaxAllowedKeyLength("AES") != Integer.MAX_VALUE) { @@ -103,23 +134,17 @@ protected void setUp() throws Exception keyStore1 = "test1.pfx"; keyStore2 = "test2.pfx"; - InputStream input = - TestPublicKeyEncryption.class.getResourceAsStream("test.pdf"); - try - { - document = PDDocument.load(input); - } - finally - { - input.close(); - } + document = PDDocument.load(new File(this.getClass().getResource("test.pdf").toURI())); + text = new PDFTextStripper().getText(document); + producer = document.getDocumentInformation().getProducer(); + document.setVersion(1.7f); } /** * {@inheritDoc} */ - @Override - protected void tearDown() throws Exception + @After + public void tearDown() throws Exception { document.close(); } @@ -130,16 +155,19 @@ protected void tearDown() throws Exception * * @throws Exception If there is an unexpected error during the test. 
*/ + @Test public void testProtectionError() throws Exception { PublicKeyProtectionPolicy policy = new PublicKeyProtectionPolicy(); policy.addRecipient(recipient1); + policy.setEncryptionKeyLength(keyLength); document.protect(policy); PDDocument encryptedDoc = null; try { - encryptedDoc = reload(document, password2, getKeyStore(keyStore2)); + File file = save("testProtectionError"); + encryptedDoc = reload(file, password2, getKeyStore(keyStore2)); Assert.assertTrue(encryptedDoc.isEncrypted()); fail("No exception when using an incorrect decryption key"); } @@ -165,19 +193,21 @@ public void testProtectionError() throws Exception * * @throws Exception If there is an unexpected error during the test. */ + @Test public void testProtection() throws Exception { PublicKeyProtectionPolicy policy = new PublicKeyProtectionPolicy(); policy.addRecipient(recipient1); + policy.setEncryptionKeyLength(keyLength); document.protect(policy); - PDDocument encryptedDoc = reload(document, password1, getKeyStore(keyStore1)); + File file = save("testProtection"); + PDDocument encryptedDoc = reload(file, password1, getKeyStore(keyStore1)); try { Assert.assertTrue(encryptedDoc.isEncrypted()); - AccessPermission permission = - encryptedDoc.getCurrentAccessPermission(); + AccessPermission permission = encryptedDoc.getCurrentAccessPermission(); Assert.assertFalse(permission.canAssembleDocument()); Assert.assertFalse(permission.canExtractContent()); Assert.assertTrue(permission.canExtractForAccessibility()); @@ -199,19 +229,21 @@ public void testProtection() throws Exception * * @throws Exception If there is an error during the test. 
*/ + @Test public void testMultipleRecipients() throws Exception { PublicKeyProtectionPolicy policy = new PublicKeyProtectionPolicy(); policy.addRecipient(recipient1); policy.addRecipient(recipient2); + policy.setEncryptionKeyLength(keyLength); document.protect(policy); // open first time - PDDocument encryptedDoc1 = reload(document, password1, getKeyStore(keyStore1)); + File file = save("testMultipleRecipients"); + PDDocument encryptedDoc1 = reload(file, password1, getKeyStore(keyStore1)); try { - AccessPermission permission = - encryptedDoc1.getCurrentAccessPermission(); + AccessPermission permission = encryptedDoc1.getCurrentAccessPermission(); Assert.assertFalse(permission.canAssembleDocument()); Assert.assertFalse(permission.canExtractContent()); Assert.assertTrue(permission.canExtractForAccessibility()); @@ -227,11 +259,10 @@ public void testMultipleRecipients() throws Exception } // open second time - PDDocument encryptedDoc2 = reload(document, password2, getKeyStore(keyStore2)); + PDDocument encryptedDoc2 = reload(file, password2, getKeyStore(keyStore2)); try { - AccessPermission permission = - encryptedDoc2.getCurrentAccessPermission(); + AccessPermission permission = encryptedDoc2.getCurrentAccessPermission(); Assert.assertFalse(permission.canAssembleDocument()); Assert.assertFalse(permission.canExtractContent()); Assert.assertTrue(permission.canExtractForAccessibility()); @@ -248,22 +279,26 @@ public void testMultipleRecipients() throws Exception } /** - * Reloads the given document by writing it to a temporary byte array - * and loading a fresh document from that byte array. + * Reloads the given document from a file and check some contents. 
* - * @param doc input document + * @param file input file * @param decryptionPassword password to be used to decrypt the doc * @param keyStore password to be used to decrypt the doc * @return reloaded document * @throws Exception if */ - private PDDocument reload(PDDocument doc, String decryptionPassword, InputStream keyStore) + private PDDocument reload(File file, String decryptionPassword, InputStream keyStore) throws IOException, NoSuchAlgorithmException { - ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - doc.save(buffer); - return PDDocument.load(new ByteArrayInputStream(buffer.toByteArray()), decryptionPassword, + PDDocument doc2 = PDDocument.load(file, decryptionPassword, keyStore, null, MemoryUsageSetting.setupMainMemoryOnly()); + Assert.assertEquals("Extracted text is different", + text, + new PDFTextStripper().getText(doc2)); + Assert.assertEquals("Producer is different", + producer, + doc2.getDocumentInformation().getProducer()); + return doc2; } /** @@ -283,8 +318,7 @@ private PublicKeyRecipient getRecipient(String certificate, AccessPermission per CertificateFactory factory = CertificateFactory.getInstance("X.509"); PublicKeyRecipient recipient = new PublicKeyRecipient(); recipient.setPermission(permission); - recipient.setX509( - (X509Certificate) factory.generateCertificate(input)); + recipient.setX509((X509Certificate) factory.generateCertificate(input)); return recipient; } finally @@ -297,4 +331,39 @@ private InputStream getKeyStore(String name) { return TestPublicKeyEncryption.class.getResourceAsStream(name); } -} + + private File save(String name) throws IOException + { + File file = new File(testResultsDir, name + "-" + keyLength + "bit.pdf"); + document.save(file); + return file; + } + + @Test + public void testReadPubkeyEncryptedAES128() throws IOException + { + InputStream is = TestPublicKeyEncryption.class.getResourceAsStream("AESkeylength128.pdf"); + PDDocument doc = PDDocument.load(is, + "w!z%C*F-JaNdRgUk", + 
TestPublicKeyEncryption.class.getResourceAsStream("PDFBOX-4421-keystore.pfx"), + "testnutzer"); + PDFTextStripper stripper = new PDFTextStripper(); + Assert.assertEquals("Key length: 128", stripper.getText(doc).trim()); + is.close(); + doc.close(); + } + + @Test + public void testReadPubkeyEncryptedAES256() throws IOException + { + InputStream is = TestPublicKeyEncryption.class.getResourceAsStream("AESkeylength256.pdf"); + PDDocument doc = PDDocument.load(is, + "w!z%C*F-JaNdRgUk", + TestPublicKeyEncryption.class.getResourceAsStream("PDFBOX-4421-keystore.pfx"), + "testnutzer"); + PDFTextStripper stripper = new PDFTextStripper(); + Assert.assertEquals("Key length: 256", stripper.getText(doc).trim()); + is.close(); + doc.close(); + } +} \ No newline at end of file diff --git a/pdfbox/src/test/java/org/apache/pdfbox/encryption/TestSymmetricKeyEncryption.java b/pdfbox/src/test/java/org/apache/pdfbox/encryption/TestSymmetricKeyEncryption.java index 9733400ee2e..06652a3aed2 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/encryption/TestSymmetricKeyEncryption.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/encryption/TestSymmetricKeyEncryption.java @@ -18,7 +18,6 @@ import java.awt.image.BufferedImage; import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -31,17 +30,23 @@ import junit.framework.TestCase; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; +import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification; import 
org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; +import org.apache.pdfbox.pdmodel.encryption.PDEncryption; import org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy; +import org.apache.pdfbox.pdmodel.encryption.StandardSecurityHandler; import org.apache.pdfbox.pdmodel.graphics.image.ValidateXImage; import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.util.Charsets; import org.junit.Assert; /** @@ -100,7 +105,7 @@ protected void setUp() throws Exception * Test that permissions work as intended: the user psw ("user") is enough * to open the PDF with possibly restricted rights, the owner psw ("owner") * gives full permissions. The 3 files of this test were created by Maruan - * Sayhoun, NOT with PDFBox, but with Adobe Acrobat to ensure "the gold + * Sahyoun, NOT with PDFBox, but with Adobe Acrobat to ensure "the gold * standard". The restricted permissions prevent printing and text * extraction. In the 128 and 256 bit encrypted files, AssembleDocument, * ExtractForAccessibility and PrintDegraded are also disabled. 
@@ -160,15 +165,16 @@ public void testPermissions() throws Exception private void checkPerms(byte[] inputFileAsByteArray, String password, AccessPermission expectedPermissions) throws IOException { - PDDocument doc = PDDocument.load( - new ByteArrayInputStream(inputFileAsByteArray), - password); + PDDocument doc = PDDocument.load(inputFileAsByteArray, password); - AccessPermission currentAccessPermission = doc.getCurrentAccessPermission(); + AccessPermission currentAccessPermission = doc.getCurrentAccessPermission(); // check permissions assertEquals(expectedPermissions.isOwnerPermission(), currentAccessPermission.isOwnerPermission()); - assertEquals(expectedPermissions.isReadOnly(), currentAccessPermission.isReadOnly()); + if (!expectedPermissions.isOwnerPermission()) + { + assertEquals(true, currentAccessPermission.isReadOnly()); + } assertEquals(expectedPermissions.canAssembleDocument(), currentAccessPermission.canAssembleDocument()); assertEquals(expectedPermissions.canExtractContent(), currentAccessPermission.canExtractContent()); assertEquals(expectedPermissions.canExtractForAccessibility(), currentAccessPermission.canExtractForAccessibility()); @@ -184,8 +190,7 @@ private void checkPerms(byte[] inputFileAsByteArray, String password, } /** - * Protect a document with a key and try to reopen it with that key and - * compare. + * Protect a document with a key and try to reopen it with that key and compare. * * @throws Exception If there is an unexpected error during the test. 
*/ @@ -194,13 +199,38 @@ public void testProtection() throws Exception byte[] inputFileAsByteArray = getFileResourceAsByteArray("Acroform-PDFBOX-2333.pdf"); int sizePriorToEncryption = inputFileAsByteArray.length; - testSymmEncrForKeySize(40, sizePriorToEncryption, inputFileAsByteArray, + testSymmEncrForKeySize(40, false, sizePriorToEncryption, inputFileAsByteArray, + USERPASSWORD, OWNERPASSWORD, permission); + + testSymmEncrForKeySize(128, false, sizePriorToEncryption, inputFileAsByteArray, + USERPASSWORD, OWNERPASSWORD, permission); + + testSymmEncrForKeySize(128, true, sizePriorToEncryption, inputFileAsByteArray, USERPASSWORD, OWNERPASSWORD, permission); - testSymmEncrForKeySize(128, sizePriorToEncryption, inputFileAsByteArray, + testSymmEncrForKeySize(256, true, sizePriorToEncryption, inputFileAsByteArray, USERPASSWORD, OWNERPASSWORD, permission); + } + + /** + * PDFBOX-4308: test that index colorspace table string doesn't get + * corrupted when encrypting. This happened because the colorspace was + * referenced twice, once in the resources dictionary and once in an image + * in the resources dictionary, and when saving the PDF the string was saved + * twice, once as a direct object and once as an indirect object (both from + * the same java object). Encryption used the wrong object number and/or the + * object was encrypted twice. 
+ * + * @throws IOException + */ + public void testPDFBox4308() throws IOException + { + InputStream is = new FileInputStream("target/pdfs/PDFBOX-4308.pdf"); + byte[] inputFileAsByteArray = IOUtils.toByteArray(is); + is.close(); + int sizePriorToEncryption = inputFileAsByteArray.length; - testSymmEncrForKeySize(256, sizePriorToEncryption, inputFileAsByteArray, + testSymmEncrForKeySize(40, false, sizePriorToEncryption, inputFileAsByteArray, USERPASSWORD, OWNERPASSWORD, permission); } @@ -220,22 +250,67 @@ public void testProtectionInnerAttachment() throws Exception File extractedEmbeddedFile = extractEmbeddedFile(new ByteArrayInputStream(inputFileWithEmbeddedFileAsByteArray), "innerFile.pdf"); - testSymmEncrForKeySizeInner(40, sizeOfFileWithEmbeddedFile, + testSymmEncrForKeySizeInner(40, false, sizeOfFileWithEmbeddedFile, + inputFileWithEmbeddedFileAsByteArray, extractedEmbeddedFile, USERPASSWORD, OWNERPASSWORD); + + testSymmEncrForKeySizeInner(128, false, sizeOfFileWithEmbeddedFile, inputFileWithEmbeddedFileAsByteArray, extractedEmbeddedFile, USERPASSWORD, OWNERPASSWORD); - testSymmEncrForKeySizeInner(128, sizeOfFileWithEmbeddedFile, + testSymmEncrForKeySizeInner(128, true, sizeOfFileWithEmbeddedFile, inputFileWithEmbeddedFileAsByteArray, extractedEmbeddedFile, USERPASSWORD, OWNERPASSWORD); - testSymmEncrForKeySizeInner(256, sizeOfFileWithEmbeddedFile, + testSymmEncrForKeySizeInner(256, true, sizeOfFileWithEmbeddedFile, inputFileWithEmbeddedFileAsByteArray, extractedEmbeddedFile, USERPASSWORD, OWNERPASSWORD); } - private void testSymmEncrForKeySize(int keyLength, + /** + * PDFBOX-4453: verify that identical encrypted strings are really decrypted each. 
+ * + * @throws IOException + */ + public void testPDFBox4453() throws IOException + { + final int TESTCOUNT = 1000; + File file = new File(testResultsDir,"PDFBOX-4453.pdf"); + PDDocument doc = new PDDocument(); + doc.addPage(new PDPage()); + for (int i = 0; i < TESTCOUNT; ++i) + { + // strings must be in different dictionaries so that the actual + // encryption key changes + COSDictionary dict = new COSDictionary(); + doc.getPage(0).getCOSObject().setItem(COSName.getPDFName("_Test-" + i), dict); + // need two different keys so that there are both encrypted and decrypted COSStrings + // with value "0" + dict.setString("key1", "3"); + dict.setString("key2", "0"); + } + + //RC4-40 + StandardProtectionPolicy spp = new StandardProtectionPolicy("12345", "",new AccessPermission()); + spp.setEncryptionKeyLength(40); + spp.setPreferAES(false); + doc.protect(spp); + doc.save(file); + doc.close(); + + doc = PDDocument.load(file); + Assert.assertTrue(doc.isEncrypted()); + for (int i = 0; i < TESTCOUNT; ++i) + { + COSDictionary dict = doc.getPage(0).getCOSObject().getCOSDictionary(COSName.getPDFName("_Test-" + i)); + Assert.assertEquals("3", dict.getString("key1")); + Assert.assertEquals("0", dict.getString("key2")); + } + doc.close(); + } + + private void testSymmEncrForKeySize(int keyLength, boolean preferAES, int sizePriorToEncr, byte[] inputFileAsByteArray, String userpassword, String ownerpassword, AccessPermission permission) throws IOException { - PDDocument document = PDDocument.load(new ByteArrayInputStream(inputFileAsByteArray)); + PDDocument document = PDDocument.load(inputFileAsByteArray); String prefix = "Simple-"; int numSrcPages = document.getNumberOfPages(); PDFRenderer pdfRenderer = new PDFRenderer(document); @@ -250,7 +325,7 @@ private void testSymmEncrForKeySize(int keyLength, srcContentStreamTab.add(bytes); } - PDDocument encryptedDoc = encrypt(keyLength, sizePriorToEncr, document, + PDDocument encryptedDoc = encrypt(keyLength, preferAES, sizePriorToEncr, 
document, prefix, permission, userpassword, ownerpassword); Assert.assertEquals(numSrcPages, encryptedDoc.getNumberOfPages()); @@ -270,7 +345,7 @@ private void testSymmEncrForKeySize(int keyLength, bytes); } - File pdfFile = new File(testResultsDir, prefix + keyLength + "-bit-decrypted.pdf"); + File pdfFile = new File(testResultsDir, prefix + keyLength + "-bit-" + (preferAES ? "AES" : "RC4") + "-decrypted.pdf"); encryptedDoc.setAllSecurityToBeRemoved(true); encryptedDoc.save(pdfFile); encryptedDoc.close(); @@ -278,38 +353,52 @@ private void testSymmEncrForKeySize(int keyLength, // encrypt with keylength and permission, save, check sizes before and after encryption // reopen, decrypt and return document - private PDDocument encrypt(int keyLength, int sizePriorToEncr, + private PDDocument encrypt(int keyLength, boolean preferAES, int sizePriorToEncr, PDDocument doc, String prefix, AccessPermission permission, String userpassword, String ownerpassword) throws IOException { - AccessPermission ap = new AccessPermission(); - StandardProtectionPolicy spp = new StandardProtectionPolicy(ownerpassword, userpassword, ap); + StandardProtectionPolicy spp = new StandardProtectionPolicy(ownerpassword, userpassword, + permission); spp.setEncryptionKeyLength(keyLength); - spp.setPermissions(permission); + spp.setPreferAES(preferAES); // This must have no effect and should only log a warning. doc.setAllSecurityToBeRemoved(true); doc.protect(spp); - File pdfFile = new File(testResultsDir, prefix + keyLength + "-bit-encrypted.pdf"); + File pdfFile = new File(testResultsDir, prefix + keyLength + "-bit-" + (preferAES ? "AES" : "RC4") + "-encrypted.pdf"); doc.save(pdfFile); doc.close(); long sizeEncrypted = pdfFile.length(); - Assert.assertTrue(keyLength - + "-bit encrypted pdf should not have same size as plain one", - sizeEncrypted != sizePriorToEncr); - - PDDocument encryptedDoc; + Assert.assertNotEquals(keyLength + + "-bit " + (preferAES ? 
"AES" : "RC4") + " encrypted pdf should not have same size as plain one", + sizeEncrypted, sizePriorToEncr); // test with owner password => full permissions - encryptedDoc = PDDocument.load(pdfFile, ownerpassword); + PDDocument encryptedDoc = PDDocument.load(pdfFile, ownerpassword); Assert.assertTrue(encryptedDoc.isEncrypted()); Assert.assertTrue(encryptedDoc.getCurrentAccessPermission().isOwnerPermission()); + + // Older encryption allows to get the user password when the owner password is known + PDEncryption encryption = encryptedDoc.getEncryption(); + int revision = encryption.getRevision(); + if (revision < 5) + { + StandardSecurityHandler standardSecurityHandler = new StandardSecurityHandler(); + int keyLengthInBytes = encryption.getVersion() == 1 ? 5 : encryption.getLength() / 8; + byte[] computedUserPassword = standardSecurityHandler.getUserPassword( + ownerpassword.getBytes(Charsets.ISO_8859_1), + encryption.getOwnerKey(), + revision, + keyLengthInBytes); + Assert.assertEquals(userpassword.substring(0, 32), new String(computedUserPassword, Charsets.ISO_8859_1)); + } + encryptedDoc.close(); - // test with owner password => restricted permissions + // test with user password => restricted permissions encryptedDoc = PDDocument.load(pdfFile, userpassword); Assert.assertTrue(encryptedDoc.isEncrypted()); Assert.assertFalse(encryptedDoc.getCurrentAccessPermission().isOwnerPermission()); @@ -347,21 +436,21 @@ private File extractEmbeddedFile(InputStream pdfInputStream, String name) throws return resultFile; } - private void testSymmEncrForKeySizeInner(int keyLength, + private void testSymmEncrForKeySizeInner(int keyLength, boolean preferAES, int sizePriorToEncr, byte[] inputFileWithEmbeddedFileAsByteArray, File embeddedFilePriorToEncryption, String userpassword, String ownerpassword) throws IOException { - PDDocument document = PDDocument.load(new ByteArrayInputStream(inputFileWithEmbeddedFileAsByteArray)); - PDDocument encryptedDoc = encrypt(keyLength, 
sizePriorToEncr, document, "ContainsEmbedded-", permission, userpassword, ownerpassword); + PDDocument document = PDDocument.load(inputFileWithEmbeddedFileAsByteArray); + PDDocument encryptedDoc = encrypt(keyLength, preferAES, sizePriorToEncr, document, "ContainsEmbedded-", permission, userpassword, ownerpassword); - File decryptedFile = new File(testResultsDir, "DecryptedContainsEmbedded-" + keyLength + "-bit.pdf"); + File decryptedFile = new File(testResultsDir, "DecryptedContainsEmbedded-" + keyLength + "-bit-" + (preferAES ? "AES" : "RC4") + ".pdf"); encryptedDoc.setAllSecurityToBeRemoved(true); encryptedDoc.save(decryptedFile); - File extractedEmbeddedFile = extractEmbeddedFile(new FileInputStream(decryptedFile), "decryptedInnerFile-" + keyLength + "-bit.pdf"); + File extractedEmbeddedFile = extractEmbeddedFile(new FileInputStream(decryptedFile), "decryptedInnerFile-" + keyLength + "-bit-" + (preferAES ? "AES" : "RC4") + ".pdf"); - Assert.assertEquals(keyLength + "-bit decrypted inner attachment pdf should have same size as plain one", + Assert.assertEquals(keyLength + "-bit " + (preferAES ? 
"AES" : "RC4") + " decrypted inner attachment pdf should have same size as plain one", embeddedFilePriorToEncryption.length(), extractedEmbeddedFile.length()); // compare the two embedded files @@ -371,22 +460,13 @@ private void testSymmEncrForKeySizeInner(int keyLength, encryptedDoc.close(); } - private byte[] getStreamAsByteArray(InputStream is) throws IOException - { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - IOUtils.copy(is, baos); - is.close(); - return baos.toByteArray(); - } - private byte[] getFileResourceAsByteArray(String testFileName) throws IOException { - return getStreamAsByteArray(TestSymmetricKeyEncryption.class.getResourceAsStream(testFileName)); + return IOUtils.toByteArray(TestSymmetricKeyEncryption.class.getResourceAsStream(testFileName)); } private byte[] getFileAsByteArray(File f) throws IOException { - return getStreamAsByteArray(new FileInputStream(f)); + return IOUtils.toByteArray(new FileInputStream(f)); } - } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/encryption/package.html b/pdfbox/src/test/java/org/apache/pdfbox/encryption/package.html index d8141689320..ef137959d85 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/encryption/package.html +++ b/pdfbox/src/test/java/org/apache/pdfbox/encryption/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/test/java/org/apache/pdfbox/filter/TestFilters.java b/pdfbox/src/test/java/org/apache/pdfbox/filter/TestFilters.java index 9580e00cda1..a4f25e69974 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/filter/TestFilters.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/filter/TestFilters.java @@ -18,8 +18,8 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; -import java.io.InputStream; import java.util.Arrays; import java.util.Random; @@ -27,6 +27,8 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDDocument; /** * This will test all of the filters in the PDFBox system. @@ -115,27 +117,30 @@ public void testFilters() throws IOException } } + /** + * This will test the use of identity filter to decode stream and string. + * This test threw an IOException before the correction. + * + * @throws IOException + */ + public void testPDFBOX4517() throws IOException + { + PDDocument.load(new File("target/pdfs/PDFBOX-4517-cryptfilter.pdf"), + "userpassword1234"); + } /** - * This will test the LZW filter with the sequence that failed in PDFBOX-1777. + * This will test the LZW filter with the sequence that failed in PDFBOX-1977. * To check that the test itself is legit, revert LZWFilter.java to rev 1571801, * which should fail this test. 
* * @throws IOException */ - public void testPDFBOX1777() throws IOException + public void testPDFBOX1977() throws IOException { Filter lzwFilter = FilterFactory.INSTANCE.getFilter(COSName.LZW_DECODE); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - InputStream is = this.getClass().getResourceAsStream("PDFBOX-1777.bin"); - int by; - while ((by = is.read()) != -1) - { - baos.write(by); - } - is.close(); - - checkEncodeDecode(lzwFilter, baos.toByteArray()); + byte[] byteArray = IOUtils.toByteArray(this.getClass().getResourceAsStream("PDFBOX-1977.bin")); + checkEncodeDecode(lzwFilter, byteArray); } private void checkEncodeDecode(Filter filter, byte[] original) throws IOException diff --git a/pdfbox/src/test/java/org/apache/pdfbox/filter/ccitt/AbstractCCITTFaxTestCase.java b/pdfbox/src/test/java/org/apache/pdfbox/filter/ccitt/AbstractCCITTFaxTestCase.java deleted file mode 100644 index 94614f14c93..00000000000 --- a/pdfbox/src/test/java/org/apache/pdfbox/filter/ccitt/AbstractCCITTFaxTestCase.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* $Id$ */ - -package org.apache.pdfbox.filter.ccitt; - -import junit.framework.TestCase; - -/** - * Abstract base class for testing CCITT fax encoding. - */ -public abstract class AbstractCCITTFaxTestCase extends TestCase -{ - - /** - * Visualizes a packed bitmap and dumps it on System.out. - * @param data the bitmap - * @param columns the number of columns - */ - protected void dumpBitmap(byte[] data, int columns) - { - int lineBytes = columns / 8; - if (columns % 8 != 0) - { - lineBytes++; - } - int lines = data.length / lineBytes; - for (int y = 0; y < lines; y++) - { - int start = y * lineBytes; - for (int x = 0; x < columns; x++) - { - int index = start + (x / 8); - int mask = 1 << (7 - (x % 8)); - int value = data[index] & mask; - System.out.print(value != 0 ? 'X' : '_'); - } - System.out.println(); - } - } - - /** - * Converts a series of bytes to a "binary" String of 0s and 1s. - * @param data the data - * @return the binary string - */ - protected String toBitString(byte[] data) - { - return PackedBitArray.toBitString(data); - } - -} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/filter/ccitt/TestCCITTFaxG31DDecodeInputStream.java b/pdfbox/src/test/java/org/apache/pdfbox/filter/ccitt/TestCCITTFaxG31DDecodeInputStream.java deleted file mode 100644 index 509084ee23b..00000000000 --- a/pdfbox/src/test/java/org/apache/pdfbox/filter/ccitt/TestCCITTFaxG31DDecodeInputStream.java +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* $Id$ */ - -package org.apache.pdfbox.filter.ccitt; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -import org.apache.pdfbox.io.IOUtils; - -/** - * Tests the CCITT Fax G3 1D decoder. - */ -public class TestCCITTFaxG31DDecodeInputStream extends AbstractCCITTFaxTestCase -{ - - private static final boolean DEBUG = false; - - private static final String EOL = "000000000001"; - private static final String RTC = EOL + EOL + EOL + EOL + EOL + EOL; - - /** - * Tests the decoder with naked bits (no EOL, no alignment, nothing). - * @throws IOException if an I/O error occurs - */ - public void testDecoderNaked() throws IOException - { - //Test data: 24x3 pixels encoded - byte[] data = fromBinary("10011" + "000101" + "10011" - + "00110101" + "011" + "10011" + "0000111" - + "00110101" + "010" + "000111" + "010" + "0010111" + "000000"); - assertStandardDecodingResult(data); - } - - /** - * Tests the decoder with EOLs. - * @throws IOException if an I/O error occurs - */ - public void testDecoderWithEOL() throws IOException - { - //Test data: 24x3 pixels encoded - byte[] data = fromBinary("10011" + "000101" + "10011" + EOL - + "00110101" + "011" + "10011" + "0000111" + EOL - + "00110101" + "010" + "000111" + "010" + "0010111" + "000000" + EOL); - assertStandardDecodingResult(data); - } - - /** - * Tests the decoder with RTC and byte alignment. 
- * @throws IOException if an I/O error occurs - */ - public void testDecoderAlignedWithRTC() throws IOException - { - //Test data: 24x3 pixels encoded - byte[] data = fromBinary("1001100010110011" + EOL - + "00110101011100110000111" + "0" + EOL - + "001101010100001110100010111000000" + "00000" + RTC); - assertStandardDecodingResult(data); - } - - /** - * Tests the decoder with an initial EOL. - * @throws IOException if an I/O error occurs - */ - public void testDecoderInitialEOL() throws IOException - { - //Test data: 24x3 pixels encoded - byte[] data = fromBinary("000" + EOL + "1001100010110011" + EOL - + "00110101011100110000111" + EOL - + "001101010100001110100010111000000"); - assertStandardDecodingResult(data); - } - - private void assertStandardDecodingResult(byte[] data) throws IOException - { - int columns = 24; - - byte[] decoded = decode(data, columns); - - if (DEBUG) - { - dumpBitmap(decoded, columns); - System.out.println(PackedBitArray.toBitString(decoded)); - } - - assertEquals(9, decoded.length); - assertEquals("000000001111111100000000" - + "111100000000111111111111" - + "101000000000000000000000", toBitString(decoded)); - } - - /** - * Tests the decoder with a restriction in the number of rows. - * @throws IOException if an I/O error occurs - */ - public void testDecoderRowsRestriction() throws IOException - { - //Test data: 24x3 pixels encoded - byte[] data = fromBinary("10011" + "000101" + "10011" - + "00110101" + "011" + "10011" + "0000111" - + "00110101" + "010" + "000111" + "010" + "0010111" + "000000"); - int columns = 24; - int rows = 2; //We actually have data for three rows. Just checking the restriction. 
- - CCITTFaxG31DDecodeInputStream decoder = new CCITTFaxG31DDecodeInputStream( - new ByteArrayInputStream(data), columns, rows, false); - byte[] decoded = IOUtils.toByteArray(decoder); - decoder.close(); - - if (DEBUG) - { - dumpBitmap(decoded, columns); - System.out.println(PackedBitArray.toBitString(decoded)); - } - - assertEquals(6, decoded.length); - assertEquals("000000001111111100000000" - + "111100000000111111111111", toBitString(decoded)); - } - - /** - * Tests the decoder with white lines. - * @throws IOException if an I/O error occurs - */ - public void testDecoderWhiteLines() throws IOException - { - //Test data: 1728x3 pixels encoded (all white) - byte[] data = fromBinary(EOL + "010011011" + "00110101" //EOL + w1728 (make-up) + w0 - + EOL + "010011011" + "00110101" - + EOL + "010011011" + "00110101" + RTC); - int columns = 1728; - - byte[] decoded = decode(data, columns); - - if (DEBUG) - { - dumpBitmap(decoded, columns); - } - - assertEquals(columns * 3 / 8, decoded.length); - } - - /** - * Decodes a byte buffer. 
- * @param data the data - * @param columns the number of columns - * @return the decoded bits/pixels - * @throws IOException if an I/O error occurs - */ - public static byte[] decode(byte[] data, int columns) throws IOException - { - CCITTFaxG31DDecodeInputStream decoder = new CCITTFaxG31DDecodeInputStream( - new ByteArrayInputStream(data), columns, false); - byte[] decoded = IOUtils.toByteArray(decoder); - decoder.close(); - return decoded; - } - - private byte[] fromBinary(String binary) - { - ByteArrayOutputStream baout = new ByteArrayOutputStream(); - int pos = 0; - while (pos < binary.length() - 8) - { - int v = Integer.parseInt(binary.substring(pos, pos + 8), 2); - baout.write(v & 0xFF); - pos += 8; - } - int rest = binary.length() - pos; - if (rest > 0) - { - String f = binary.substring(pos) + "00000000".substring(rest); - baout.write(Integer.parseInt(f, 2)); - } - return baout.toByteArray(); - } - -} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/filter/ccitt/TestPackedBitArray.java b/pdfbox/src/test/java/org/apache/pdfbox/filter/ccitt/TestPackedBitArray.java deleted file mode 100644 index 5265bf67246..00000000000 --- a/pdfbox/src/test/java/org/apache/pdfbox/filter/ccitt/TestPackedBitArray.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* $Id$ */ - -package org.apache.pdfbox.filter.ccitt; - -import junit.framework.TestCase; - -/** - * This is a unit test for {@link PackedBitArray}. - */ -public class TestPackedBitArray extends TestCase -{ - - /** - * Tests the {@link PackedBitArray} class. - */ - public void testPackedBitArray() - { - PackedBitArray bits = new PackedBitArray(19); - assertEquals(19, bits.getBitCount()); - assertEquals(3, bits.getByteCount()); - assertEquals("0000000000000000000", bits.toString()); - - bits.set(1); - assertEquals("0100000000000000000", bits.toString()); - - bits.clear(1); - assertEquals("0000000000000000000", bits.toString()); - - bits.setBits(4, 4); - assertEquals("0000111100000000000", bits.toString()); - - bits.setBits(2, 1); - assertEquals("0010111100000000000", bits.toString()); - - bits.setBits(9, 9, 1); - assertEquals("0010111101111111110", bits.toString()); - - bits.clearBits(15, 2); - assertEquals("0010111101111110010", bits.toString()); - - bits.setBits(9, 9, 0); - assertEquals("0010111100000000000", bits.toString()); - - bits.clear(); - assertEquals("0000000000000000000", bits.toString()); - - bits.setBits(1, 18); - assertEquals("0111111111111111111", bits.toString()); - - bits.clearBits(3, 1); - assertEquals("0110111111111111111", bits.toString()); - - try - { - bits.setBits(1, 19); - fail("Expecting IndexOutOfBoundsException"); - } - catch (IndexOutOfBoundsException e) - { - //good - } - } - -} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/filter/ccitt/package.html b/pdfbox/src/test/java/org/apache/pdfbox/filter/ccitt/package.html deleted file mode 100644 index 975173e46e1..00000000000 --- a/pdfbox/src/test/java/org/apache/pdfbox/filter/ccitt/package.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - -This package contains classes testing the CCITT encoders and decoders. 
- - diff --git a/pdfbox/src/test/java/org/apache/pdfbox/filter/package.html b/pdfbox/src/test/java/org/apache/pdfbox/filter/package.html index 13aa34d17f7..42247d09ace 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/filter/package.html +++ b/pdfbox/src/test/java/org/apache/pdfbox/filter/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/test/java/org/apache/pdfbox/io/ScratchFileBufferTest.java b/pdfbox/src/test/java/org/apache/pdfbox/io/ScratchFileBufferTest.java new file mode 100644 index 00000000000..6964f4df182 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/io/ScratchFileBufferTest.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.io; + +import java.io.IOException; +import org.junit.Assert; + +import org.junit.Test; + +/** + * Regression test to check the known bugs in {@link ScratchFileBuffer}. 
+ * + * @author Kühn & Weyh Software GmbH + */ +public class ScratchFileBufferTest +{ + + private static final int PAGE_SIZE = 4096; + private static final int NUM_ITERATIONS = 3; + + /** + * PDFBOX-4756: test positions are correct when seeking and that no EOFException is thrown in + * ScratchFileBuffer.seek() beyond last page. + * + * @throws IOException + */ + @Test + public void testEOFBugInSeek() throws IOException + { + ScratchFile scratchFile = new ScratchFile(MemoryUsageSetting.setupTempFileOnly()); + ScratchFileBuffer scratchFileBuffer = new ScratchFileBuffer(scratchFile); + byte[] bytes = new byte[PAGE_SIZE]; + for (int i = 0; i < NUM_ITERATIONS; i++) + { + long p0 = scratchFileBuffer.getPosition(); + scratchFileBuffer.write(bytes); + long p1 = scratchFileBuffer.getPosition(); + Assert.assertEquals(PAGE_SIZE, p1 - p0); + scratchFileBuffer.write(bytes); + long p2 = scratchFileBuffer.getPosition(); + Assert.assertEquals(PAGE_SIZE, p2 - p1); + scratchFileBuffer.seek(0); + scratchFileBuffer.seek(i * 2 * PAGE_SIZE); + } + scratchFile.close(); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/io/TestRandomAccessBuffer.java b/pdfbox/src/test/java/org/apache/pdfbox/io/TestRandomAccessBuffer.java index 273480559a4..9dd0a0929be 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/io/TestRandomAccessBuffer.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/io/TestRandomAccessBuffer.java @@ -18,8 +18,6 @@ package org.apache.pdfbox.io; import java.io.IOException; -import java.nio.Buffer; -import java.util.Vector; import junit.framework.TestCase; @@ -34,7 +32,7 @@ public class TestRandomAccessBuffer extends TestCase /** * This test checks two corner cases where the last read ends - * exactly at the end of a chunck (remainingBytes == 0) + * exactly at the end of a chunk (remainingBytes == 0) * @throws IOException */ public void testRemainingByteZero() throws IOException diff --git a/pdfbox/src/test/java/org/apache/pdfbox/io/UnmodifiableCOSDictionaryTest.java 
b/pdfbox/src/test/java/org/apache/pdfbox/io/UnmodifiableCOSDictionaryTest.java new file mode 100644 index 00000000000..1164e5753e6 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/io/UnmodifiableCOSDictionaryTest.java @@ -0,0 +1,390 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.io; + +import static org.junit.Assert.fail; + +import java.util.Calendar; + +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.font.encoding.Encoding; +import org.junit.Test; + +public class UnmodifiableCOSDictionaryTest +{ + @Test + public void testUnmodifiableCOSDictionary() + { + COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); + try + { + unmodifiableCOSDictionary.clear(); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.removeItem(COSName.A); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.addAll(new COSDictionary()); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.mergeInto(new COSDictionary()); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setFlag(COSName.A, 0, true); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setNeedToBeUpdated(true); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + } + + @Test + public void testSetItem() + { + COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); + try + { + unmodifiableCOSDictionary.setItem(COSName.A, COSName.A); + fail("An 
UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setItem(COSName.A, + Encoding.getInstance(COSName.STANDARD_ENCODING)); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setItem("A", COSName.A); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + try + { + unmodifiableCOSDictionary.setItem("A", Encoding.getInstance(COSName.STANDARD_ENCODING)); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + } + + @Test + public void testSetBoolean() + { + COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); + try + { + unmodifiableCOSDictionary.setBoolean(COSName.A, true); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setBoolean("A", true); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + } + + @Test + public void testSetName() + { + COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); + try + { + unmodifiableCOSDictionary.setName(COSName.A, "A"); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setName("A", "A"); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + } + + 
@Test + public void testSetDate() + { + COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); + try + { + unmodifiableCOSDictionary.setDate(COSName.A, Calendar.getInstance()); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setDate("A", Calendar.getInstance()); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + } + + @Test + public void testSetEmbeddedDate() + { + COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); + try + { + unmodifiableCOSDictionary.setEmbeddedDate("Embedded", COSName.A, + Calendar.getInstance()); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setEmbeddedDate("Embedded", "A", Calendar.getInstance()); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + } + + @Test + public void testSetString() + { + COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); + try + { + unmodifiableCOSDictionary.setString(COSName.A, "A"); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setString("A", "A"); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + } + + @Test + public void testSetEmbeddedString() + { + COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); + try + { + 
unmodifiableCOSDictionary.setEmbeddedString("Embedded", COSName.A, "A"); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setEmbeddedString("Embedded", "A", "A"); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + } + + @Test + public void testSetInt() + { + COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); + try + { + unmodifiableCOSDictionary.setInt(COSName.A, 0); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setInt("A", 0); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + } + + @Test + public void testSetEmbeddedInt() + { + COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); + try + { + unmodifiableCOSDictionary.setEmbeddedInt("Embedded", COSName.A, 0); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setEmbeddedInt("Embedded", "A", 0); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + } + + @Test + public void testSetLong() + { + COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); + try + { + unmodifiableCOSDictionary.setLong(COSName.A, 0); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + 
unmodifiableCOSDictionary.setLong("A", 0); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + } + + @Test + public void testSetFloat() + { + COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); + try + { + unmodifiableCOSDictionary.setFloat(COSName.A, 0); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + + try + { + unmodifiableCOSDictionary.setFloat("A", 0); + fail("An UnsupportedOperationException should have been thrown"); + } + catch (UnsupportedOperationException exception) + { + // nothing to do + } + } +} \ No newline at end of file diff --git a/pdfbox/src/test/java/org/apache/pdfbox/io/package.html b/pdfbox/src/test/java/org/apache/pdfbox/io/package.html index 5b58b11bda3..e9e67526a20 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/io/package.html +++ b/pdfbox/src/test/java/org/apache/pdfbox/io/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/test/java/org/apache/pdfbox/multipdf/MergeAcroFormsTest.java b/pdfbox/src/test/java/org/apache/pdfbox/multipdf/MergeAcroFormsTest.java new file mode 100644 index 00000000000..fbcdb62e4e4 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/multipdf/MergeAcroFormsTest.java @@ -0,0 +1,272 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.multipdf; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertNotNull; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.multipdf.PDFMergerUtility.AcroFormMergeMode; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import org.apache.pdfbox.pdmodel.interactive.form.PDField; +import org.junit.Before; +import org.junit.Test; + +/** + * Test merging different PDFs with AcroForms. 
+ * + * + */ +public class MergeAcroFormsTest +{ + private static final File IN_DIR = new File("src/test/resources/org/apache/pdfbox/multipdf"); + private static final File OUT_DIR = new File("target/test-output/merge/"); + private static final File TARGET_PDF_DIR = new File("target/pdfs"); + + @Before + public void setUp() + { + OUT_DIR.mkdirs(); + } + + /* + * Test LegacyMode merge + */ + @Test + public void testLegacyModeMerge() throws IOException + { + PDFMergerUtility merger = new PDFMergerUtility(); + File toBeMerged = new File(IN_DIR,"AcroFormForMerge.pdf"); + File pdfOutput = new File(OUT_DIR,"PDFBoxLegacyMerge-SameMerged.pdf"); + merger.setDestinationFileName(pdfOutput.getAbsolutePath()); + merger.addSource(toBeMerged); + merger.addSource(toBeMerged); + merger.mergeDocuments(null); + merger.setAcroFormMergeMode(AcroFormMergeMode.PDFBOX_LEGACY_MODE); + + PDDocument compliantDocument = null; + PDDocument toBeCompared = null; + + + try + { + compliantDocument = PDDocument.load(new File(IN_DIR,"PDFBoxLegacyMerge-SameMerged.pdf")); + toBeCompared = PDDocument.load(new File(OUT_DIR,"PDFBoxLegacyMerge-SameMerged.pdf")); + + + PDAcroForm compliantAcroForm = compliantDocument.getDocumentCatalog().getAcroForm(); + PDAcroForm toBeComparedAcroForm = toBeCompared.getDocumentCatalog().getAcroForm(); + + assertEquals("There shall be the same number of root fields", + compliantAcroForm.getFields().size(), + toBeComparedAcroForm.getFields().size()); + + for (PDField compliantField : compliantAcroForm.getFieldTree()) + { + assertNotNull("There shall be a field with the same FQN", toBeComparedAcroForm.getField(compliantField.getFullyQualifiedName())); + PDField toBeComparedField = toBeComparedAcroForm.getField(compliantField.getFullyQualifiedName()); + compareFieldProperties(compliantField, toBeComparedField); + } + + for (PDField toBeComparedField : toBeComparedAcroForm.getFieldTree()) + { + assertNotNull("There shall be a field with the same FQN", 
compliantAcroForm.getField(toBeComparedField.getFullyQualifiedName())); + PDField compliantField = compliantAcroForm.getField(toBeComparedField.getFullyQualifiedName()); + compareFieldProperties(toBeComparedField, compliantField); + } + } + finally + { + IOUtils.closeQuietly(compliantDocument); + IOUtils.closeQuietly(toBeCompared); + } + } + + /* + * Test Join Field Merge for text fields only and same source documents + */ + @Test + public void testJoinFieldsMerge_TextFieldsOnly_SameMerged() throws IOException + { + PDFMergerUtility merger = new PDFMergerUtility(); + File toBeMerged = new File(IN_DIR,"AcroFormForMerge-TextFieldsOnly.pdf"); + File pdfOutput = new File(OUT_DIR,"PDFBoxJoinFieldsMerge-TextFieldsOnly-SameMerged.pdf"); + merger.setDestinationFileName(pdfOutput.getAbsolutePath()); + merger.addSource(toBeMerged); + merger.addSource(toBeMerged); + merger.setAcroFormMergeMode(AcroFormMergeMode.JOIN_FORM_FIELDS_MODE); + merger.mergeDocuments(null); + + PDDocument compliantDocument = null; + PDDocument toBeCompared = null; + + + try + { + compliantDocument = PDDocument.load(new File(IN_DIR,"AcrobatMerge-TextFieldsOnly-SameMerged.pdf")); + toBeCompared = PDDocument.load(new File(OUT_DIR,"PDFBoxJoinFieldsMerge-TextFieldsOnly-SameMerged.pdf")); + + + PDAcroForm compliantAcroForm = compliantDocument.getDocumentCatalog().getAcroForm(); + PDAcroForm toBeComparedAcroForm = toBeCompared.getDocumentCatalog().getAcroForm(); + + assertEquals("There shall be the same number of root fields", + compliantAcroForm.getFields().size(), + toBeComparedAcroForm.getFields().size()); + + for (PDField compliantField : compliantAcroForm.getFieldTree()) + { + assertNotNull("There shall be a field with the same FQN", toBeComparedAcroForm.getField(compliantField.getFullyQualifiedName())); + PDField toBeComparedField = toBeComparedAcroForm.getField(compliantField.getFullyQualifiedName()); + compareFieldProperties(compliantField, toBeComparedField); + } + + for (PDField toBeComparedField : 
toBeComparedAcroForm.getFieldTree()) + { + assertNotNull("There shall be a field with the same FQN", compliantAcroForm.getField(toBeComparedField.getFullyQualifiedName())); + PDField compliantField = compliantAcroForm.getField(toBeComparedField.getFullyQualifiedName()); + compareFieldProperties(toBeComparedField, compliantField); + } + } + finally + { + IOUtils.closeQuietly(compliantDocument); + IOUtils.closeQuietly(toBeCompared); + } + } + + + private void compareFieldProperties(PDField sourceField, PDField toBeComapredField) + { + // List of keys for comparison + // Don't include too complex properties such as AP as this will fail the test because + // of a stack overflow when + final String[] keys = {"FT", "T", "TU", "TM", "Ff", "V", "DV", "Opts", "TI", "I", "Rect", "DA", }; + + COSDictionary sourceFieldCos = sourceField.getCOSObject(); + COSDictionary toBeComparedCos = toBeComapredField.getCOSObject(); + + for (String key : keys) + { + COSBase sourceBase = sourceFieldCos.getDictionaryObject(key); + COSBase toBeComparedBase = toBeComparedCos.getDictionaryObject(key); + + if (sourceBase != null) + { + assertEquals("The content of the field properties shall be the same",sourceBase.toString(), toBeComparedBase.toString()); + } + else + { + assertNull("If the source property is null the compared property shall be null too", toBeComparedBase); + } + } + } + + /* + * PDFBOX-1031 Ensure that after merging the PDFs there is an Annots entry per page. 
+ */ + @Test + public void testAnnotsEntry() throws IOException { + + InputStream s1 = null; + InputStream s2 = null; + // Merge the PDFs form PDFBOX-1031 + PDFMergerUtility merger = new PDFMergerUtility(); + try { + File f1 = new File(TARGET_PDF_DIR, "PDFBOX-1031-1.pdf"); + s1 = new FileInputStream(f1); + + File f2 = new File(TARGET_PDF_DIR, "PDFBOX-1031-2.pdf"); + s2 = new FileInputStream(f2); + + File pdfOutput = new File(OUT_DIR, "PDFBOX-1031.pdf"); + merger.setDestinationFileName(pdfOutput.getAbsolutePath()); + merger.addSource(s1); + merger.addSource(s2); + merger.mergeDocuments(null); + + // Test merge result + PDDocument mergedPDF = PDDocument.load(pdfOutput); + assertEquals("There shall be 2 pages", 2, mergedPDF.getNumberOfPages()); + + assertNotNull("There shall be an /Annots entry for the first page", mergedPDF.getPage(0).getCOSObject().getDictionaryObject(COSName.ANNOTS)); + assertEquals("There shall be 1 annotation for the first page", 1, mergedPDF.getPage(0).getAnnotations().size()); + + assertNotNull("There shall be an /Annots entry for the second page", mergedPDF.getPage(1).getCOSObject().getDictionaryObject(COSName.ANNOTS)); + assertEquals("There shall be 1 annotation for the second page", 1, mergedPDF.getPage(0).getAnnotations().size()); + + mergedPDF.close(); + } finally { + IOUtils.closeQuietly(s1); + IOUtils.closeQuietly(s2); + } + } + + /* + * PDFBOX-1100 Ensure that after merging the PDFs there is an AP and V entry. 
+ */ + @Test + public void testAPEntry() throws IOException { + + InputStream is1 = null; + InputStream is2 = null; + // Merge the PDFs form PDFBOX-1100 + PDFMergerUtility merger = new PDFMergerUtility(); + + try { + File file1 = new File(TARGET_PDF_DIR, "PDFBOX-1100-1.pdf"); + is1 = new FileInputStream(file1); + + File file2 = new File(TARGET_PDF_DIR, "PDFBOX-1100-2.pdf"); + is2 = new FileInputStream(file2); + File pdfOutput = new File(OUT_DIR, "PDFBOX-1100.pdf"); + merger.setDestinationFileName(pdfOutput.getAbsolutePath()); + merger.addSource(is1); + merger.addSource(is2); + merger.mergeDocuments(null); + + // Test merge result + PDDocument mergedPDF = PDDocument.load(pdfOutput); + assertEquals("There shall be 2 pages", 2, mergedPDF.getNumberOfPages()); + + PDAcroForm acroForm = mergedPDF.getDocumentCatalog().getAcroForm(); + + PDField formField = acroForm.getField("Testfeld"); + assertNotNull("There shall be an /AP entry for the field", formField.getCOSObject().getDictionaryObject(COSName.AP)); + assertNotNull("There shall be a /V entry for the field", formField.getCOSObject().getDictionaryObject(COSName.V)); + + formField = acroForm.getField("Testfeld2"); + assertNotNull("There shall be an /AP entry for the field", formField.getCOSObject().getDictionaryObject(COSName.AP)); + assertNotNull("There shall be a /V entry for the field", formField.getCOSObject().getDictionaryObject(COSName.V)); + + mergedPDF.close(); + } finally { + IOUtils.closeQuietly(is1); + IOUtils.closeQuietly(is2); + } + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/multipdf/MergeAnnotationsTest.java b/pdfbox/src/test/java/org/apache/pdfbox/multipdf/MergeAnnotationsTest.java new file mode 100644 index 00000000000..c7ff5dde816 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/multipdf/MergeAnnotationsTest.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.multipdf; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentNameDestinationDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.junit.Before; +import org.junit.Test; + +/** + * Test merging different PDFs with Annotations. + */ +public class MergeAnnotationsTest +{ + private static final File OUT_DIR = new File("target/test-output/merge/"); + private static final File TARGET_PDF_DIR = new File("target/pdfs"); + + @Before + public void setUp() + { + OUT_DIR.mkdirs(); + } + + /* + * PDFBOX-1065 Ensure that after merging the PDFs there are all link + * annotations and they point to the correct page. 
+ */ + @Test + public void testLinkAnnotations() throws IOException { + + // Merge the PDFs from PDFBOX-1065 + PDFMergerUtility merger = new PDFMergerUtility(); + InputStream is1 = null; + InputStream is2 = null; + + try { + File file1 = new File(TARGET_PDF_DIR, "PDFBOX-1065-1.pdf"); + is1 = new FileInputStream(file1); + + File file2 = new File(TARGET_PDF_DIR, "PDFBOX-1065-2.pdf"); + is2 = new FileInputStream(file2); + File pdfOutput = new File(OUT_DIR, "PDFBOX-1065.pdf"); + merger.setDestinationFileName(pdfOutput.getAbsolutePath()); + merger.addSource(is1); + merger.addSource(is2); + merger.mergeDocuments(null); + + // Test merge result + PDDocument mergedPDF = PDDocument.load(pdfOutput); + assertEquals("There shall be 6 pages", 6, mergedPDF.getNumberOfPages()); + + PDDocumentNameDestinationDictionary destinations = mergedPDF.getDocumentCatalog().getDests(); + + // Each document has 3 annotations with 2 entries in the /Dests dictionary per annotation. One for the + // source and one for the target. 
+ assertEquals("There shall be 12 entries", 12, destinations.getCOSObject().entrySet().size()); + + List sourceAnnotations01 = mergedPDF.getPage(0).getAnnotations(); + List sourceAnnotations02 = mergedPDF.getPage(3).getAnnotations(); + + List targetAnnotations01 = mergedPDF.getPage(2).getAnnotations(); + List targetAnnotations02 = mergedPDF.getPage(5).getAnnotations(); + + // Test for the first set of annotations to be merged an linked correctly + assertEquals("There shall be 3 source annotations at the first page", 3, sourceAnnotations01.size()); + assertEquals("There shall be 3 source annotations at the third page", 3, targetAnnotations01.size()); + assertTrue("The annotations shall match to each other", testAnnotationsMatch(sourceAnnotations01, targetAnnotations01)); + + // Test for the second set of annotations to be merged an linked correctly + assertEquals("There shall be 3 source annotations at the first page", 3, sourceAnnotations02.size()); + assertEquals("There shall be 3 source annotations at the third page", 3, targetAnnotations02.size()); + assertTrue("The annotations shall match to each other", testAnnotationsMatch(sourceAnnotations02, targetAnnotations02)); + mergedPDF.close(); + } finally { + IOUtils.closeQuietly(is1); + IOUtils.closeQuietly(is2); + } + } + + /* + * Source and target annotations are línked by name with the target annotation's name + * being the source annotation's name prepended with 'annoRef_' + */ + private boolean testAnnotationsMatch(List sourceAnnots, List targetAnnots) + { + Map targetAnnotsByName = new HashMap(); + COSName destinationName; + + // fill the map with the annotations destination name + for (PDAnnotation targetAnnot : targetAnnots) + { + destinationName = (COSName) targetAnnot.getCOSObject().getDictionaryObject(COSName.DEST); + targetAnnotsByName.put(destinationName.getName(), targetAnnot); + } + + // try to lookup the target annotation for the source annotation by destination name + for (PDAnnotation sourceAnnot 
: sourceAnnots) + { + destinationName = (COSName) sourceAnnot.getCOSObject().getDictionaryObject(COSName.DEST); + if (targetAnnotsByName.get("annoRef_" + destinationName.getName()) == null) + { + return false; + } + } + return true; + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFCloneUtilityTest.java b/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFCloneUtilityTest.java index ba7d5e27e77..c55e0c270d9 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFCloneUtilityTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFCloneUtilityTest.java @@ -16,13 +16,20 @@ package org.apache.pdfbox.multipdf; import java.awt.Color; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import junit.framework.TestCase; +import static junit.framework.TestCase.assertEquals; +import static junit.framework.TestCase.assertTrue; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode; +import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentProperties; /** * Test suite for PDFCloneUtility, see PDFBOX-2052. @@ -97,4 +104,29 @@ public void testClonePDFWithCosArrayStream2() throws IOException PDDocument.load(new File(TESTDIR + CLONEDST)).close(); PDDocument.load(new File(TESTDIR + CLONEDST), (String)null).close(); } -} + + /** + * PDFBOX-4814: this tests merging a direct and an indirect COSDictionary, when "target" is + * indirect in cloneMerge(). 
+ * + * @throws IOException + */ + public void testDirectIndirect() throws IOException + { + PDDocument doc1 = new PDDocument(); + + doc1.addPage(new PDPage()); + doc1.getDocumentCatalog().setOCProperties(new PDOptionalContentProperties()); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + doc1.save(baos); + PDDocument doc2 = PDDocument.load(baos.toByteArray()); + PDFMergerUtility merger = new PDFMergerUtility(); + // The OCProperties is a direct object here, but gets saved as an indirect object. + assertTrue(doc1.getDocumentCatalog().getCOSObject().getItem(COSName.OCPROPERTIES) instanceof COSDictionary); + assertTrue(doc2.getDocumentCatalog().getCOSObject().getItem(COSName.OCPROPERTIES) instanceof COSObject); + merger.appendDocument(doc2, doc1); + assertEquals(2, doc2.getNumberOfPages()); + doc2.close(); + doc1.close(); + } +} \ No newline at end of file diff --git a/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFMergerUtilityTest.java b/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFMergerUtilityTest.java index f17170c6624..b0cc59790a0 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFMergerUtilityTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFMergerUtilityTest.java @@ -16,14 +16,45 @@ package org.apache.pdfbox.multipdf; import java.awt.image.BufferedImage; +import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; import junit.framework.TestCase; +import static junit.framework.TestCase.assertEquals; +import static junit.framework.TestCase.fail; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSObject; import 
org.apache.pdfbox.io.MemoryUsageSetting; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageTree; +import org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.pdmodel.common.PDNameTreeNode; +import org.apache.pdfbox.pdmodel.common.PDNumberTreeNode; +import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureElement; +import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageFitDestination; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import org.apache.pdfbox.pdmodel.interactive.form.PDField; import org.apache.pdfbox.rendering.PDFRenderer; +import org.junit.Assert; /** * Test suite for PDFMergerUtility. @@ -35,6 +66,7 @@ public class PDFMergerUtilityTest extends TestCase { final String SRCDIR = "src/test/resources/input/merge/"; final String TARGETTESTDIR = "target/test-output/merge/"; + private static final File TARGETPDFDIR = new File("target/pdfs"); final int DPI = 96; @Override @@ -111,6 +143,761 @@ public void testPDFMergerUtility2() throws IOException MemoryUsageSetting.setupTempFileOnly()); } + /** + * PDFBOX-3972: Test that OpenAction page destination isn't lost after merge. 
+ * + * @throws IOException + */ + public void testPDFMergerOpenAction() throws IOException + { + PDDocument doc1 = new PDDocument(); + doc1.addPage(new PDPage()); + doc1.addPage(new PDPage()); + doc1.addPage(new PDPage()); + doc1.save(new File(TARGETTESTDIR,"MergerOpenActionTest1.pdf")); + doc1.close(); + + PDDocument doc2 = new PDDocument(); + doc2.addPage(new PDPage()); + doc2.addPage(new PDPage()); + doc2.addPage(new PDPage()); + PDPageDestination dest = new PDPageFitDestination(); + dest.setPage(doc2.getPage(1)); + doc2.getDocumentCatalog().setOpenAction(dest); + doc2.save(new File(TARGETTESTDIR,"MergerOpenActionTest2.pdf")); + doc2.close(); + + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + pdfMergerUtility.addSource(new File(TARGETTESTDIR, "MergerOpenActionTest1.pdf")); + pdfMergerUtility.addSource(new File(TARGETTESTDIR, "MergerOpenActionTest2.pdf")); + pdfMergerUtility.setDestinationFileName(TARGETTESTDIR + "MergerOpenActionTestResult.pdf"); + pdfMergerUtility.mergeDocuments(MemoryUsageSetting.setupMainMemoryOnly()); + + PDDocument mergedDoc = PDDocument.load(new File(TARGETTESTDIR, "MergerOpenActionTestResult.pdf")); + PDDocumentCatalog documentCatalog = mergedDoc.getDocumentCatalog(); + dest = (PDPageDestination) documentCatalog.getOpenAction(); + assertEquals(4, documentCatalog.getPages().indexOf(dest.getPage())); + mergedDoc.close(); + } + + /** + * PDFBOX-3999: check that page entries in the structure tree only reference pages from the page + * tree, i.e. that no orphan pages exist. 
+ * + * @throws IOException + */ + public void testStructureTreeMerge() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + PDDocument src = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-3999-GeneralForbearance.pdf")); + + ElementCounter elementCounter = new ElementCounter(); + elementCounter.walk(src.getDocumentCatalog().getStructureTreeRoot().getK()); + int singleCnt = elementCounter.cnt; + int singleSetSize = elementCounter.set.size(); + assertEquals(134, singleCnt); + assertEquals(134, singleSetSize); + + PDDocument dst = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-3999-GeneralForbearance.pdf")); + pdfMergerUtility.appendDocument(dst, src); + src.close(); + dst.save(new File(TARGETTESTDIR, "PDFBOX-3999-GeneralForbearance-merged.pdf")); + dst.close(); + + PDDocument doc = PDDocument.load(new File(TARGETTESTDIR, "PDFBOX-3999-GeneralForbearance-merged.pdf")); + + // Assume that the merged tree has double element count + elementCounter = new ElementCounter(); + elementCounter.walk(doc.getDocumentCatalog().getStructureTreeRoot().getK()); + assertEquals(singleCnt * 2, elementCounter.cnt); + assertEquals(singleSetSize * 2, elementCounter.set.size()); + checkForPageOrphans(doc); + + doc.close(); + } + + /** + * PDFBOX-3999: check that no streams are kept from the source document by the destination + * document, despite orphan annotations remaining in the structure tree. 
+ * + * @throws IOException + */ + public void testStructureTreeMerge2() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-3999-GeneralForbearance.pdf")); + doc.getDocumentCatalog().getAcroForm().flatten(); + doc.save(new File(TARGETTESTDIR, "PDFBOX-3999-GeneralForbearance-flattened.pdf")); + + ElementCounter elementCounter = new ElementCounter(); + elementCounter.walk(doc.getDocumentCatalog().getStructureTreeRoot().getK()); + int singleCnt = elementCounter.cnt; + int singleSetSize = elementCounter.set.size(); + assertEquals(134, singleCnt); + assertEquals(134, singleSetSize); + + doc.close(); + + PDDocument src = PDDocument.load(new File(TARGETTESTDIR, "PDFBOX-3999-GeneralForbearance-flattened.pdf")); + PDDocument dst = PDDocument.load(new File(TARGETTESTDIR, "PDFBOX-3999-GeneralForbearance-flattened.pdf")); + pdfMergerUtility.appendDocument(dst, src); + // before solving PDFBOX-3999, the close() below brought + // IOException: COSStream has been closed and cannot be read. + src.close(); + dst.save(new File(TARGETTESTDIR, "PDFBOX-3999-GeneralForbearance-flattened-merged.pdf")); + dst.close(); + + doc = PDDocument.load(new File(TARGETTESTDIR, "PDFBOX-3999-GeneralForbearance-flattened-merged.pdf")); + + checkForPageOrphans(doc); + + // Assume that the merged tree has double element count + elementCounter = new ElementCounter(); + elementCounter.walk(doc.getDocumentCatalog().getStructureTreeRoot().getK()); + assertEquals(singleCnt * 2, elementCounter.cnt); + assertEquals(singleSetSize * 2, elementCounter.set.size()); + + doc.close(); + } + + /** + * PDFBOX-4408: Check that /StructParents values from pages and /StructParent values from + * annotations are found in the /ParentTree. 
+ * + * @throws IOException + */ + public void testStructureTreeMerge3() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + PDDocument src = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4408.pdf")); + + ElementCounter elementCounter = new ElementCounter(); + elementCounter.walk(src.getDocumentCatalog().getStructureTreeRoot().getK()); + int singleCnt = elementCounter.cnt; + int singleSetSize = elementCounter.set.size(); + assertEquals(25, singleCnt); + assertEquals(25, singleSetSize); + + PDDocument dst = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4408.pdf")); + pdfMergerUtility.appendDocument(dst, src); + src.close(); + dst.save(new File(TARGETTESTDIR, "PDFBOX-4408-merged.pdf")); + dst.close(); + + dst = PDDocument.load(new File(TARGETTESTDIR, "PDFBOX-4408-merged.pdf")); + + // Assume that the merged tree has double element count + elementCounter = new ElementCounter(); + elementCounter.walk(dst.getDocumentCatalog().getStructureTreeRoot().getK()); + assertEquals(singleCnt * 2, elementCounter.cnt); + assertEquals(singleSetSize * 2, elementCounter.set.size()); + + checkWithNumberTree(dst); + checkForPageOrphans(dst); + dst.close(); + checkStructTreeRootCount(new File(TARGETTESTDIR, "PDFBOX-4408-merged.pdf")); + } + + /** + * PDFBOX-4417: Same as the previous tests, but this one failed when the previous tests + * succeeded because of more bugs with cloning. 
+ * + * @throws IOException + */ + public void testStructureTreeMerge4() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + PDDocument src = PDDocument.load(new File(SRCDIR, "PDFBOX-4417-001031.pdf")); + + ElementCounter elementCounter = new ElementCounter(); + elementCounter.walk(src.getDocumentCatalog().getStructureTreeRoot().getK()); + int singleCnt = elementCounter.cnt; + int singleSetSize = elementCounter.set.size(); + assertEquals(104, singleCnt); + assertEquals(104, singleSetSize); + + PDDocument dst = PDDocument.load(new File(SRCDIR, "PDFBOX-4417-001031.pdf")); + pdfMergerUtility.appendDocument(dst, src); + src.close(); + dst.save(new File(TARGETTESTDIR, "PDFBOX-4417-001031-merged.pdf")); + dst.close(); + dst = PDDocument.load(new File(TARGETTESTDIR, "PDFBOX-4417-001031-merged.pdf")); + + // Assume that the merged tree has double element count + elementCounter = new ElementCounter(); + elementCounter.walk(dst.getDocumentCatalog().getStructureTreeRoot().getK()); + assertEquals(singleCnt * 2, elementCounter.cnt); + assertEquals(singleSetSize * 2, elementCounter.set.size()); + + checkWithNumberTree(dst); + checkForPageOrphans(dst); + dst.close(); + checkStructTreeRootCount(new File(TARGETTESTDIR, "PDFBOX-4417-001031-merged.pdf")); + } + + /** + * PDFBOX-4417: Same as the previous tests, but this one failed when the previous tests + * succeeded because the /K tree started with two dictionaries and not with an array. 
+ * + * @throws IOException + */ + public void testStructureTreeMerge5() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + PDDocument src = PDDocument.load(new File(SRCDIR, "PDFBOX-4417-054080.pdf")); + + ElementCounter elementCounter = new ElementCounter(); + elementCounter.walk(src.getDocumentCatalog().getStructureTreeRoot().getK()); + int singleCnt = elementCounter.cnt; + int singleSetSize = elementCounter.set.size(); + + PDDocument dst = PDDocument.load(new File(SRCDIR, "PDFBOX-4417-054080.pdf")); + pdfMergerUtility.appendDocument(dst, src); + src.close(); + dst.save(new File(TARGETTESTDIR, "PDFBOX-4417-054080-merged.pdf")); + dst.close(); + dst = PDDocument.load(new File(TARGETTESTDIR, "PDFBOX-4417-054080-merged.pdf")); + checkWithNumberTree(dst); + checkForPageOrphans(dst); + + // Assume that the merged tree has double element count + elementCounter = new ElementCounter(); + elementCounter.walk(dst.getDocumentCatalog().getStructureTreeRoot().getK()); + assertEquals(singleCnt * 2, elementCounter.cnt); + assertEquals(singleSetSize * 2, elementCounter.set.size()); + + dst.close(); + + checkStructTreeRootCount(new File(TARGETTESTDIR, "PDFBOX-4417-054080-merged.pdf")); + } + + /** + * PDFBOX-4418: test merging PDFs where ParentTree have a hierarchy. 
+ * + * @throws IOException + */ + public void testStructureTreeMerge6() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + PDDocument src = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4418-000671.pdf")); + + PDStructureTreeRoot structureTreeRoot = src.getDocumentCatalog().getStructureTreeRoot(); + PDNumberTreeNode parentTree = structureTreeRoot.getParentTree(); + Map numberTreeAsMap = PDFMergerUtility.getNumberTreeAsMap(parentTree); + assertEquals(381, numberTreeAsMap.size()); + assertEquals(743, Collections.max(numberTreeAsMap.keySet()) + 1); + assertEquals(0, (int) Collections.min(numberTreeAsMap.keySet())); + assertEquals(743, structureTreeRoot.getParentTreeNextKey()); + + PDDocument dst = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4418-000314.pdf")); + + structureTreeRoot = dst.getDocumentCatalog().getStructureTreeRoot(); + parentTree = structureTreeRoot.getParentTree(); + numberTreeAsMap = PDFMergerUtility.getNumberTreeAsMap(parentTree); + assertEquals(7, numberTreeAsMap.size()); + assertEquals(328, Collections.max(numberTreeAsMap.keySet()) + 1); + assertEquals(321, (int) Collections.min(numberTreeAsMap.keySet())); + // ParentTreeNextKey should be 321 but PDF has a higher value + assertEquals(408, structureTreeRoot.getParentTreeNextKey()); + + pdfMergerUtility.appendDocument(dst, src); + src.close(); + dst.save(new File(TARGETTESTDIR, "PDFBOX-4418-merged.pdf")); + dst.close(); + + dst = PDDocument.load(new File(TARGETTESTDIR, "PDFBOX-4418-merged.pdf")); + checkWithNumberTree(dst); + checkForPageOrphans(dst); + + structureTreeRoot = dst.getDocumentCatalog().getStructureTreeRoot(); + parentTree = structureTreeRoot.getParentTree(); + numberTreeAsMap = PDFMergerUtility.getNumberTreeAsMap(parentTree); + assertEquals(381+7, numberTreeAsMap.size()); + assertEquals(408+743, Collections.max(numberTreeAsMap.keySet()) + 1); + assertEquals(321, (int) Collections.min(numberTreeAsMap.keySet())); + assertEquals(408+743, 
structureTreeRoot.getParentTreeNextKey()); + dst.close(); + + checkStructTreeRootCount(new File(TARGETTESTDIR, "PDFBOX-4418-merged.pdf")); + } + + /** + * PDFBOX-4423: test merging a PDF where a widget has no StructParent. + * + * @throws IOException + */ + public void testStructureTreeMerge7() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + PDDocument src = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4423-000746.pdf")); + + PDStructureTreeRoot structureTreeRoot = src.getDocumentCatalog().getStructureTreeRoot(); + PDNumberTreeNode parentTree = structureTreeRoot.getParentTree(); + Map numberTreeAsMap = PDFMergerUtility.getNumberTreeAsMap(parentTree); + assertEquals(33, numberTreeAsMap.size()); + assertEquals(64, Collections.max(numberTreeAsMap.keySet()) + 1); + assertEquals(31, (int) Collections.min(numberTreeAsMap.keySet())); + assertEquals(126, structureTreeRoot.getParentTreeNextKey()); + + PDDocument dst = new PDDocument(); + + pdfMergerUtility.appendDocument(dst, src); + src.close(); + dst.save(new File(TARGETTESTDIR, "PDFBOX-4423-merged.pdf")); + dst.close(); + + dst = PDDocument.load(new File(TARGETTESTDIR, "PDFBOX-4423-merged.pdf")); + checkWithNumberTree(dst); + checkForPageOrphans(dst); + + structureTreeRoot = dst.getDocumentCatalog().getStructureTreeRoot(); + parentTree = structureTreeRoot.getParentTree(); + numberTreeAsMap = PDFMergerUtility.getNumberTreeAsMap(parentTree); + assertEquals(33, numberTreeAsMap.size()); + assertEquals(64, Collections.max(numberTreeAsMap.keySet()) + 1); + assertEquals(31, (int) Collections.min(numberTreeAsMap.keySet())); + assertEquals(64, structureTreeRoot.getParentTreeNextKey()); + dst.close(); + + checkStructTreeRootCount(new File(TARGETTESTDIR, "PDFBOX-4423-merged.pdf")); + } + + /** + * PDFBOX-4009: Test that ParentTreeNextKey is recalculated correctly. 
+ */ + public void testMissingParentTreeNextKey() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + PDDocument src = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4418-000314.pdf")); + PDDocument dst = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4418-000314.pdf")); + // existing numbers are 321..327; ParentTreeNextKey is 408. + // After deletion, it is recalculated in the merge 328. + // That value is added to all numbers of the destination, + // so the new numbers should be 321+328..327+328, i.e. 649..655, + // and this ParentTreeNextKey is 656 at the end. + dst.getDocumentCatalog().getStructureTreeRoot().getCOSObject().removeItem(COSName.PARENT_TREE_NEXT_KEY); + pdfMergerUtility.appendDocument(dst, src); + src.close(); + dst.save(new File(TARGETTESTDIR, "PDFBOX-4418-000314-merged.pdf")); + dst.close(); + dst = PDDocument.load(new File(TARGETTESTDIR, "PDFBOX-4418-000314-merged.pdf")); + assertEquals(656, dst.getDocumentCatalog().getStructureTreeRoot().getParentTreeNextKey()); + dst.close(); + } + + /** + * PDFBOX-4416: Test merging of /IDTree + *
    + * PDFBOX-4009: test merging to empty destination + * + * @throws IOException + */ + public void testStructureTreeMergeIDTree() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + PDDocument src = PDDocument.load(new File(SRCDIR, "PDFBOX-4417-001031.pdf")); + PDDocument dst = PDDocument.load(new File(SRCDIR, "PDFBOX-4417-054080.pdf")); + + PDNameTreeNode srcIDTree = src.getDocumentCatalog().getStructureTreeRoot().getIDTree(); + Map srcIDTreeMap = PDFMergerUtility.getIDTreeAsMap(srcIDTree); + PDNameTreeNode dstIDTree = dst.getDocumentCatalog().getStructureTreeRoot().getIDTree(); + Map dstIDTreeMap = PDFMergerUtility.getIDTreeAsMap(dstIDTree); + int expectedTotal = srcIDTreeMap.size() + dstIDTreeMap.size(); + assertEquals(192, expectedTotal); + + // PDFBOX-4009, test that empty dest doc still merges structure tree + // (empty dest doc is used in command line app) + PDDocument emptyDest = new PDDocument(); + pdfMergerUtility.appendDocument(emptyDest, src); + src.close(); + src = emptyDest; + assertEquals(4, src.getDocumentCatalog().getStructureTreeRoot().getParentTreeNextKey()); + + pdfMergerUtility.appendDocument(dst, src); + src.close(); + dst.save(new File(TARGETTESTDIR, "PDFBOX-4416-IDTree-merged.pdf")); + dst.close(); + dst = PDDocument.load(new File(TARGETTESTDIR, "PDFBOX-4416-IDTree-merged.pdf")); + checkWithNumberTree(dst); + checkForPageOrphans(dst); + + dstIDTree = dst.getDocumentCatalog().getStructureTreeRoot().getIDTree(); + dstIDTreeMap = PDFMergerUtility.getIDTreeAsMap(dstIDTree); + assertEquals(expectedTotal, dstIDTreeMap.size()); + + dst.close(); + checkStructTreeRootCount(new File(TARGETTESTDIR, "PDFBOX-4416-IDTree-merged.pdf")); + } + + /** + * PDFBOX-4429: merge into destination that has /StructParent(s) entries in the destination file + * but no structure tree. 
+ * + * @throws IOException + */ + public void testMergeBogusStructParents1() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + PDDocument src = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4408.pdf")); + PDDocument dst = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4408.pdf")); + dst.getDocumentCatalog().setStructureTreeRoot(null); + dst.getPage(0).setStructParents(9999); + dst.getPage(0).getAnnotations().get(0).setStructParent(9998); + pdfMergerUtility.appendDocument(dst, src); + checkWithNumberTree(dst); + checkForPageOrphans(dst); + src.close(); + dst.close(); + } + + /** + * PDFBOX-4429: merge into destination that has /StructParent(s) entries in the source file but + * no structure tree. + * + * @throws IOException + */ + public void testMergeBogusStructParents2() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + PDDocument src = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4408.pdf")); + PDDocument dst = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4408.pdf")); + src.getDocumentCatalog().setStructureTreeRoot(null); + src.getPage(0).setStructParents(9999); + src.getPage(0).getAnnotations().get(0).setStructParent(9998); + pdfMergerUtility.appendDocument(dst, src); + checkWithNumberTree(dst); + checkForPageOrphans(dst); + src.close(); + dst.close(); + } + + /** + * Test of the parent tree. Didn't work before PDFBOX-4003 because of incompatible class for + * PDNumberTreeNode. 
+ * + * @throws IOException + */ + public void testParentTree() throws IOException + { + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-3999-GeneralForbearance.pdf")); + PDStructureTreeRoot structureTreeRoot = doc.getDocumentCatalog().getStructureTreeRoot(); + PDNumberTreeNode parentTree = structureTreeRoot.getParentTree(); + parentTree.getValue(0); + Map numberTreeAsMap = PDFMergerUtility.getNumberTreeAsMap(parentTree); + assertEquals(31, numberTreeAsMap.size()); + assertEquals(31, Collections.max(numberTreeAsMap.keySet()) + 1); + assertEquals(0, (int) Collections.min(numberTreeAsMap.keySet())); + assertEquals(31, structureTreeRoot.getParentTreeNextKey()); + doc.close(); + } + + // PDFBOX-4417: check for multiple /StructTreeRoot entries that was due to + // incorrect merging of /K entries + private void checkStructTreeRootCount(File file) throws IOException + { + int count = 0; + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file))); + String line; + while ((line = br.readLine()) != null) + { + if (line.equals("/Type /StructTreeRoot")) + { + ++count; + } + } + br.close(); + assertEquals(file.getPath(), 1, count); + } + + /** + * PDFBOX-4408: Check that /StructParents values from pages and /StructParent values from + * annotations are found in the /ParentTree. 
+ * + * @param document + */ + void checkWithNumberTree(PDDocument document) throws IOException + { + PDDocumentCatalog documentCatalog = document.getDocumentCatalog(); + PDNumberTreeNode parentTree = documentCatalog.getStructureTreeRoot().getParentTree(); + Map numberTreeAsMap = PDFMergerUtility.getNumberTreeAsMap(parentTree); + Set keySet = numberTreeAsMap.keySet(); + PDAcroForm acroForm = documentCatalog.getAcroForm(); + if (acroForm != null) + { + for (PDField field : acroForm.getFieldTree()) + { + for (PDAnnotationWidget widget : field.getWidgets()) + { + if (widget.getStructParent() >= 0) + { + assertTrue("field '" + field.getFullyQualifiedName() + "' /StructParent " + + widget.getStructParent() + " missing in /ParentTree", + keySet.contains(widget.getStructParent())); + } + } + } + } + for (PDPage page : document.getPages()) + { + if (page.getStructParents() >= 0) + { + assertTrue(keySet.contains(page.getStructParents())); + } + for (PDAnnotation ann : page.getAnnotations()) + { + if (ann.getStructParent() >= 0) + { + assertTrue("/StructParent " + ann.getStructParent() + " missing in /ParentTree", + keySet.contains(ann.getStructParent())); + } + } + } + + // might also test image and form dictionaries... + } + + /** + * PDFBOX-4383: Test that file can be deleted after merge. 
+ * + * @throws IOException + */ + public void testFileDeletion() throws IOException + { + File outFile = new File(TARGETTESTDIR, "PDFBOX-4383-result.pdf"); + + File inFile1 = new File(TARGETTESTDIR, "PDFBOX-4383-src1.pdf"); + File inFile2 = new File(TARGETTESTDIR, "PDFBOX-4383-src2.pdf"); + + createSimpleFile(inFile1); + createSimpleFile(inFile2); + + OutputStream out = new FileOutputStream(outFile); + PDFMergerUtility merger = new PDFMergerUtility(); + merger.setDestinationStream(out); + merger.addSource(inFile1); + merger.addSource(inFile2); + merger.mergeDocuments(MemoryUsageSetting.setupMainMemoryOnly()); + out.close(); + + assertTrue(inFile1.delete()); + assertTrue(inFile2.delete()); + assertTrue(outFile.delete()); + } + + + /** + * Check that there is a top level Document and Parts below in a merge of 2 documents. + * + * @param file + * @throws IOException + */ + public void testPDFBox5198_2() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + pdfMergerUtility.addSource(new File(SRCDIR, "PDFA3A.pdf")); + pdfMergerUtility.addSource(new File(SRCDIR, "PDFA3A.pdf")); + pdfMergerUtility.setDestinationFileName(TARGETTESTDIR + "PDFA3A-merged2.pdf"); + pdfMergerUtility.mergeDocuments(MemoryUsageSetting.setupMainMemoryOnly()); + + checkParts(new File(TARGETTESTDIR + "PDFA3A-merged2.pdf")); + } + + /** + * Check that there is a top level Document and Parts below in a merge of 3 documents. 
+ * + * @param file + * @throws IOException + */ + public void testPDFBox5198_3() throws IOException + { + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + pdfMergerUtility.addSource(new File(SRCDIR, "PDFA3A.pdf")); + pdfMergerUtility.addSource(new File(SRCDIR, "PDFA3A.pdf")); + pdfMergerUtility.addSource(new File(SRCDIR, "PDFA3A.pdf")); + pdfMergerUtility.setDestinationFileName(TARGETTESTDIR + "PDFA3A-merged3.pdf"); + pdfMergerUtility.mergeDocuments(MemoryUsageSetting.setupMainMemoryOnly()); + + checkParts(new File(TARGETTESTDIR + "PDFA3A-merged3.pdf")); + } + + /** + * Check that there is a top level Document and Parts below. + * @param file + * @throws IOException + */ + private void checkParts(File file) throws IOException + { + PDDocument doc = PDDocument.load(file); + PDStructureTreeRoot structureTreeRoot = doc.getDocumentCatalog().getStructureTreeRoot(); + COSDictionary topDict = (COSDictionary) structureTreeRoot.getK(); + assertEquals(COSName.DOCUMENT, topDict.getItem(COSName.S)); + assertEquals(structureTreeRoot.getCOSObject(), topDict.getCOSDictionary(COSName.P)); + COSArray kArray = topDict.getCOSArray(COSName.K); + assertEquals(doc.getNumberOfPages(), kArray.size()); + for (int i = 0; i < kArray.size(); ++i) + { + COSDictionary dict = (COSDictionary) kArray.getObject(i); + assertEquals(COSName.PART, dict.getItem(COSName.S)); + assertEquals(topDict, dict.getCOSDictionary(COSName.P)); + } + doc.close(); + } + + private void checkForPageOrphans(PDDocument doc) throws IOException + { + // check for orphan pages in the StructTreeRoot/K, StructTreeRoot/ParentTree and + // StructTreeRoot/IDTree trees. 
+ PDPageTree pageTree = doc.getPages(); + PDStructureTreeRoot structureTreeRoot = doc.getDocumentCatalog().getStructureTreeRoot(); + checkElement(pageTree, structureTreeRoot.getParentTree().getCOSObject()); + checkElement(pageTree, structureTreeRoot.getK()); + checkForIDTreeOrphans(pageTree, structureTreeRoot); + } + + private void checkForIDTreeOrphans(PDPageTree pageTree, PDStructureTreeRoot structureTreeRoot) + throws IOException + { + PDNameTreeNode idTree = structureTreeRoot.getIDTree(); + if (idTree == null) + { + return; + } + Map map = PDFMergerUtility.getIDTreeAsMap(idTree); + for (PDStructureElement element : map.values()) + { + if (element.getPage() != null) + { + checkForPage(pageTree, element); + } + if (!element.getKids().isEmpty()) + { + checkElement(pageTree, element.getCOSObject().getDictionaryObject(COSName.K)); + } + } + } + + private void createSimpleFile(File file) throws IOException + { + PDDocument doc = new PDDocument(); + doc.addPage(new PDPage()); + doc.save(file); + doc.close(); + } + + private class ElementCounter + { + int cnt = 0; + Set set = new HashSet(); + + void walk(COSBase base) + { + if (base instanceof COSArray) + { + for (COSBase base2 : (COSArray) base) + { + if (base2 instanceof COSObject) + { + base2 = ((COSObject) base2).getObject(); + } + walk(base2); + } + } + else if (base instanceof COSDictionary) + { + COSDictionary kdict = (COSDictionary) base; + if (kdict.containsKey(COSName.PG)) + { + ++cnt; + set.add(kdict); + } + if (kdict.containsKey(COSName.K)) + { + walk(kdict.getDictionaryObject(COSName.K)); + } + } + } + } + + // Each element can be an array, a dictionary or a number. 
+ // See PDF specification Table 37 - Entries in a number tree node dictionary + // See PDF specification Table 322 - Entries in the structure tree root + // See PDF specification Table 323 - Entries in a structure element dictionary + // See PDF specification Table 325 – Entries in an object reference dictionary + // example of file with /Kids: 000153.pdf 000208.pdf 000314.pdf 000359.pdf 000671.pdf + // from digitalcorpora site + private void checkElement(PDPageTree pageTree, COSBase base) throws IOException + { + if (base instanceof COSArray) + { + for (COSBase base2 : (COSArray) base) + { + if (base2 instanceof COSObject) + { + base2 = ((COSObject) base2).getObject(); + } + checkElement(pageTree, base2); + } + } + else if (base instanceof COSDictionary) + { + COSDictionary kdict = (COSDictionary) base; + if (kdict.containsKey(COSName.PG)) + { + PDStructureElement structureElement = new PDStructureElement(kdict); + checkForPage(pageTree, structureElement); + } + if (kdict.containsKey(COSName.K)) + { + checkElement(pageTree, kdict.getDictionaryObject(COSName.K)); + return; + } + + // if we're in a number tree, check /Nums and /Kids + if (kdict.containsKey(COSName.KIDS)) + { + checkElement(pageTree, kdict.getDictionaryObject(COSName.KIDS)); + } + else if (kdict.containsKey(COSName.NUMS)) + { + checkElement(pageTree, kdict.getDictionaryObject(COSName.NUMS)); + } + + // if we're an object reference dictionary (/OBJR), check the obj + if (kdict.containsKey(COSName.OBJ)) + { + COSDictionary obj = (COSDictionary) kdict.getDictionaryObject(COSName.OBJ); + COSBase type = obj.getDictionaryObject(COSName.TYPE); + if (COSName.ANNOT.equals(type)) + { + PDAnnotation annotation = PDAnnotation.createAnnotation(obj); + PDPage page = annotation.getPage(); + if (page != null) + { + if (pageTree.indexOf(page) == -1) + { + COSBase item = kdict.getItem(COSName.OBJ); + if (item instanceof COSObject) + { + Assert.assertNotEquals("Annotation page is not in the page tree: " + item, -1, 
pageTree.indexOf(page)); + } + else + { + // don't display because of stack overflow + Assert.assertNotEquals("Annotation page is not in the page tree", -1, pageTree.indexOf(page)); + } + } + } + } + else + { + //TODO needs to be investigated. Specification mentions + // "such as an XObject or an annotation" + fail("Other type: " + type); + } + } + } + } + // checks that the result file of a merge has the same rendering as the two // source files private void checkMergeIdentical(String filename1, String filename2, String mergeFilename, @@ -177,4 +964,12 @@ private void checkImagesIdentical(BufferedImage bim1, BufferedImage bim2) } } + private void checkForPage(PDPageTree pageTree, PDStructureElement structureElement) + { + PDPage page = structureElement.getPage(); + if (page != null) + { + Assert.assertNotEquals("Page is not in the page tree", -1, pageTree.indexOf(page)); + } + } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/package.html b/pdfbox/src/test/java/org/apache/pdfbox/package.html index e280a257764..61f8b808f1b 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/package.html +++ b/pdfbox/src/test/java/org/apache/pdfbox/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java index 7920b595212..65b99208344 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/EndstreamOutputStreamTest.java @@ -17,16 +17,29 @@ package org.apache.pdfbox.pdfparser; import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; -import junit.framework.TestCase; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Map; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; +import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; +import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification; +import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile; import org.junit.Assert; +import org.junit.Test; /** * * @author Tilman Hausherr */ -public class EndstreamOutputStreamTest extends TestCase +public class EndstreamOutputStreamTest { + @Test public void testEndstreamOutputStream() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); @@ -93,5 +106,33 @@ public void testEndstreamOutputStream() throws IOException byte[] expectedResult5 = { 1, 2, 3, 4, '\r', '\n', 5, 6, 7, '\r', 8, 9, '\n', '\r'}; Assert.assertArrayEquals(expectedResult5, baos.toByteArray()); } - + + @Test + public void testPDFBox2079EmbeddedFile() throws IOException + { + // there should be 17660 bytes in the zip file. 
+ // in PDFBox 1.8.5, windows newline is appended to the byte stream + // yielding 17662 bytes, which causes a problem for ZipFile in Java 1.6 + + // Modification of embedded_zip.pdf for 2.0: + // /Length entry removed to force usage of EndstreamOutputStream + PDDocument doc = PDDocument.load( + new File("src/test/resources/org/apache/pdfbox/pdfparser", "embedded_zip.pdf")); + PDDocumentCatalog catalog = doc.getDocumentCatalog(); + PDDocumentNameDictionary names = catalog.getNames(); + PDEmbeddedFilesNameTreeNode node = names.getEmbeddedFiles(); + Map map = node.getNames(); + Assert.assertEquals(1, map.size()); + PDComplexFileSpecification spec = map.get("My first attachment"); + PDEmbeddedFile file = spec.getEmbeddedFile(); + InputStream input = file.createInputStream(); + File d = new File("target/test-output"); + d.mkdirs(); + File f = new File(d, spec.getFile()); + OutputStream os = new FileOutputStream(f); + IOUtils.copy(input, os); + os.close(); + Assert.assertEquals(17660, f.length()); + doc.close(); + } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/InputStreamSourceTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/InputStreamSourceTest.java new file mode 100644 index 00000000000..b14de04705a --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/InputStreamSourceTest.java @@ -0,0 +1,133 @@ +/* + * Copyright 2014 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdfparser; + +import java.io.ByteArrayInputStream; +import java.io.EOFException; +import java.io.IOException; +import org.junit.Assert; +import org.junit.Test; + +/** + * Unittest for org.apache.pdfbox.pdfparser.InputStreamSource + */ +public class InputStreamSourceTest +{ + @Test + public void testPositionReadFully() throws IOException + { + byte[] inputValues = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + ByteArrayInputStream bais = new ByteArrayInputStream(inputValues); + + InputStreamSource inputStreamSource = new InputStreamSource(bais); + + Assert.assertEquals(0, inputStreamSource.getPosition()); + inputStreamSource.readFully(5); + Assert.assertEquals(5, inputStreamSource.getPosition()); + + try + { + inputStreamSource.readFully(10); + Assert.fail("readFully beyond EOF should have triggered an EOFException"); + } + catch(EOFException exception) + { + + } + + inputStreamSource.close(); + } + + @Test + public void testPositionRead() throws IOException + { + byte[] inputValues = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + ByteArrayInputStream bais = new ByteArrayInputStream(inputValues); + + InputStreamSource inputStreamSource = new InputStreamSource(bais); + + Assert.assertEquals(0, inputStreamSource.getPosition()); + inputStreamSource.read(); + inputStreamSource.read(); + inputStreamSource.read(); + Assert.assertEquals(3, inputStreamSource.getPosition()); + + inputStreamSource.close(); + } + + @Test + public void testPositionReadBytes() throws IOException + { + byte[] inputValues = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + ByteArrayInputStream bais = new ByteArrayInputStream(inputValues); + + InputStreamSource inputStreamSource = new InputStreamSource(bais); + + Assert.assertEquals(0, inputStreamSource.getPosition()); + byte[] buffer = new byte[4]; + inputStreamSource.read(buffer); + Assert.assertEquals(4, inputStreamSource.getPosition()); + + inputStreamSource.read(buffer, 1, 2); + Assert.assertEquals(6, inputStreamSource.getPosition()); + + 
inputStreamSource.close(); + } + + @Test + public void testPositionPeek() throws IOException + { + byte[] inputValues = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + ByteArrayInputStream bais = new ByteArrayInputStream(inputValues); + + InputStreamSource inputStreamSource = new InputStreamSource(bais); + + Assert.assertEquals(0, inputStreamSource.getPosition()); + inputStreamSource.readFully(6); + Assert.assertEquals(6, inputStreamSource.getPosition()); + + inputStreamSource.peek(); + Assert.assertEquals(6, inputStreamSource.getPosition()); + + inputStreamSource.close(); + } + + @Test + public void testPositionUnreadBytes() throws IOException + { + byte[] inputValues = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + ByteArrayInputStream bais = new ByteArrayInputStream(inputValues); + + InputStreamSource inputStreamSource = new InputStreamSource(bais); + + Assert.assertEquals(0, inputStreamSource.getPosition()); + inputStreamSource.read(); + inputStreamSource.read(); + byte[] readBytes = inputStreamSource.readFully(6); + Assert.assertEquals(8, inputStreamSource.getPosition()); + inputStreamSource.unread(readBytes); + Assert.assertEquals(2, inputStreamSource.getPosition()); + inputStreamSource.read(); + Assert.assertEquals(3, inputStreamSource.getPosition()); + inputStreamSource.read(readBytes, 2, 4); + Assert.assertEquals(7, inputStreamSource.getPosition()); + inputStreamSource.unread(readBytes, 2, 4); + Assert.assertEquals(3, inputStreamSource.getPosition()); + + inputStreamSource.close(); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParserTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParserTest.java new file mode 100644 index 00000000000..809f7175492 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParserTest.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdfparser; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.List; + +import org.apache.pdfbox.cos.COSBoolean; +import org.apache.pdfbox.cos.COSInteger; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSObject; +import org.apache.pdfbox.cos.COSStream; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test for PDFObjectStreamParser. 
+ */ +public class PDFObjectStreamParserTest +{ + @Test + public void testOffsetParsing() throws IOException + { + COSStream stream = new COSStream(); + stream.setItem(COSName.N, COSInteger.TWO); + stream.setItem(COSName.FIRST, COSInteger.get(8)); + OutputStream outputStream = stream.createOutputStream(); + outputStream.write("1 0 2 5 true false".getBytes()); + outputStream.close(); + PDFObjectStreamParser objectStreamParser = new PDFObjectStreamParser(stream, null); + objectStreamParser.parse(); + List objects = objectStreamParser.getObjects(); + Assert.assertEquals(2, objects.size()); + Assert.assertEquals(COSBoolean.TRUE, objects.get(0).getObject()); + Assert.assertEquals(COSBoolean.FALSE, objects.get(1).getObject()); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/PDFStreamParserTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/PDFStreamParserTest.java index 886a11248fa..6b234a82b6a 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/PDFStreamParserTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/PDFStreamParserTest.java @@ -19,7 +19,7 @@ import java.util.List; import junit.framework.TestCase; import org.apache.pdfbox.contentstream.operator.Operator; - +import org.apache.pdfbox.contentstream.operator.OperatorName; import static org.junit.Assert.assertArrayEquals; @@ -49,6 +49,8 @@ public void testInlineImages() throws IOException testInlineImage2ops("ID\n12345EI Q ", "12345", "Q"); testInlineImage2ops("ID\n12345EI EMC ", "12345", "EMC"); + testInlineImage2ops("ID\n12345EI \000Q", "12345", "Q"); + testInlineImage2ops("ID\n12345EI Q ", "12345", "Q"); testInlineImage2ops("ID\n12345EI EMC ", "12345", "EMC"); @@ -92,7 +94,7 @@ private void testInlineImage2ops(String s, String imageDataString, String opName assertEquals(2, tokens.size()); - assertEquals("ID", ((Operator) tokens.get(0)).getName()); + assertEquals(OperatorName.BEGIN_INLINE_IMAGE_DATA, ((Operator) tokens.get(0)).getName()); 
assertEquals(imageDataString.length(), ((Operator) tokens.get(0)).getImageData().length); assertArrayEquals(imageDataString.getBytes(), ((Operator) tokens.get(0)).getImageData()); @@ -106,7 +108,7 @@ private void testInlineImage1op(String s, String imageDataString) throws IOExcep assertEquals(1, tokens.size()); - assertEquals("ID", ((Operator) tokens.get(0)).getName()); + assertEquals(OperatorName.BEGIN_INLINE_IMAGE_DATA, ((Operator) tokens.get(0)).getName()); assertEquals(imageDataString.length(), ((Operator) tokens.get(0)).getImageData().length); assertArrayEquals(imageDataString.getBytes(), ((Operator) tokens.get(0)).getImageData()); } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/RandomAccessSourceTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/RandomAccessSourceTest.java new file mode 100644 index 00000000000..10927dbedd8 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/RandomAccessSourceTest.java @@ -0,0 +1,140 @@ +/* + * Copyright 2014 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdfparser; + +import java.io.ByteArrayInputStream; +import java.io.EOFException; +import java.io.IOException; + +import org.apache.pdfbox.io.RandomAccessBuffer; +import org.junit.Assert; +import org.junit.Test; + +/** + * Unittest for org.apache.pdfbox.pdfparser.RandomAccessSource + */ +public class RandomAccessSourceTest +{ + @Test + public void testPositionReadFully() throws IOException + { + byte[] inputValues = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + ByteArrayInputStream bais = new ByteArrayInputStream(inputValues); + + RandomAccessSource randomAccessSource = new RandomAccessSource( + new RandomAccessBuffer(bais)); + + Assert.assertEquals(0, randomAccessSource.getPosition()); + randomAccessSource.readFully(5); + Assert.assertEquals(5, randomAccessSource.getPosition()); + + try + { + randomAccessSource.readFully(10); + Assert.fail("readFully beyond EOF should have triggered an EOFException"); + } + catch (EOFException exception) + { + + } + + randomAccessSource.close(); + } + + @Test + public void testPositionRead() throws IOException + { + byte[] inputValues = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + ByteArrayInputStream bais = new ByteArrayInputStream(inputValues); + + RandomAccessSource randomAccessSource = new RandomAccessSource( + new RandomAccessBuffer(bais)); + + Assert.assertEquals(0, randomAccessSource.getPosition()); + randomAccessSource.read(); + randomAccessSource.read(); + randomAccessSource.read(); + Assert.assertEquals(3, randomAccessSource.getPosition()); + + randomAccessSource.close(); + } + + @Test + public void testPositionReadBytes() throws IOException + { + byte[] inputValues = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + ByteArrayInputStream bais = new ByteArrayInputStream(inputValues); + + RandomAccessSource randomAccessSource = new RandomAccessSource( + new RandomAccessBuffer(bais)); + + Assert.assertEquals(0, randomAccessSource.getPosition()); + byte[] buffer = new byte[4]; + randomAccessSource.read(buffer); + 
Assert.assertEquals(4, randomAccessSource.getPosition()); + + randomAccessSource.read(buffer, 1, 2); + Assert.assertEquals(6, randomAccessSource.getPosition()); + + randomAccessSource.close(); + } + + @Test + public void testPositionPeek() throws IOException + { + byte[] inputValues = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + ByteArrayInputStream bais = new ByteArrayInputStream(inputValues); + + RandomAccessSource randomAccessSource = new RandomAccessSource( + new RandomAccessBuffer(bais)); + + Assert.assertEquals(0, randomAccessSource.getPosition()); + randomAccessSource.readFully(6); + Assert.assertEquals(6, randomAccessSource.getPosition()); + + randomAccessSource.peek(); + Assert.assertEquals(6, randomAccessSource.getPosition()); + + randomAccessSource.close(); + } + + @Test + public void testPositionUnreadBytes() throws IOException + { + byte[] inputValues = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + ByteArrayInputStream bais = new ByteArrayInputStream(inputValues); + + RandomAccessSource randomAccessSource = new RandomAccessSource( + new RandomAccessBuffer(bais)); + + Assert.assertEquals(0, randomAccessSource.getPosition()); + randomAccessSource.read(); + randomAccessSource.read(); + byte[] readBytes = randomAccessSource.readFully(6); + Assert.assertEquals(8, randomAccessSource.getPosition()); + randomAccessSource.unread(readBytes); + Assert.assertEquals(2, randomAccessSource.getPosition()); + randomAccessSource.read(); + Assert.assertEquals(3, randomAccessSource.getPosition()); + randomAccessSource.read(readBytes, 2, 4); + Assert.assertEquals(7, randomAccessSource.getPosition()); + randomAccessSource.unread(readBytes, 2, 4); + Assert.assertEquals(3, randomAccessSource.getPosition()); + + randomAccessSource.close(); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java index 67535a4afee..9617a912ec6 100644 --- 
a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java @@ -28,6 +28,7 @@ import java.io.FileInputStream; import java.io.FilenameFilter; import java.io.IOException; +import java.net.URISyntaxException; import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.io.MemoryUsageSetting; @@ -35,14 +36,19 @@ import org.apache.pdfbox.io.RandomAccessRead; import org.apache.pdfbox.io.ScratchFile; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentInformation; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.util.DateConverter; import org.junit.Before; import org.junit.Test; public class TestPDFParser { - private static final String PATH_OF_PDF = "src/test/resources/input/yaddatest.pdf"; - private static File tmpDirectory = new File(System.getProperty("java.io.tmpdir")); + private static final File tmpDirectory = new File(System.getProperty("java.io.tmpdir")); + private static final File TARGETPDFDIR = new File("target/pdfs"); private int numberOfTmpFiles = 0; @@ -108,10 +114,253 @@ public void testPDFParserInputStreamScratchFile() throws IOException } @Test - public void testPDFParserMissingCatalog() throws IOException + public void testPDFParserMissingCatalog() throws IOException, URISyntaxException { // PDFBOX-3060 - PDDocument.load(TestPDFParser.class.getResourceAsStream("MissingCatalog.pdf")).close(); + PDDocument.load(new File(TestPDFParser.class.getResource("MissingCatalog.pdf").toURI())).close(); + } + + /** + * Test whether /Info dictionary is retrieved correctly when rebuilding the trailer of a corrupt + * file. An incorrect algorithm would result in an outline dictionary being mistaken for an + * /Info. 
+ * + * @throws IOException + */ + @Test + public void testPDFBox3208() throws IOException + { + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR,"PDFBOX-3208-L33MUTT2SVCWGCS6UIYL5TH3PNPXHIS6.pdf")); + + PDDocumentInformation di = doc.getDocumentInformation(); + assertEquals("Liquent Enterprise Services", di.getAuthor()); + assertEquals("Liquent services server", di.getCreator()); + assertEquals("Amyuni PDF Converter version 4.0.0.9", di.getProducer()); + assertEquals("", di.getKeywords()); + assertEquals("", di.getSubject()); + assertEquals("892B77DE781B4E71A1BEFB81A51A5ABC_20140326022424.docx", di.getTitle()); + assertEquals(DateConverter.toCalendar("D:20140326142505-02'00'"), di.getCreationDate()); + assertEquals(DateConverter.toCalendar("20140326172513Z"), di.getModificationDate()); + + doc.close(); + } + + /** + * Test whether the /Info is retrieved correctly when rebuilding the trailer of a corrupt file, + * despite the /Info dictionary not having a modification date. + * + * @throws IOException + */ + @Test + public void testPDFBox3940() throws IOException + { + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR,"PDFBOX-3940-079977.pdf")); + PDDocumentInformation di = doc.getDocumentInformation(); + assertEquals("Unknown", di.getAuthor()); + assertEquals("C:REGULA~1IREGSFR_EQ_EM.WP", di.getCreator()); + assertEquals("Acrobat PDFWriter 3.02 for Windows", di.getProducer()); + assertEquals("", di.getKeywords()); + assertEquals("", di.getSubject()); + assertEquals("C:REGULA~1IREGSFR_EQ_EM.PDF", di.getTitle()); + assertEquals(DateConverter.toCalendar("Tuesday, July 28, 1998 4:00:09 PM"), di.getCreationDate()); + + doc.close(); + } + + /** + * PDFBOX-3783: test parsing of file with trash after %%EOF. 
+ * + * @throws IOException + */ + @Test + public void testPDFBox3783() throws IOException + { + PDDocument.load(new File(TARGETPDFDIR,"PDFBOX-3783-72GLBIGUC6LB46ELZFBARRJTLN4RBSQM.pdf")).close(); + } + + /** + * PDFBOX-3785, PDFBOX-3957: + * Test whether truncated file with several revisions has correct page count. + * + * @throws IOException + */ + @Test + public void testPDFBox3785() throws IOException + { + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR,"PDFBOX-3785-202097.pdf")); + assertEquals(11, doc.getNumberOfPages()); + doc.close(); + } + + /** + * PDFBOX-3947: test parsing of file with broken object stream. + * + * @throws IOException + */ + @Test + public void testPDFBox3947() throws IOException + { + PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-3947-670064.pdf")).close(); + } + + /** + * PDFBOX-3948: test parsing of file with object stream containing some unexpected newlines. + * + * @throws IOException + */ + @Test + public void testPDFBox3948() throws IOException + { + PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-3948-EUWO6SQS5TM4VGOMRD3FLXZHU35V2CP2.pdf")).close(); + } + + /** + * PDFBOX-3949: test parsing of file with incomplete object stream. + * + * @throws IOException + */ + @Test + public void testPDFBox3949() throws IOException + { + PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-3949-MKFYUGZWS3OPXLLVU2Z4LWCTVA5WNOGF.pdf")).close(); + } + + /** + * PDFBOX-3950: test parsing and rendering of truncated file with missing pages. 
+ * + * @throws IOException + */ + @Test + public void testPDFBox3950() throws IOException + { + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-3950-23EGDHXSBBYQLKYOKGZUOVYVNE675PRD.pdf")); + assertEquals(4, doc.getNumberOfPages()); + PDFRenderer renderer = new PDFRenderer(doc); + for (int i = 0; i < doc.getNumberOfPages(); ++i) + { + try + { + renderer.renderImage(i); + } + catch (IOException ex) + { + if (i == 3 && "Missing descendant font array".equals(ex.getMessage())) // tolerated failure on the last page; null-safe compare + { + continue; + } + throw ex; + } + } + doc.close(); + } + + /** + * PDFBOX-3951: test parsing of truncated file. + * + * @throws IOException + */ + @Test + public void testPDFBox3951() throws IOException + { + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-3951-FIHUZWDDL2VGPOE34N6YHWSIGSH5LVGZ.pdf")); + assertEquals(143, doc.getNumberOfPages()); + doc.close(); + } + + /** + * PDFBOX-3964: test parsing of broken file. + * + * @throws IOException + */ + @Test + public void testPDFBox3964() throws IOException + { + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-3964-c687766d68ac766be3f02aaec5e0d713_2.pdf")); + assertEquals(10, doc.getNumberOfPages()); + doc.close(); + } + + /** + * Test whether /Info dictionary is retrieved correctly in brute force search for the + * Info/Catalog dictionaries.
+ * + * @throws IOException + */ + @Test + public void testPDFBox3977() throws IOException + { + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR,"PDFBOX-3977-63NGFQRI44HQNPIPEJH5W2TBM6DJZWMI.pdf")); + PDDocumentInformation di = doc.getDocumentInformation(); + assertEquals("QuarkXPress(tm) 6.52", di.getCreator()); + assertEquals("Acrobat Distiller 7.0 pour Macintosh", di.getProducer()); + assertEquals("Fich sal Fabr corr1 (Page 6)", di.getTitle()); + assertEquals(DateConverter.toCalendar("D:20070608151915+02'00'"), di.getCreationDate()); + assertEquals(DateConverter.toCalendar("D:20080604152122+02'00'"), di.getModificationDate()); + doc.close(); + } + + /** + * Test parsing the "genko_oc_shiryo1.pdf" file, which is susceptible to regression. + * + * @throws IOException + */ + @Test + public void testParseGenko() throws IOException + { + PDDocument.load(new File(TARGETPDFDIR, "genko_oc_shiryo1.pdf")).close(); + } + + /** + * Test parsing the file from PDFBOX-4338, which brought an + * ArrayIndexOutOfBoundsException before the bug was fixed. + * + * @throws IOException + */ + @Test + public void testPDFBox4338() throws IOException + { + PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4338.pdf")).close(); + } + + /** + * Test parsing the file from PDFBOX-4339, which brought a + * NullPointerException before the bug was fixed. + * + * @throws IOException + */ + @Test + public void testPDFBox4339() throws IOException + { + PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4339.pdf")).close(); + } + + /** + * Test parsing the "WXMDXCYRWFDCMOSFQJ5OAJIAFXYRZ5OA.pdf" file, which is susceptible to + * regression. 
+ * + * @throws IOException + */ + @Test + public void testPDFBox4153() throws IOException + { + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4153-WXMDXCYRWFDCMOSFQJ5OAJIAFXYRZ5OA.pdf")); + PDDocumentOutline documentOutline = doc.getDocumentCatalog().getDocumentOutline(); + PDOutlineItem firstChild = documentOutline.getFirstChild(); + assertEquals("Main Menu", firstChild.getTitle()); + doc.close(); + } + + /** + * Test that PDFBOX-4490 has 3 pages. + * + * @throws IOException + */ + @Test + public void testPDFBox4490() throws IOException + { + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4490.pdf")); + assertEquals(3, doc.getNumberOfPages()); + doc.close(); } private void executeParserTest(RandomAccessRead source, MemoryUsageSetting memUsageSetting) throws IOException diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/package.html b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/package.html index 232f47f6f62..8e2a7cb54a2 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/package.html +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java new file mode 100644 index 00000000000..82b18025e30 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdfwriter; + +import java.io.BufferedOutputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.junit.Test; + +public class COSWriterTest +{ + /** + * PDFBOX-4241: check whether the output stream is closed twice. + * + * @throws IOException + */ + @Test + public void testPDFBox4241() throws IOException + { + PDDocument doc = new PDDocument(); + PDPage page = new PDPage(); + doc.addPage(page); + doc.save(new BufferedOutputStream(new ByteArrayOutputStream(1024) + { + private boolean open = true; + + @Override + public void close() throws IOException + { + //Thread.dumpStack(); + + open = false; + super.close(); + } + + @Override + public void flush() throws IOException + { + if (!open) + { + throw new IOException("Stream already closed"); + } + + //Thread.dumpStack(); + } + })); + doc.close(); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterXRefEntryTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterXRefEntryTest.java new file mode 100644 index 00000000000..88a04050aff --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterXRefEntryTest.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdfwriter; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class COSWriterXRefEntryTest +{ + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void compareToInputNullOutputNegative() + { + // Arrange + final COSWriterXRefEntry objectUnderTest = new COSWriterXRefEntry(0L, null, null); + final COSWriterXRefEntry obj = null; + + // Act + final int retval = objectUnderTest.compareTo(obj); + + // Assert result + Assert.assertEquals(-1, retval); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/ContentStreamWriterTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/ContentStreamWriterTest.java new file mode 100644 index 00000000000..44d49eee073 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/ContentStreamWriterTest.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdfwriter; + +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import javax.imageio.ImageIO; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdfparser.PDFStreamParser; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.common.PDStream; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.rendering.TestPDFToImage; +import org.junit.After; +import org.junit.AfterClass; +import static org.junit.Assert.fail; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * + * @author Tilman Hausherr + */ +public class ContentStreamWriterTest +{ + + private final File testDirIn = new File("target/test-output/contentstream/in"); + private final File testDirOut = new File("target/test-output/contentstream/out"); + + public ContentStreamWriterTest() + { + testDirIn.mkdirs(); + testDirOut.mkdirs(); + } + + @BeforeClass + public static void setUpClass() + { + } + + @AfterClass + public static void tearDownClass() + { + } + + @Before + public void setUp() + { + } + + @After + public void tearDown() + { + } + + /** + * Test parse content stream, write back tokens and compare rendering. 
+ * + * @throws java.io.IOException + */ + @Test + public void testPDFBox4750() throws IOException + { + String filename = "PDFBOX-4750.pdf"; + File file = new File("target/pdfs", filename); + PDDocument doc = PDDocument.load(file); + + PDFRenderer r = new PDFRenderer(doc); + for (int i = 0; i < doc.getNumberOfPages(); ++i) + { + BufferedImage bim1 = r.renderImageWithDPI(i, 96); + ImageIO.write(bim1, "png", new File(testDirIn, filename + "-" + (i + 1) + ".png")); + PDPage page = doc.getPage(i); + PDStream newContent = new PDStream(doc); + + PDFStreamParser parser = new PDFStreamParser(page); + parser.parse(); + OutputStream os = newContent.createOutputStream(COSName.FLATE_DECODE); + ContentStreamWriter tokenWriter = new ContentStreamWriter(os); + tokenWriter.writeTokens(parser.getTokens()); + os.close(); + + page.setContents(newContent); + } + doc.save(new File(testDirIn, filename)); + doc.close(); + TestPDFToImage testPDFToImage = new TestPDFToImage(TestPDFToImage.class.getName()); + if (!testPDFToImage.doTestFile(new File(testDirIn, filename), testDirIn.getAbsolutePath(), testDirOut.getAbsolutePath())) + { + fail("Rendering failed or is not identical, see in " + testDirOut); + } + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/PageLayoutTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/PageLayoutTest.java new file mode 100644 index 00000000000..63dfa09386a --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/PageLayoutTest.java @@ -0,0 +1,66 @@ +/* + * Copyright 2016 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel; + +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Set; +import static org.junit.Assert.assertEquals; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class PageLayoutTest +{ + /** + * Test for completeness (PDFBOX-3362). + * + * @author Tilman Hausherr + */ + @Test + public void testValues() + { + Set<PageLayout> pageLayoutSet = EnumSet.noneOf(PageLayout.class); + Set<String> stringSet = new HashSet<String>(); + for (PageLayout pl : PageLayout.values()) + { + String s = pl.stringValue(); + stringSet.add(s); + pageLayoutSet.add(PageLayout.fromString(s)); + } + assertEquals(PageLayout.values().length, pageLayoutSet.size()); + assertEquals(PageLayout.values().length, stringSet.size()); + } + + /** + * @author John Bergqvist + */ + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void fromStringInputNotNullOutputIllegalArgumentException() + { + // Arrange + final String value = "SinglePag"; + + // Act + thrown.expect(IllegalArgumentException.class); + PageLayout.fromString(value); + + // Method is not expected to return due to exception thrown + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/PageModeTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/PageModeTest.java new file mode 100644 index 00000000000..7781ab9c275 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/PageModeTest.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + *
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class PageModeTest +{ + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void fromStringInputNotNullOutputNotNull() + { + // Arrange + final String value = "FullScreen"; + + // Act + final PageMode retval = PageMode.fromString(value); + + // Assert result + Assert.assertEquals(PageMode.FULL_SCREEN, retval); + } + + @Test + public void fromStringInputNotNullOutputNotNull2() + { + // Arrange + final String value = "UseThumbs"; + + // Act + final PageMode retval = PageMode.fromString(value); + + // Assert result + Assert.assertEquals(PageMode.USE_THUMBS, retval); + } + + @Test + public void fromStringInputNotNullOutputNotNull3() + { + // Arrange + final String value = "UseOC"; + + // Act + final PageMode retval = PageMode.fromString(value); + + // Assert result + Assert.assertEquals(PageMode.USE_OPTIONAL_CONTENT, retval); + } + + @Test + public void fromStringInputNotNullOutputNotNull4() + { + // Arrange + final String value = "UseNone"; + + // Act + final PageMode retval = PageMode.fromString(value); + + // Assert result + 
Assert.assertEquals(PageMode.USE_NONE, retval); + } + + @Test + public void fromStringInputNotNullOutputNotNull5() + { + // Arrange + final String value = "UseAttachments"; + + // Act + final PageMode retval = PageMode.fromString(value); + + // Assert result + Assert.assertEquals(PageMode.USE_ATTACHMENTS, retval); + } + + @Test + public void fromStringInputNotNullOutputNotNull6() + { + // Arrange + final String value = "UseOutlines"; + + // Act + final PageMode retval = PageMode.fromString(value); + + // Assert result + Assert.assertEquals(PageMode.USE_OUTLINES, retval); + } + + @Test + public void fromStringInputNotNullOutputIllegalArgumentException() + { + // Arrange + final String value = ""; + + // Act + thrown.expect(IllegalArgumentException.class); + PageMode.fromString(value); + + // Method is not expected to return due to exception thrown + } + + @Test + public void fromStringInputNotNullOutputIllegalArgumentException2() + { + // Arrange + final String value = "Dulacb`ecj"; + + // Act + thrown.expect(IllegalArgumentException.class); + PageMode.fromString(value); + + // Method is not expected to return due to exception thrown + } + + @Test + public void stringValueOutputNotNull() + { + // Arrange + final PageMode objectUnderTest = PageMode.USE_OPTIONAL_CONTENT; + + // Act + final String retval = objectUnderTest.stringValue(); + + // Assert result + Assert.assertEquals("UseOC", retval); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestFDF.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestFDF.java index 75cc4d11470..7d205de1528 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestFDF.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestFDF.java @@ -16,280 +16,60 @@ */ package org.apache.pdfbox.pdmodel; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; +import java.io.PrintWriter; +import java.net.URISyntaxException; import java.util.List; -import junit.framework.Test; 
import junit.framework.TestCase; -import junit.framework.TestSuite; -import org.apache.pdfbox.contentstream.PDContentStream; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSString; -import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdmodel.fdf.FDFDocument; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceEntry; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.pdmodel.fdf.FDFField; import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; -import org.apache.pdfbox.pdmodel.interactive.form.PDField; -import org.apache.pdfbox.pdmodel.interactive.form.PDRadioButton; -import org.apache.pdfbox.pdmodel.interactive.form.PDTerminalField; -import org.apache.pdfbox.pdmodel.interactive.form.PDTextField; /** * This will test the FDF algorithms in PDFBox. * * @author Ben Litchfield + * @author Tilman Hausherr * */ public class TestFDF extends TestCase { - - private static final String PDF_FDEB = "target/test-input-ext/fdeb.pdf"; - private static final String PDF_LOTSOFFIELDS = "target/test-input-ext/pdf_with_lots_of_fields.pdf"; - private static final String PDF_FREEDOM = "target/test-input-ext/FreedomExpressions.pdf"; - private static final String FDF_FREEDOM = "target/test-input-ext/FreedomExpressions.fdf"; - /** - * Constructor. + * Test load two simple fdf files with two fields. One of the files does not have a + * /Type/Catalog entry, which isn't required anyway (PDFBOX-3639). * - * @param name The name of the test to run. 
+ * @throws URISyntaxException + * @throws IOException */ - public TestFDF( String name ) + public void testLoad2() throws URISyntaxException, IOException { - super( name ); + checkFields("/org/apache/pdfbox/pdfparser/withcatalog.fdf"); + checkFields("/org/apache/pdfbox/pdfparser/nocatalog.fdf"); } - /** - * This will get the suite of test that this class holds. - * - * @return All of the tests that this class holds. - */ - public static Test suite() - { - return new TestSuite( TestFDF.class ); - } - - /** - * infamous main method. - * - * @param args The command line arguments. - */ - public static void main( String[] args ) - { - String[] arg = {TestFDF.class.getName() }; - junit.textui.TestRunner.main( arg ); - } - - /** - * This will test some simple field setting. - * - * @throws Exception If there is an exception while encrypting. - */ - public void testFDFfdeb() throws Exception + private void checkFields(String name) throws IOException, URISyntaxException { + FDFDocument fdf = FDFDocument.load(new File(TestFDF.class.getResource(name).toURI())); + fdf.saveXFDF(new PrintWriter(new ByteArrayOutputStream())); - File filePDF = new File(PDF_FDEB); - if ( filePDF.exists() ) - { - PDDocument fdeb = null; - try - { - fdeb = PDDocument.load( filePDF ); - PDAcroForm form = fdeb.getDocumentCatalog().getAcroForm(); - PDTextField field = (PDTextField)form.getField( "f67_1" ); - field.setValue( "2" ); - - String expected = - "/Tx BMC " + - "BT " + - "/Helv 9 Tf " + - " 0 g " + - " 2 1.985585 Td " + - "2.07698 0 Td " + - "(2) Tj " + - "ET " + - "EMC"; - - testContentStreams( fdeb, field, expected ); - } - finally - { - if( fdeb != null ) - { - fdeb.close(); - } - } - } - } - - /** - * This will test a pdf with lots of fields. - * - * @throws Exception If there is an exception while encrypting. 
- */ - public void testFDFPDFWithLotsOfFields() throws Exception - { - File filePDF = new File(PDF_LOTSOFFIELDS); - if ( filePDF.exists() ) - { - PDDocument fdeb = null; - try - { - fdeb = PDDocument.load( filePDF ); - PDAcroForm form = fdeb.getDocumentCatalog().getAcroForm(); - PDTextField feld2 = (PDTextField)form.getField( "Feld.2" ); - feld2.setValue( "Benjamin" ); - - String expected = - "1 1 0.8000000119 rg " + - " 0 0 127.5 19.8299999237 re " + - " f " + - " 0 0 0 RG " + - " 1 w " + - " 0.5 0.5 126.5 18.8299999237 re " + - " S " + - " 0.5 g " + - " 1 1 m " + - " 1 18.8299999237 l " + - " 126.5 18.8299999237 l " + - " 125.5 17.8299999237 l " + - " 2 17.8299999237 l " + - " 2 2 l " + - " 1 1 l " + - " f " + - " 0.75 g " + - " 1 1 m " + - " 126.5 1 l " + - " 126.5 18.8299999237 l " + - " 125.5 17.8299999237 l " + - " 125.5 2 l " + - " 2 2 l " + - " 1 1 l " + - " f " + - " /Tx BMC " + - "BT " + - "/Helv 14 Tf " + - " 0 0 0 rg " + - " 4 4.721 Td " + - "(Benjamin) Tj " + - "ET " + - "EMC"; - - testContentStreams( fdeb, feld2, expected ); - - PDRadioButton feld3 = (PDRadioButton)form.getField( "Feld.3" ); - feld3.setValue("RB1"); - assertEquals( "RB1", feld3.getValue()); - } - finally - { - if( fdeb != null ) - { - fdeb.close(); - } - } - } - } - - /** - * This will test the Freedom pdf. - * - * @throws Exception If there is an error while testing. 
- */ - public void testFDFFreedomExpressions() throws Exception - { - File filePDF = new File(PDF_FREEDOM); - File fileFDF = new File(FDF_FREEDOM); - if (filePDF.exists() && fileFDF.exists()) - { - PDDocument freedom = null; - FDFDocument fdf = null; - try - { - freedom = PDDocument.load( filePDF ); - fdf = FDFDocument.load( fileFDF ); - PDAcroForm form = freedom.getDocumentCatalog().getAcroForm(); - form.importFDF( fdf ); - - PDTextField field2 = (PDTextField)form.getField( "eeFirstName" ); - List kids = field2.getWidgets(); - PDAnnotationWidget firstKid = kids.get( 0 ); - PDAnnotationWidget secondKid = kids.get( 1 ); - testContentStreamContains( freedom, firstKid, "Steve" ); - testContentStreamContains( freedom, secondKid, "Steve" ); - - //the appearance stream is suppose to be null because there - //is an F action in the AA dictionary that populates that field. - PDField totalAmt = form.getField( "eeSuppTotalAmt" ); - assertTrue( totalAmt.getCOSObject().getDictionaryObject( COSName.AP ) == null ); - - } - finally - { - if( freedom != null ) - { - freedom.close(); - } - if( fdf != null ) - { - fdf.close(); - } - } - } - } - - private void testContentStreamContains( PDDocument doc, PDAnnotationWidget widget, String expected ) throws Exception - { - PDAppearanceEntry normalAppearance = widget.getAppearance().getNormalAppearance(); - PDAppearanceStream appearanceStream = normalAppearance.getAppearanceStream(); - - List actualTokens = getStreamTokens( doc, appearanceStream ); - assertTrue( actualTokens.contains( new COSString( expected ) ) ); - } - - private void testContentStreams( PDDocument doc, PDTerminalField field, String expected ) throws Exception - { - PDAnnotationWidget widget = field.getWidgets().get(0); - PDAppearanceEntry normalAppearance = widget.getAppearance().getNormalAppearance(); - PDAppearanceStream appearanceStream = normalAppearance.getAppearanceStream(); - - List actualTokens = getStreamTokens( doc, appearanceStream ); - List expectedTokens = 
getStreamTokens( doc, expected ); - assertEquals( actualTokens.size(), expectedTokens.size() ); - for( int i=0; i getStreamTokens( PDDocument doc, String string ) throws IOException - { - PDFStreamParser parser; - - List tokens = null; - if( string != null ) - { - parser = new PDFStreamParser( string.getBytes() ); - parser.parse(); - tokens = parser.getTokens(); - } - return tokens; - } - - private List getStreamTokens( PDDocument doc, PDContentStream stream ) throws IOException - { - PDFStreamParser parser; - - List tokens = null; - if( stream != null ) - { - parser = new PDFStreamParser( stream ); - parser.parse(); - tokens = parser.getTokens(); - } - return tokens; + List<FDFField> fields = fdf.getCatalog().getFDF().getFields(); + + assertEquals(2, fields.size()); + assertEquals("Field1", fields.get(0).getPartialFieldName()); + assertEquals("Field2", fields.get(1).getPartialFieldName()); + assertEquals("Test1", fields.get(0).getValue()); + assertEquals("Test2", fields.get(1).getValue()); + + PDDocument pdf = PDDocument.load(new File(TestFDF.class.getResource("/org/apache/pdfbox/pdfparser/SimpleForm2Fields.pdf").toURI())); + PDAcroForm acroForm = pdf.getDocumentCatalog().getAcroForm(); + acroForm.importFDF(fdf); + assertEquals("Test1", acroForm.getField("Field1").getValueAsString()); + assertEquals("Test2", acroForm.getField("Field2").getValueAsString()); + + pdf.close(); + + fdf.close(); } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java index e203c1d86cc..fbb3dad31b2 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java @@ -26,6 +26,7 @@ import java.io.InputStream; import java.io.PrintWriter; import java.util.Arrays; +import java.util.Locale; import org.apache.pdfbox.io.IOUtils; @@ -106,63 +107,6 @@ public void testSaveLoadFile() throws IOException loadDoc.close(); } - /**
- * Test document save/loadNonSeq using a stream. - * @throws IOException if something went wrong - */ - public void testSaveLoadNonSeqStream() throws IOException - { - // Create PDF with one blank page - PDDocument document = new PDDocument(); - document.addPage(new PDPage()); - - // Save - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - document.save(baos); - document.close(); - - // Verify content - byte[] pdf = baos.toByteArray(); - assertTrue(pdf.length > 200); - assertEquals("%PDF-1.4", new String(Arrays.copyOfRange(pdf, 0, 8), "UTF-8")); - assertEquals("%%EOF\n", new String(Arrays.copyOfRange(pdf, pdf.length - 6, pdf.length), "UTF-8")); - - // Load - PDDocument loadDoc = PDDocument.load(new ByteArrayInputStream(pdf)); - assertEquals(1, loadDoc.getNumberOfPages()); - loadDoc.close(); - } - - /** - * Test document save/loadNonSeq using a file. - * @throws IOException if something went wrong - */ - public void testSaveLoadNonSeqFile() throws IOException - { - // Create PDF with one blank page - PDDocument document = new PDDocument(); - document.addPage(new PDPage()); - - // Save - File targetFile = new File(testResultsDir, "pddocument-saveloadnonseqfile.pdf"); - document.save(targetFile); - document.close(); - - // Verify content - assertTrue(targetFile.length() > 200); - InputStream in = new FileInputStream(targetFile); - byte[] pdf = IOUtils.toByteArray(in); - in.close(); - assertTrue(pdf.length > 200); - assertEquals("%PDF-1.4", new String(Arrays.copyOfRange(pdf, 0, 8), "UTF-8")); - assertEquals("%%EOF\n", new String(Arrays.copyOfRange(pdf, pdf.length - 6, pdf.length), "UTF-8")); - - // Load - PDDocument loadDoc = PDDocument.load(targetFile); - assertEquals(1, loadDoc.getNumberOfPages()); - loadDoc.close(); - } - /** * Test get/setVersion. 
* @throws IOException if something went wrong diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java index 5646ba9165e..9e5dffba157 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java @@ -20,9 +20,11 @@ import java.io.InputStream; import java.util.List; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import org.junit.Test; @@ -157,7 +159,7 @@ public void handleOutputIntents() throws IOException assertTrue(outputIntents.isEmpty()); // add an OutputIntent - colorProfile = TestPDDocumentCatalog.class.getResourceAsStream("sRGB Color Space Profile.icm"); + colorProfile = TestPDDocumentCatalog.class.getResourceAsStream("sRGB.icc"); // create output intent PDOutputIntent oi = new PDOutputIntent(doc, colorProfile); oi.setInfo("sRGB IEC61966-2.1"); @@ -189,4 +191,21 @@ public void handleOutputIntents() throws IOException } } } + + @Test + public void handleBooleanInOpenAction() throws IOException + { + //PDFBOX-3772 -- allow for COSBoolean: a boolean /OpenAction is expected to yield no action + PDDocument doc = new PDDocument(); + try + { + doc.getDocumentCatalog().getCOSObject().setBoolean(COSName.OPEN_ACTION, false); + assertNull(doc.getDocumentCatalog().getOpenAction()); + } + finally + { + // release the document even when the assertion fails + doc.close(); + } + } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentInformation.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentInformation.java index 5c6f85b1faa..d9eef37ef17 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentInformation.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentInformation.java @@ -48,9 +48,9 @@ public void testMetadataExtraction() throws Exception
assertNull("Wrong subject", info.getSubject()); assertNull("Wrong trapped", info.getTrapped()); - List<String> expectedMetadataKeys = Arrays.asList(new String[] {"CreationDate", "Author", "Creator", - "Producer", "ModDate", "Company", - "SourceModified", "Title"}); + List<String> expectedMetadataKeys = Arrays.asList("CreationDate", "Author", "Creator", + "Producer", "ModDate", "Company", + "SourceModified", "Title"); assertEquals("Wrong metadata key count", expectedMetadataKeys.size(), info.getMetadataKeys().size()); for (String key : expectedMetadataKeys) diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDPageAnnotationsFiltering.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDPageAnnotationsFiltering.java new file mode 100644 index 00000000000..ba7c409ad0d --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDPageAnnotationsFiltering.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.pdfbox.pdmodel; + +import java.io.IOException; +import java.util.List; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationRubberStamp; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationSquareCircle; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * This unit test validates the correct working behavior of PDPage annotations + * filtering. + * + * @author Maxime Veron + * + */ +public class TestPDPageAnnotationsFiltering +{ + // test mock page for annotations filtering + private PDPage page; + + @Before + public void initMock() + { + COSDictionary mockedPageWithAnnotations = new COSDictionary(); + COSArray annotsDictionary = new COSArray(); + annotsDictionary.add(new PDAnnotationRubberStamp().getCOSObject()); + annotsDictionary.add(new PDAnnotationSquareCircle(PDAnnotationSquareCircle.SUB_TYPE_SQUARE).getCOSObject()); + annotsDictionary.add(new PDAnnotationLink().getCOSObject()); + mockedPageWithAnnotations.setItem(COSName.ANNOTS, annotsDictionary); + page = new PDPage(mockedPageWithAnnotations); + } + + @Test + public void validateNoFiltering() throws IOException + { + List<PDAnnotation> annotations = page.getAnnotations(); + Assert.assertEquals(3, annotations.size()); + Assert.assertTrue(annotations.get(0) instanceof PDAnnotationRubberStamp); + Assert.assertTrue(annotations.get(1) instanceof PDAnnotationSquareCircle); + Assert.assertTrue(annotations.get(2) instanceof PDAnnotationLink); + } + + @Test + public void validateAllFiltered() throws IOException + { + List<PDAnnotation> annotations = page.getAnnotations(new AnnotationFilter() + { + @Override +
public boolean accept(PDAnnotation annotation) + { + return false; + } + }); + Assert.assertEquals(0, annotations.size()); + } + + @Test + public void validateSelectedFew() throws IOException + { + List<PDAnnotation> annotations = page.getAnnotations(new AnnotationFilter() + { + @Override + public boolean accept(PDAnnotation annotation) + { + return (annotation instanceof PDAnnotationLink || annotation instanceof PDAnnotationSquareCircle); + } + }); + Assert.assertEquals(2, annotations.size()); + Assert.assertTrue(annotations.get(0) instanceof PDAnnotationSquareCircle); + Assert.assertTrue(annotations.get(1) instanceof PDAnnotationLink); + } +} \ No newline at end of file diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDPageContentStream.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDPageContentStream.java index 773ad43ec13..8654f89443b 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDPageContentStream.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDPageContentStream.java @@ -17,9 +17,12 @@ package org.apache.pdfbox.pdmodel; import java.io.IOException; +import java.util.List; import junit.framework.TestCase; +import static junit.framework.TestCase.assertEquals; import org.apache.pdfbox.contentstream.operator.Operator; -import org.apache.pdfbox.cos.COSFloat; +import org.apache.pdfbox.contentstream.operator.OperatorName; +import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode; @@ -43,18 +46,18 @@ public void testSetCmykColors() throws IOException // now read the PDF stream and verify that the CMYK values are correct PDFStreamParser parser = new PDFStreamParser(page); parser.parse(); - java.util.List<Object> pageTokens = parser.getTokens(); + List<Object> pageTokens = parser.getTokens(); // expected five tokens : // [0] = COSFloat{0.1} // [1] = COSFloat{0.2} // [2] = COSFloat{0.3} // [3] = COSFloat{0.4} // [4] = PDFOperator{"k"} -
assertEquals(0.1f, ((COSFloat)pageTokens.get(0)).floatValue()); - assertEquals(0.2f, ((COSFloat)pageTokens.get(1)).floatValue()); - assertEquals(0.3f, ((COSFloat)pageTokens.get(2)).floatValue()); - assertEquals(0.4f, ((COSFloat)pageTokens.get(3)).floatValue()); - assertEquals("k", ((Operator) pageTokens.get(4)).getName()); + assertEquals(0.1f, ((COSNumber) pageTokens.get(0)).floatValue()); + assertEquals(0.2f, ((COSNumber) pageTokens.get(1)).floatValue()); + assertEquals(0.3f, ((COSNumber) pageTokens.get(2)).floatValue()); + assertEquals(0.4f, ((COSNumber) pageTokens.get(3)).floatValue()); + assertEquals(OperatorName.NON_STROKING_CMYK, ((Operator) pageTokens.get(4)).getName()); // same as above but for PDPageContentStream#setStrokingColor page = new PDPage(); @@ -75,10 +78,104 @@ public void testSetCmykColors() throws IOException // [2] = COSFloat{0.7} // [3] = COSFloat{0.8} // [4] = PDFOperator{"K"} - assertEquals(0.5f, ((COSFloat)pageTokens.get(0)).floatValue()); - assertEquals(0.6f, ((COSFloat)pageTokens.get(1)).floatValue()); - assertEquals(0.7f, ((COSFloat)pageTokens.get(2)).floatValue()); - assertEquals(0.8f, ((COSFloat)pageTokens.get(3)).floatValue()); - assertEquals("K", ((Operator)pageTokens.get(4)).getName()); + assertEquals(0.5f, ((COSNumber) pageTokens.get(0)).floatValue()); + assertEquals(0.6f, ((COSNumber) pageTokens.get(1)).floatValue()); + assertEquals(0.7f, ((COSNumber) pageTokens.get(2)).floatValue()); + assertEquals(0.8f, ((COSNumber) pageTokens.get(3)).floatValue()); + assertEquals(OperatorName.STROKING_COLOR_CMYK, ((Operator) pageTokens.get(4)).getName()); + } + + public void testSetRGBandGColors() throws IOException + { + PDDocument doc = new PDDocument(); + PDPage page = new PDPage(); + doc.addPage(page); + + PDPageContentStream contentStream = new PDPageContentStream(doc, page, AppendMode.OVERWRITE, true); + + // pass a non-stroking color in RGB and Gray color space + contentStream.setNonStrokingColor(0.1f, 0.2f, 0.3f); + 
contentStream.setNonStrokingColor(1, 2, 3); + contentStream.setNonStrokingColor(0.8f); + contentStream.setNonStrokingColor(8); + contentStream.close(); + + // now read the PDF stream and verify that the values are correct + PDFStreamParser parser = new PDFStreamParser(page); + parser.parse(); + List pageTokens = parser.getTokens(); + assertEquals(0.1f, ((COSNumber) pageTokens.get(0)).floatValue()); + assertEquals(0.2f, ((COSNumber) pageTokens.get(1)).floatValue()); + assertEquals(0.3f, ((COSNumber) pageTokens.get(2)).floatValue()); + assertEquals(OperatorName.NON_STROKING_RGB, ((Operator) pageTokens.get(3)).getName()); + assertEquals(1 / 255f, ((COSNumber) pageTokens.get(4)).floatValue(), 0.00001d); + assertEquals(2 / 255f, ((COSNumber) pageTokens.get(5)).floatValue(), 0.00001d); + assertEquals(3 / 255f, ((COSNumber) pageTokens.get(6)).floatValue(), 0.00001d); + assertEquals(OperatorName.NON_STROKING_RGB, ((Operator) pageTokens.get(7)).getName()); + assertEquals(0.8f, ((COSNumber) pageTokens.get(8)).floatValue()); + assertEquals(OperatorName.NON_STROKING_GRAY, ((Operator) pageTokens.get(9)).getName()); + assertEquals(8 / 255f, ((COSNumber) pageTokens.get(10)).floatValue(), 0.00001d); + assertEquals(OperatorName.NON_STROKING_GRAY, ((Operator) pageTokens.get(11)).getName()); + + // same as above but for PDPageContentStream#setStrokingColor + page = new PDPage(); + doc.addPage(page); + + contentStream = new PDPageContentStream(doc, page, AppendMode.OVERWRITE, false); + + // pass a non-stroking color in RGB and Gray color space + contentStream.setStrokingColor(0.5f, 0.6f, 0.7f); + contentStream.setStrokingColor(5, 6, 7); + contentStream.setStrokingColor(0.8f); + contentStream.setStrokingColor(8); + contentStream.close(); + + // now read the PDF stream and verify that the values are correct + parser = new PDFStreamParser(page); + parser.parse(); + pageTokens = parser.getTokens(); + assertEquals(0.5f, ((COSNumber) pageTokens.get(0)).floatValue()); + assertEquals(0.6f, 
((COSNumber) pageTokens.get(1)).floatValue()); + assertEquals(0.7f, ((COSNumber) pageTokens.get(2)).floatValue()); + assertEquals(OperatorName.STROKING_COLOR_RGB, ((Operator) pageTokens.get(3)).getName()); + assertEquals(5 / 255f, ((COSNumber) pageTokens.get(4)).floatValue(), 0.00001d); + assertEquals(6 / 255f, ((COSNumber) pageTokens.get(5)).floatValue(), 0.00001d); + assertEquals(7 / 255f, ((COSNumber) pageTokens.get(6)).floatValue(), 0.00001d); + assertEquals(OperatorName.STROKING_COLOR_RGB, ((Operator) pageTokens.get(7)).getName()); + assertEquals(0.8f, ((COSNumber) pageTokens.get(8)).floatValue()); + assertEquals(OperatorName.STROKING_COLOR_GRAY, ((Operator) pageTokens.get(9)).getName()); + assertEquals(8 / 255f, ((COSNumber) pageTokens.get(10)).floatValue(), 0.00001d); + assertEquals(OperatorName.STROKING_COLOR_GRAY, ((Operator) pageTokens.get(11)).getName()); + doc.close(); + } + + /** + * PDFBOX-3510: missing content stream should not fail. + * + * @throws IOException + */ + public void testMissingContentStream() throws IOException + { + PDPage page = new PDPage(); + PDFStreamParser parser = new PDFStreamParser(page); + parser.parse(); + List tokens = parser.getTokens(); + assertEquals(0, tokens.size()); + } + + /** + * Check that close() can be called twice. 
+ * + * @throws IOException + */ + public void testCloseContract() throws IOException + { + PDDocument doc = new PDDocument(); + PDPage page = new PDPage(); + doc.addPage(page); + PDPageContentStream contentStream = new PDPageContentStream(doc, page, AppendMode.OVERWRITE, true); + contentStream.close(); + contentStream.close(); + doc.close(); } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDPageTransitions.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDPageTransitions.java index 3007c8e1cc2..bdaf7015204 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDPageTransitions.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDPageTransitions.java @@ -18,9 +18,10 @@ import static org.junit.Assert.assertEquals; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; +import java.net.URISyntaxException; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.interactive.pagenavigation.PDTransition; @@ -36,10 +37,10 @@ public class TestPDPageTransitions { @Test - public void readTransitions() throws IOException + public void readTransitions() throws IOException, URISyntaxException { - PDDocument doc = PDDocument.load(this.getClass().getResourceAsStream( - "/org/apache/pdfbox/pdmodel/interactive/pagenavigation/transitions_test.pdf")); + PDDocument doc = PDDocument.load(new File(this.getClass().getResource( + "/org/apache/pdfbox/pdmodel/interactive/pagenavigation/transitions_test.pdf").toURI())); PDTransition firstTransition = doc.getPages().get(0).getTransition(); assertEquals(PDTransitionStyle.Glitter.name(), firstTransition.getStyle()); assertEquals(2, firstTransition.getDuration(), 0); @@ -64,8 +65,7 @@ public void saveAndReadTransitions() throws IOException document.close(); // read - byte[] pdf = baos.toByteArray(); - PDDocument doc = PDDocument.load(new ByteArrayInputStream(pdf)); + PDDocument doc = 
PDDocument.load(baos.toByteArray()); page = doc.getPages().get(0); PDTransition loadedTransition = page.getTransition(); assertEquals(PDTransitionStyle.Fly.name(), loadedTransition.getStyle()); diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/COSArrayListTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/COSArrayListTest.java new file mode 100644 index 00000000000..ad9398c8f67 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/COSArrayListTest.java @@ -0,0 +1,589 @@ +/* + * Copyright 2015 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.common; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import junit.framework.Assert; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationSquareCircle; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class COSArrayListTest { + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + // next two entries are to be used for comparison with + // COSArrayList behaviour in order to ensure that the + // intended object is now at the correct position. + // Will also be used for Collection/Array based setting + // and comparison + static List tbcAnnotationsList; + static COSBase[] tbcAnnotationsArray; + + // next entries are to be used within COSArrayList + static List annotationsList; + static COSArray annotationsArray; + + // to be used when testing retrieving filtered items as can be done with + // {@link PDPage.getAnnotations(AnnotationFilter annotationFilter)} + static PDPage pdPage; + + private static final File OUT_DIR = new File("target/test-output/pdmodel/common"); + + /* + * Create three new different annotations and add them to the Java List/Array as + * well as PDFBox List/Array implementations. 
+ */ + @Before + public void setUp() throws Exception { + annotationsList = new ArrayList(); + PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT); + PDAnnotationLink txtLink = new PDAnnotationLink(); + PDAnnotationSquareCircle aCircle = new PDAnnotationSquareCircle(PDAnnotationSquareCircle.SUB_TYPE_CIRCLE); + + annotationsList.add(txtMark); + annotationsList.add(txtLink); + annotationsList.add(aCircle); + annotationsList.add(txtLink); + assertEquals(4, annotationsList.size()); + + tbcAnnotationsList = new ArrayList(); + tbcAnnotationsList.add(txtMark); + tbcAnnotationsList.add(txtLink); + tbcAnnotationsList.add(aCircle); + tbcAnnotationsList.add(txtLink); + assertEquals(4, tbcAnnotationsList.size()); + + annotationsArray = new COSArray(); + annotationsArray.add(txtMark); + annotationsArray.add(txtLink); + annotationsArray.add(aCircle); + annotationsArray.add(txtLink); + assertEquals(4, annotationsArray.size()); + + tbcAnnotationsArray = new COSBase[4]; + tbcAnnotationsArray[0] = txtMark.getCOSObject(); + tbcAnnotationsArray[1] = txtLink.getCOSObject(); + tbcAnnotationsArray[2] = aCircle.getCOSObject(); + tbcAnnotationsArray[3] = txtLink.getCOSObject(); + assertEquals(4, tbcAnnotationsArray.length); + + // add the annotations to the page + pdPage = new PDPage(); + pdPage.setAnnotations(annotationsList); + + // create test output directory + OUT_DIR.mkdirs(); + } + + /** + * Test getting a PDModel element is in sync with underlying COSArray + */ + @Test + public void getFromList() throws Exception { + COSArrayList cosArrayList = new COSArrayList(annotationsList, annotationsArray); + + for (int i = 0; i < cosArrayList.size(); i++) { + PDAnnotation annot = cosArrayList.get(i); + assertEquals("PDAnnotations cosObject at " + i + " shall be equal to index " + i + " of COSArray", + annotationsArray.get(i), annot.getCOSObject()); + + // compare with Java List/Array + assertEquals("PDAnnotations at " + i + " shall be at 
index " + i + " of List", + tbcAnnotationsList.get(i), annot); + assertEquals("PDAnnotations cosObject at " + i + " shall be at position " + i + " of Array", + tbcAnnotationsArray[i], annot.getCOSObject()); + } + } + + /** + * Test adding a PDModel element is in sync with underlying COSArray + */ + @Test + public void addToList() throws Exception { + COSArrayList cosArrayList = new COSArrayList(annotationsList, annotationsArray); + + // add new annotation + PDAnnotationSquareCircle aSquare = new PDAnnotationSquareCircle(PDAnnotationSquareCircle.SUB_TYPE_SQUARE); + cosArrayList.add(aSquare); + + assertEquals("List size shall be 5", 5, annotationsList.size()); + assertEquals("COSArray size shall be 5", 5, annotationsArray.size()); + + PDAnnotation annot = annotationsList.get(4); + assertEquals("Added annotation shall be 4th entry in COSArray", + 4, annotationsArray.indexOf(annot.getCOSObject())); + assertEquals("Provided COSArray and underlying COSArray shall be equal", annotationsArray, cosArrayList.getCOSArray()); + } + + /** + * Test removing a PDModel element by index is in sync with underlying COSArray + */ + @Test + public void removeFromListByIndex() throws Exception { + COSArrayList cosArrayList = new COSArrayList(annotationsList, annotationsArray); + + int positionToRemove = 2; + PDAnnotation toBeRemoved = cosArrayList.get(positionToRemove); + + assertEquals("Remove operation shall return the removed object",toBeRemoved, cosArrayList.remove(positionToRemove)); + assertEquals("List size shall be 3", 3, cosArrayList.size()); + assertEquals("COSArray size shall be 3", 3, annotationsArray.size()); + + assertEquals("PDAnnotation shall no longer exist in List", + -1, cosArrayList.indexOf(tbcAnnotationsList.get(positionToRemove))); + assertEquals("COSObject shall no longer exist in COSArray", + -1, annotationsArray.indexOf(tbcAnnotationsArray[positionToRemove])); + } + + /** + * Test removing a unique PDModel element by index is in sync with underlying COSArray + 
*/ + @Test + public void removeUniqueFromListByObject() throws Exception { + COSArrayList cosArrayList = new COSArrayList(annotationsList, annotationsArray); + + int positionToRemove = 2; + PDAnnotation toBeRemoved = annotationsList.get(positionToRemove); + + assertTrue("Remove operation shall return true",cosArrayList.remove(toBeRemoved)); + assertEquals("List size shall be 3", 3, cosArrayList.size()); + assertEquals("COSArray size shall be 3", 3, annotationsArray.size()); + + // compare with Java List/Array to ensure correct object at position + assertEquals("List object at 3 is at position 2 in COSArrayList now", + cosArrayList.get(2), tbcAnnotationsList.get(3)); + assertEquals("COSObject of List object at 3 is at position 2 in COSArray now", + annotationsArray.get(2), tbcAnnotationsList.get(3).getCOSObject()); + assertEquals("Array object at 3 is at position 2 in underlying COSArray now", + annotationsArray.get(2), tbcAnnotationsArray[3]); + + assertEquals("PDAnnotation shall no longer exist in List", + -1, cosArrayList.indexOf(tbcAnnotationsList.get(positionToRemove))); + assertEquals("COSObject shall no longer exist in COSArray", + -1, annotationsArray.indexOf(tbcAnnotationsArray[positionToRemove])); + + assertFalse("Remove shall not remove any object",cosArrayList.remove(toBeRemoved)); + } + + /** + * Test removing a unique PDModel element by index is in sync with underlying COSArray + */ + @Test + public void removeAllUniqueFromListByObject() throws Exception { + COSArrayList cosArrayList = new COSArrayList(annotationsList, annotationsArray); + + int positionToRemove = 2; + PDAnnotation toBeRemoved = annotationsList.get(positionToRemove); + + List toBeRemovedInstances = Collections.singletonList(toBeRemoved); + + assertTrue("Remove operation shall return true",cosArrayList.removeAll(toBeRemovedInstances)); + assertEquals("List size shall be 3", 3, cosArrayList.size()); + assertEquals("COSArray size shall be 3", 3, annotationsArray.size()); + + 
assertFalse("Remove shall not remove any object",cosArrayList.removeAll(toBeRemovedInstances)); + } + + + /** + * Test removing a multiple appearing PDModel element by index is in sync with underlying COSArray + */ + @Test + public void removeMultipleFromListByObject() throws Exception { + COSArrayList cosArrayList = new COSArrayList(annotationsList, annotationsArray); + + int positionToRemove = 1; + PDAnnotation toBeRemoved = tbcAnnotationsList.get(positionToRemove); + + assertTrue("Remove operation shall return true",cosArrayList.remove(toBeRemoved)); + assertEquals("List size shall be 3", 3, cosArrayList.size()); + assertEquals("COSArray size shall be 3", 3, annotationsArray.size()); + + assertTrue("Remove operation shall return true",cosArrayList.remove(toBeRemoved)); + assertEquals("List size shall be 2", 2, cosArrayList.size()); + assertEquals("COSArray size shall be 2", 2, annotationsArray.size()); + } + + /** + * Test removing a unique PDModel element by index is in sync with underlying COSArray + */ + @Test + public void removeAllMultipleFromListByObject() throws Exception { + COSArrayList cosArrayList = new COSArrayList(annotationsList, annotationsArray); + + int positionToRemove = 1; + PDAnnotation toBeRemoved = annotationsList.get(positionToRemove); + + List toBeRemovedInstances = Collections.singletonList(toBeRemoved); + + assertTrue("Remove operation shall return true",cosArrayList.removeAll(toBeRemovedInstances)); + assertEquals("List size shall be 2", 2, cosArrayList.size()); + assertEquals("COSArray size shall be 2", 2, annotationsArray.size()); + + assertFalse("Remove shall not remove any object",cosArrayList.removeAll(toBeRemovedInstances)); + } + + @Test + public void removeFromFilteredListByIndex() throws Exception + { + // removing from a filtered list is not permitted + thrown.expect(UnsupportedOperationException.class); + + // retrieve all annotations from page but the link annotation + // which is 2nd in list - see above setup + 
AnnotationFilter annotsFilter = new AnnotationFilter() + { + @Override + public boolean accept(PDAnnotation annotation) + { + return !(annotation instanceof PDAnnotationLink); + } + }; + + COSArrayList cosArrayList = (COSArrayList) pdPage.getAnnotations(annotsFilter); + + // this call should fail + cosArrayList.remove(1); + } + + + @Test + public void removeFromFilteredListByObject() throws Exception + { + // removing from a filtered list is not permitted + thrown.expect(UnsupportedOperationException.class); + + // retrieve all annotations from page but the link annotation + // which is 2nd in list - see above setup + AnnotationFilter annotsFilter = new AnnotationFilter() + { + @Override + public boolean accept(PDAnnotation annotation) + { + return !(annotation instanceof PDAnnotationLink); + } + }; + + COSArrayList cosArrayList = (COSArrayList) pdPage.getAnnotations(annotsFilter); + + // remove object + int positionToRemove = 1; + PDAnnotation toBeRemoved = cosArrayList.get(positionToRemove); + + // this call should fail + cosArrayList.remove(toBeRemoved); + + } + + @Test + public void removeSingleDirectObject() throws IOException { + + // generate test file + PDDocument pdf = new PDDocument(); + + PDPage page = new PDPage(); + pdf.addPage(page); + + ArrayList pageAnnots = new ArrayList(); + PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT); + PDAnnotationLink txtLink = new PDAnnotationLink(); + + // enforce the COSDictionaries to be written directly into the COSArray + txtMark.getCOSObject().getCOSObject().setDirect(true); + txtLink.getCOSObject().getCOSObject().setDirect(true); + + pageAnnots.add(txtMark); + pageAnnots.add(txtMark); + pageAnnots.add(txtMark); + pageAnnots.add(txtLink); + assertEquals("There shall be 4 annotations generated", 4, pageAnnots.size()); + + page.setAnnotations(pageAnnots); + + pdf.save(OUT_DIR + "/removeSingleDirectObjectTest.pdf"); + pdf.close(); + + pdf = PDDocument.load(new 
File(OUT_DIR + "/removeSingleDirectObjectTest.pdf")); + page = pdf.getPage(0); + + COSArrayList annotations = (COSArrayList) page.getAnnotations(); + + assertEquals("There shall be 4 annotations retrieved", 4, annotations.size()); + assertEquals("The size of the internal COSArray shall be 4", 4, annotations.getCOSArray().size()); + + PDAnnotation toBeRemoved = annotations.get(0); + annotations.remove(toBeRemoved); + + assertEquals("There shall be 3 annotations left", 3, annotations.size()); + assertEquals("The size of the internal COSArray shall be 3", 3, annotations.getCOSArray().size()); + + pdf.close(); + } + + @Test + public void removeSingleIndirectObject() throws IOException { + + // generate test file + PDDocument pdf = new PDDocument(); + PDPage page = new PDPage(); + pdf.addPage(page); + + ArrayList pageAnnots = new ArrayList(); + PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT); + PDAnnotationLink txtLink = new PDAnnotationLink(); + + pageAnnots.add(txtMark); + pageAnnots.add(txtMark); + pageAnnots.add(txtMark); + pageAnnots.add(txtLink); + assertEquals("There shall be 4 annotations generated", 4, pageAnnots.size()); + + page.setAnnotations(pageAnnots); + + pdf.save(OUT_DIR + "/removeSingleIndirectObjectTest.pdf"); + pdf.close(); + + pdf = PDDocument.load(new File(OUT_DIR + "/removeSingleIndirectObjectTest.pdf")); + page = pdf.getPage(0); + + COSArrayList annotations = (COSArrayList) page.getAnnotations(); + + assertEquals("There shall be 4 annotations retrieved", 4, annotations.size()); + assertEquals("The size of the internal COSArray shall be 4", 4, annotations.getCOSArray().size()); + + PDAnnotation toBeRemoved = annotations.get(0); + + annotations.remove(toBeRemoved); + + assertEquals("There shall be 3 annotations left", 3, annotations.size()); + assertEquals("The size of the internal COSArray shall be 2", 3, annotations.getCOSArray().size()); + + pdf.close(); + } + + // @Test + // PDFBOX-4669, 
PDFBOX-4723 + // This test is currently disabled with the removeAll function not properly + // working. See the discussion in above mentioned tickets about currently not implementing equals which the + // functionality would need to rely on. + public void removeDirectObject() throws IOException { + + // generate test file + PDDocument pdf = new PDDocument(); + PDPage page = new PDPage(); + pdf.addPage(page); + + ArrayList pageAnnots = new ArrayList(); + PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT); + PDAnnotationLink txtLink = new PDAnnotationLink(); + + // enforce the COSDictionaries to be written directly into the COSArray + txtMark.getCOSObject().getCOSObject().setDirect(true); + txtLink.getCOSObject().getCOSObject().setDirect(true); + + pageAnnots.add(txtMark); + pageAnnots.add(txtMark); + pageAnnots.add(txtMark); + pageAnnots.add(txtLink); + assertEquals("There shall be 4 annotations generated", 4, pageAnnots.size()); + + page.setAnnotations(pageAnnots); + + pdf.save(OUT_DIR + "/removeDirectObjectTest.pdf"); + pdf.close(); + + pdf = PDDocument.load(new File(OUT_DIR + "/removeDirectObjectTest.pdf")); + page = pdf.getPage(0); + + COSArrayList annotations = (COSArrayList) page.getAnnotations(); + + assertEquals("There shall be 4 annotations retrieved", 4, annotations.size()); + assertEquals("The size of the internal COSArray shall be 4", 4, annotations.getCOSArray().size()); + + ArrayList toBeRemoved = new ArrayList(); + + toBeRemoved.add(annotations.get(0)); + annotations.removeAll(toBeRemoved); + + assertEquals("There shall be 1 annotations left", 1, annotations.size()); + assertEquals("The size of the internal COSArray shall be 1", 1, annotations.getCOSArray().size()); + + pdf.close(); + } + + @Test + public void removeIndirectObject() throws IOException { + + // generate test file + PDDocument pdf = new PDDocument(); + PDPage page = new PDPage(); + pdf.addPage(page); + + ArrayList pageAnnots = new 
ArrayList(); + PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT); + PDAnnotationLink txtLink = new PDAnnotationLink(); + + pageAnnots.add(txtMark); + pageAnnots.add(txtMark); + pageAnnots.add(txtMark); + pageAnnots.add(txtLink); + assertEquals("There shall be 4 annotations generated", 4, pageAnnots.size()); + + page.setAnnotations(pageAnnots); + + pdf.save(OUT_DIR + "/removeIndirectObjectTest.pdf"); + pdf.close(); + + pdf = PDDocument.load(new File(OUT_DIR + "/removeIndirectObjectTest.pdf")); + page = pdf.getPage(0); + + COSArrayList annotations = (COSArrayList) page.getAnnotations(); + + assertEquals("There shall be 4 annotations retrieved", 4, annotations.size()); + assertEquals("The size of the internal COSArray shall be 4", 4, annotations.getCOSArray().size()); + + ArrayList toBeRemoved = new ArrayList(); + toBeRemoved.add(annotations.get(0)); + + annotations.removeAll(toBeRemoved); + + assertEquals("There shall be 1 annotations left", 1, annotations.size()); + assertEquals("The size of the internal COSArray shall be 1", 1, annotations.getCOSArray().size()); + + pdf.close(); + } + + // @Test + // PDFBOX-4669, PDFBOX-4723 + // This test is currently disabled with the retainAll function not properly + // working. See the discussion in above mentioned tickets about currently not implementing equals which the + // functionality would need to rely on. 
+ public void retainDirectObject() throws IOException { + + // generate test file + PDDocument pdf = new PDDocument(); + PDPage page = new PDPage(); + pdf.addPage(page); + + ArrayList pageAnnots = new ArrayList(); + PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT); + PDAnnotationLink txtLink = new PDAnnotationLink(); + + // enforce the COSDictionaries to be written directly into the COSArray + txtMark.getCOSObject().getCOSObject().setDirect(true); + txtLink.getCOSObject().getCOSObject().setDirect(true); + + pageAnnots.add(txtMark); + pageAnnots.add(txtMark); + pageAnnots.add(txtMark); + pageAnnots.add(txtLink); + assertEquals("There shall be 4 annotations generated", 4, pageAnnots.size()); + + page.setAnnotations(pageAnnots); + + pdf.save(OUT_DIR + "/retainDirectObjectTest.pdf"); + pdf.close(); + + pdf = PDDocument.load(new File(OUT_DIR + "/retainDirectObjectTest.pdf")); + page = pdf.getPage(0); + + COSArrayList annotations = (COSArrayList) page.getAnnotations(); + + assertEquals("There shall be 4 annotations retrieved", 4, annotations.size()); + assertEquals("The size of the internal COSArray shall be 4", 4, annotations.getCOSArray().size()); + + ArrayList toBeRetained = new ArrayList(); + + toBeRetained.add(annotations.get(0)); + annotations.retainAll(toBeRetained); + + assertEquals("There shall be 3 annotations left", 3, annotations.size()); + assertEquals("The size of the internal COSArray shall be 3", 3, annotations.getCOSArray().size()); + + pdf.close(); + } + + // @Test + // PDFBOX-4669, PDFBOX-4723 + // This test is currently disabled with the retainAll function not properly + // working. See the discussion in above mentioned tickets about currently not implementing equals which the + // functionality would need to rely on. 
+ public void retainIndirectObject() throws IOException { + + // generate test file + PDDocument pdf = new PDDocument(); + PDPage page = new PDPage(); + pdf.addPage(page); + + ArrayList pageAnnots = new ArrayList(); + PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT); + PDAnnotationLink txtLink = new PDAnnotationLink(); + + // enforce the COSDictionaries to be written directly into the COSArray + txtMark.getCOSObject().getCOSObject().setDirect(true); + txtLink.getCOSObject().getCOSObject().setDirect(true); + + pageAnnots.add(txtMark); + pageAnnots.add(txtMark); + pageAnnots.add(txtMark); + pageAnnots.add(txtLink); + assertEquals("There shall be 4 annotations generated", 4, pageAnnots.size()); + + page.setAnnotations(pageAnnots); + + pdf.save(OUT_DIR + "/retainIndirectObjectTest.pdf"); + pdf.close(); + + pdf = PDDocument.load(new File(OUT_DIR + "/retainIndirectObjectTest.pdf")); + page = pdf.getPage(0); + + COSArrayList annotations = (COSArrayList) page.getAnnotations(); + + assertEquals("There shall be 4 annotations retrieved", 4, annotations.size()); + assertEquals("The size of the internal COSArray shall be 4", 4, annotations.getCOSArray().size()); + + ArrayList toBeRetained = new ArrayList(); + + toBeRetained.add(annotations.get(0)); + annotations.retainAll(toBeRetained); + + assertEquals("There shall be 3 annotations left", 3, annotations.size()); + assertEquals("The size of the internal COSArray shall be 3", 3, annotations.getCOSArray().size()); + + pdf.close(); + } +} \ No newline at end of file diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/PDStreamTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/PDStreamTest.java index 5d21ac0cfdb..4aabbc9d96e 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/PDStreamTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/PDStreamTest.java @@ -91,7 +91,7 @@ public void 
testCreateInputStreamNullStopFilters() throws Exception PDStream pdStream = new PDStream(doc, is, new COSArray()); Assert.assertEquals(0, pdStream.getFilters().size()); - is = pdStream.createInputStream(null); + is = pdStream.createInputStream((List) null); Assert.assertEquals(12, is.read()); Assert.assertEquals(34, is.read()); Assert.assertEquals(56, is.read()); diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/TestEmbeddedFiles.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/TestEmbeddedFiles.java index fa8e4d0906c..aca92e34680 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/TestEmbeddedFiles.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/TestEmbeddedFiles.java @@ -84,7 +84,7 @@ public void testOSSpecificAttachments() throws IOException PDDocumentNameDictionary names = catalog.getNames(); PDEmbeddedFilesNameTreeNode treeNode = names.getEmbeddedFiles(); List> kids = treeNode.getKids(); - for (PDNameTreeNode kid : kids) + for (PDNameTreeNode kid : kids) { Map tmpNames = kid.getNames(); COSObjectable obj = tmpNames.get("My first attachment"); diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/TestPDNameTreeNode.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/TestPDNameTreeNode.java index 27c54caf2c3..41a6d993aae 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/TestPDNameTreeNode.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/TestPDNameTreeNode.java @@ -18,7 +18,7 @@ import java.io.IOException; import java.util.List; -import java.util.SortedMap; +import java.util.Map; import java.util.TreeMap; import junit.framework.TestCase; import org.apache.pdfbox.cos.COSInteger; @@ -31,18 +31,17 @@ */ public class TestPDNameTreeNode extends TestCase { - - private PDNameTreeNode node1; - private PDNameTreeNode node2; - private PDNameTreeNode node4; - private PDNameTreeNode node5; - private PDNameTreeNode node24; + private PDNameTreeNode node1; + 
private PDNameTreeNode node2; + private PDNameTreeNode node4; + private PDNameTreeNode node5; + private PDNameTreeNode node24; @Override protected void setUp() throws Exception { this.node5 = new PDIntegerNameTreeNode(); - SortedMap names = new TreeMap(); + Map names = new TreeMap(); names.put("Actinium", COSInteger.get(89)); names.put("Aluminum", COSInteger.get(13)); names.put("Americium", COSInteger.get(95)); @@ -53,7 +52,7 @@ protected void setUp() throws Exception this.node5.setNames(names); this.node24 = new PDIntegerNameTreeNode(); - names = new TreeMap(); + names = new TreeMap(); names.put("Xenon", COSInteger.get(54)); names.put("Ytterbium", COSInteger.get(70)); names.put("Yttrium", COSInteger.get(39)); @@ -62,10 +61,10 @@ protected void setUp() throws Exception this.node24.setNames(names); this.node2 = new PDIntegerNameTreeNode(); - List kids = this.node2.getKids(); + List> kids = this.node2.getKids(); if (kids == null) { - kids = new COSArrayList(); + kids = new COSArrayList>(); } kids.add(this.node5); this.node2.setKids(kids); @@ -74,7 +73,7 @@ protected void setUp() throws Exception kids = this.node4.getKids(); if (kids == null) { - kids = new COSArrayList(); + kids = new COSArrayList>(); } kids.add(this.node24); this.node4.setKids(kids); @@ -83,14 +82,13 @@ protected void setUp() throws Exception kids = this.node1.getKids(); if (kids == null) { - kids = new COSArrayList(); + kids = new COSArrayList>(); } kids.add(this.node2); kids.add(this.node4); this.node1.setKids(kids); } - public void testUpperLimit() throws IOException { Assert.assertEquals("Astatine", this.node5.getUpperLimit()); @@ -112,5 +110,4 @@ public void testLowerLimit() throws IOException Assert.assertEquals(null, this.node1.getLowerLimit()); } - } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/TestPDNumberTreeNode.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/TestPDNumberTreeNode.java index fa72e7894be..f34e742396f 100644 --- 
a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/TestPDNumberTreeNode.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/TestPDNumberTreeNode.java @@ -22,7 +22,6 @@ import java.util.Map; import java.util.TreeMap; import junit.framework.TestCase; -import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSInteger; import org.junit.Assert; @@ -56,7 +55,7 @@ public PDTest(COSInteger cosInt) } @Override - public COSBase getCOSObject() + public COSInteger getCOSObject() { return COSInteger.get( value ); } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/TestFunctions.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/TestFunctions.java deleted file mode 100644 index 5bd5f55a597..00000000000 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/TestFunctions.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.pdmodel.common.function; - -import org.apache.pdfbox.pdmodel.common.function.type4.TestOperators; -import org.apache.pdfbox.pdmodel.common.function.type4.TestParser; -import junit.framework.Test; -import junit.framework.TestCase; -import junit.framework.TestSuite; - -/** - * Tests PDF functions. - */ -public class TestFunctions extends TestCase -{ - - /** - * The main method to run tests. - * - * @param args The command line arguments. - */ - public static void main( String[] args ) - { - String[] arg = {TestFunctions.class.getName()}; - junit.textui.TestRunner.main( arg ); - } - - /** - * This will get the suite of test that this class holds. - * - * @return All of the tests that this class holds. - */ - public static Test suite() - { - TestSuite suite = new TestSuite(TestFunctions.class.getName()); - suite.addTestSuite(TestOperators.class); - suite.addTestSuite(TestParser.class); - suite.addTestSuite(TestPDFunctionType4.class); - return suite; - } - -} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/TestPDFunctionType4.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/TestPDFunctionType4.java index 40489802687..5fa6c212c3e 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/TestPDFunctionType4.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/TestPDFunctionType4.java @@ -74,6 +74,7 @@ public void testFunctionSimple() throws Exception assertEquals(1f, output[0]); input = new float[] {0.8f, 1.2f}; //input argument outside Dimension + output = function.eval(input); assertEquals(1, output.length); assertEquals(1f, output[0]); diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/package.html b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/package.html index ba780871f12..76d53d6ec83 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/package.html +++ 
b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/type4/package.html b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/type4/package.html index 87001e410c9..3a9491b43b6 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/type4/package.html +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/function/type4/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java new file mode 100644 index 00000000000..020f1113a5c --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/documentinterchange/logicalstructure/PDStructureElementTest.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure; + +import java.io.File; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSObject; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.junit.Assert; +import org.junit.Test; + +/** + * + * @author Tilman Hausherr + */ +public class PDStructureElementTest +{ + private static final File TARGETPDFDIR = new File("target/pdfs"); + + /** + * PDFBOX-4197: test that object references in array attributes of a PDStructureElement are caught. + * + * @throws IOException + */ + @Test + public void testPDFBox4197() throws IOException + { + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-4197.pdf")); + PDStructureTreeRoot structureTreeRoot = doc.getDocumentCatalog().getStructureTreeRoot(); + Set> attributeSet = new HashSet>(); + checkElement(structureTreeRoot.getK(), attributeSet); + doc.close(); + + // collect attributes and check their count. + Assert.assertEquals(117, attributeSet.size()); + int cnt = 0; + for (Revisions attributes : attributeSet) + { + cnt += attributes.size(); + } + Assert.assertEquals(111, cnt); // this one was 105 before PDFBOX-4197 was fixed + } + + // Each element can be an array, a dictionary or a number. 
+ // See PDF specification Table 323 - Entries in a structure element dictionary + private void checkElement(COSBase base, Set>attributeSet) + { + if (base instanceof COSArray) + { + for (COSBase base2 : (COSArray) base) + { + if (base2 instanceof COSObject) + { + base2 = ((COSObject) base2).getObject(); + } + checkElement(base2, attributeSet); + } + } + else if (base instanceof COSDictionary) + { + COSDictionary kdict = (COSDictionary) base; + if (kdict.containsKey(COSName.PG)) + { + PDStructureElement structureElement = new PDStructureElement(kdict); + Revisions attributes = structureElement.getAttributes(); + attributeSet.add(attributes); + Revisions classNames = structureElement.getClassNames(); + //TODO: modify the test to also check for class names, if we ever have a file. + } + if (kdict.containsKey(COSName.K)) + { + checkElement(kdict.getDictionaryObject(COSName.K), attributeSet); + } + } + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationTest.java index 8e527e585d3..1ab70320643 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotationTest.java @@ -16,13 +16,14 @@ */ package org.apache.pdfbox.pdmodel.fdf; +import java.io.File; import static org.junit.Assert.assertEquals; import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; +import java.net.URISyntaxException; import java.util.List; +import org.junit.Assert; import org.junit.Test; /** @@ -33,31 +34,36 @@ */ public class FDFAnnotationTest { - @Test - public void loadXFDFAnnotations() throws IOException + public void loadXFDFAnnotations() throws IOException, URISyntaxException { - FDFDocument fdfDoc = null; - InputStream stream = null; - List fdfAnnots = new ArrayList(); - try - { - stream = FDFAnnotationTest.class - 
.getResourceAsStream("xfdf-test-document-annotations.xml"); - fdfDoc = FDFDocument.loadXFDF(stream); - fdfAnnots = fdfDoc.getCatalog().getFDF().getAnnotations(); - assertEquals(17, fdfAnnots.size()); - } - finally + File f = new File(FDFAnnotationTest.class.getResource("xfdf-test-document-annotations.xml").toURI()); + FDFDocument fdfDoc = FDFDocument.loadXFDF(f); + List fdfAnnots = fdfDoc.getCatalog().getFDF().getAnnotations(); + assertEquals(18, fdfAnnots.size()); + + // test PDFBOX-4345 and PDFBOX-3646 + boolean testedPDFBox4345andPDFBox3646 = false; + for (FDFAnnotation ann : fdfAnnots) { - if (stream != null) - { - stream.close(); - } - if (fdfDoc != null) + if (ann instanceof FDFAnnotationFreeText) { - fdfDoc.close(); + FDFAnnotationFreeText annotationFreeText = (FDFAnnotationFreeText) ann; + if ("P&1 P&2 P&3".equals(annotationFreeText.getContents())) + { + testedPDFBox4345andPDFBox3646 = true; + Assert.assertEquals("\n" + + "

    P&1 P&2 " + + "P&3

    \n" + + " ", annotationFreeText.getRichContents().trim()); + } } } + Assert.assertTrue(testedPDFBox4345andPDFBox3646); + fdfDoc.close(); } -} +} \ No newline at end of file diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/fdf/FDFFieldTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/fdf/FDFFieldTest.java new file mode 100644 index 00000000000..61ce2e985fc --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/fdf/FDFFieldTest.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdmodel.fdf; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.pdmodel.common.COSArrayList; +import org.junit.Test; + +/* + * Test some characteristics of FDFFields + */ +public class FDFFieldTest +{ + @Test + public void testCOSStringValue() throws IOException + { + String testString = "Test value"; + COSString testCOSString = new COSString(testString); + + FDFField field = new FDFField(); + field.setValue(testCOSString); + + assertEquals(testCOSString, (COSString) field.getCOSValue()); + assertEquals(testString, field.getValue()); + } + + + @Test + public void testTextAsCOSStreamValue() throws IOException + { + String testString = "Test value"; + byte[] testBytes = testString.getBytes("ASCII"); + COSStream stream = createStream(testBytes, null); + + FDFField field = new FDFField(); + field.setValue(stream); + + assertEquals(testString, field.getValue()); + } + + @Test + public void testCOSNameValue() throws IOException + { + String testString = "Yes"; + COSName testCOSSName = COSName.getPDFName(testString); + + FDFField field = new FDFField(); + field.setValue(testCOSSName); + + assertEquals(testCOSSName, (COSName) field.getCOSValue()); + assertEquals(testString, field.getValue()); + } + + @Test + public void testCOSArrayValue() throws IOException + { + List testList = new ArrayList(); + testList.add("A"); + testList.add("B"); + + COSArray testCOSArray = COSArrayList.convertStringListToCOSStringCOSArray(testList); + + FDFField field = new FDFField(); + field.setValue(testCOSArray); + + assertEquals(testCOSArray, (COSArray) field.getCOSValue()); + assertEquals(testList, field.getValue()); + } + + 
+ private COSStream createStream(byte[] testString, COSBase filters) throws IOException + { + COSStream stream = new COSStream(); + OutputStream output = stream.createOutputStream(filters); + output.write(testString); + output.close(); + return stream; + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java index e78dfc86175..c208bb55bd8 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java @@ -1,6 +1,4 @@ /* - * Copyright 2011 adam. - * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,26 +17,62 @@ package org.apache.pdfbox.pdmodel.font; -import junit.framework.TestCase; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import org.apache.fontbox.ttf.TTFParser; +import org.apache.fontbox.ttf.TrueTypeCollection; +import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.fontbox.util.autodetect.FontFileFinder; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.font.encoding.WinAnsiEncoding; import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.text.PDFTextStripper; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; /** * * @author adam + * @author Tilman 
Hausherr */ -public class PDFontTest extends TestCase +public class PDFontTest { + private static final File OUT_DIR = new File("target/test-output"); + + @Before + public void setUp() throws Exception + { + OUT_DIR.mkdirs(); + } /** - * Test of the error reported in PDFBox-988 + * Test of the error reported in PDFBOX-988 + * + * @throws IOException + * @throws URISyntaxException */ - public void testPDFBox988() throws Exception + @Test + public void testPDFBox988() throws IOException, URISyntaxException { PDDocument doc = null; try { - doc = PDDocument.load(PDFontTest.class.getResourceAsStream("F001u_3_7j.pdf")); + doc = PDDocument.load(new File(PDFontTest.class.getResource("F001u_3_7j.pdf").toURI())); PDFRenderer renderer = new PDFRenderer(doc); renderer.renderImage(0); // the allegation is that renderImage() will crash the JVM or hang @@ -51,4 +85,367 @@ public void testPDFBox988() throws Exception } } } + + /** + * PDFBOX-3747: Test that using "-" with Calibri in Windows 7 has "-" in text extraction and not + * \u2010, which was because of a wrong ToUnicode mapping because prior to the bugfix, + * CmapSubtable#getCharCodes provided values in random order. 
+ * + * @throws IOException + */ + @Test + public void testPDFBox3747() throws IOException + { + File file = new File("c:/windows/fonts", "calibri.ttf"); + if (!file.exists()) + { + System.out.println("testPDFBox3747 skipped"); + return; + } + PDDocument doc = new PDDocument(); + PDPage page = new PDPage(); + doc.addPage(page); + PDFont font = PDType0Font.load(doc, file); + + PDPageContentStream cs = new PDPageContentStream(doc, page); + cs.beginText(); + cs.setFont(font, 10); + cs.showText("PDFBOX-3747"); + cs.endText(); + cs.close(); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + doc.save(baos); + doc.close(); + + doc = PDDocument.load(baos.toByteArray()); + PDFTextStripper stripper = new PDFTextStripper(); + String text = stripper.getText(doc); + Assert.assertEquals("PDFBOX-3747", text.trim()); + doc.close(); + } + + /** + * PDFBOX-3826: Test ability to reuse a TrueTypeFont created from a file or a stream for several + * PDFs to avoid parsing it over and over again. Also check that full or partial embedding is + * done, and do render and text extraction. + * + * @throws IOException + * @throws URISyntaxException + */ + @Test + public void testPDFBox3826() throws IOException, URISyntaxException + { + URL url = PDFont.class.getResource( + "/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"); + File fontFile = new File(url.toURI()); + + TrueTypeFont ttf1 = new TTFParser().parse(fontFile); + testPDFBox3826checkFonts(testPDFBox3826createDoc(ttf1), fontFile); + ttf1.close(); + + TrueTypeFont ttf2 = new TTFParser().parse(new FileInputStream(fontFile)); + testPDFBox3826checkFonts(testPDFBox3826createDoc(ttf2), fontFile); + ttf2.close(); + } + + /** + * PDFBOX-4115: Test ability to create PDF with german umlaut glyphs with a type 1 font. + * Test for everything that went wrong before this was fixed. 
+ * + * @throws IOException + */ + @Test + public void testPDFBOX4115() throws IOException + { + File fontFile = new File("target/fonts", "n019003l.pfb"); + File outputFile = new File(OUT_DIR, "FontType1.pdf"); + String text = "äöüÄÖÜ"; + + PDDocument doc = new PDDocument(); + + PDPage page = new PDPage(); + PDPageContentStream contentStream = new PDPageContentStream(doc, page); + + PDType1Font font = new PDType1Font(doc, new FileInputStream(fontFile), WinAnsiEncoding.INSTANCE); + + contentStream.beginText(); + contentStream.setFont(font, 10); + contentStream.newLineAtOffset(10, 700); + contentStream.showText(text); + contentStream.endText(); + contentStream.close(); + + doc.addPage(page); + + doc.save(outputFile); + doc.close(); + + doc = PDDocument.load(outputFile); + + font = (PDType1Font) doc.getPage(0).getResources().getFont(COSName.getPDFName("F1")); + Assert.assertEquals(WinAnsiEncoding.INSTANCE, font.getEncoding()); + + for (char c : text.toCharArray()) + { + String name = font.getEncoding().getName(c); + Assert.assertEquals("dieresis", name.substring(1)); + Assert.assertFalse(font.getPath(name).getBounds2D().isEmpty()); + } + + PDFTextStripper stripper = new PDFTextStripper(); + Assert.assertEquals(text, stripper.getText(doc).trim()); + + doc.close(); + } + + /** + * Test whether bug from PDFBOX-4318 is fixed, which had the wrong cache key. 
+ * @throws java.io.IOException + */ + @Test + public void testPDFox4318() throws IOException + { + try + { + PDType1Font.HELVETICA_BOLD.encode("\u0080"); + Assert.fail("should have thrown IllegalArgumentException"); + } + catch (IllegalArgumentException ex) + { + } + PDType1Font.HELVETICA_BOLD.encode("€"); + try + { + PDType1Font.HELVETICA_BOLD.encode("\u0080"); + Assert.fail("should have thrown IllegalArgumentException"); + } + catch (IllegalArgumentException ex) + { + } + } + + @Test + public void testFullEmbeddingTTC() throws IOException + { + FontFileFinder fff = new FontFileFinder(); + TrueTypeCollection ttc = null; + for (URI uri : fff.find()) + { + if (uri.getPath().endsWith(".ttc")) + { + File file = new File(uri); + System.out.println("TrueType collection file: " + file); + ttc = new TrueTypeCollection(file); + break; + } + } + if (ttc == null) + { + System.out.println("testFullEmbeddingTTC skipped, no .ttc files available"); + return; + } + + final List names = new ArrayList(); + ttc.processAllFonts(new TrueTypeCollection.TrueTypeFontProcessor() + { + @Override + public void process(TrueTypeFont ttf) throws IOException + { + System.out.println("TrueType font in collection: " + ttf.getName()); + names.add(ttf.getName()); + } + }); + + TrueTypeFont ttf = ttc.getFontByName(names.get(0)); // take the first one + System.out.println("TrueType font used for test: " + ttf.getName()); + + try + { + PDType0Font.load(new PDDocument(), ttf, false); + } + catch (IOException ex) + { + Assert.assertEquals("Full embedding of TrueType font collections not supported", ex.getMessage()); + return; + } + Assert.fail("should have thrown IOException"); + } + + /** + * Test using broken Type1C font. 
+ * + * @throws IOException + */ + @Test + public void testPDFox5048() throws IOException + { + InputStream is = new URL("https://issues.apache.org/jira/secure/attachment/13017227/stringwidth.pdf").openStream(); + PDDocument doc = PDDocument.load(is); + PDPage page = doc.getPage(0); + PDFont font = page.getResources().getFont(COSName.getPDFName("F70")); + Assert.assertTrue(font.isDamaged()); + Assert.assertEquals(0f, font.getHeight(0), 0); + Assert.assertEquals(0f, font.getStringWidth("Pa"), 0); + doc.close(); + is.close(); + } + + private void testPDFBox3826checkFonts(byte[] byteArray, File fontFile) throws IOException + { + PDDocument doc = PDDocument.load(byteArray); + + PDPage page2 = doc.getPage(0); + + // F1 = type0 subset + PDType0Font fontF1 = (PDType0Font) page2.getResources().getFont(COSName.getPDFName("F1")); + Assert.assertTrue(fontF1.getName().contains("+")); + Assert.assertTrue(fontFile.length() > fontF1.getFontDescriptor().getFontFile2().toByteArray().length); + + // F2 = type0 full embed + PDType0Font fontF2 = (PDType0Font) page2.getResources().getFont(COSName.getPDFName("F2")); + Assert.assertFalse(fontF2.getName().contains("+")); + Assert.assertEquals(fontFile.length(), fontF2.getFontDescriptor().getFontFile2().toByteArray().length); + + // F3 = tt full embed + PDTrueTypeFont fontF3 = (PDTrueTypeFont) page2.getResources().getFont(COSName.getPDFName("F3")); + Assert.assertFalse(fontF2.getName().contains("+")); + Assert.assertEquals(fontFile.length(), fontF3.getFontDescriptor().getFontFile2().toByteArray().length); + + new PDFRenderer(doc).renderImage(0); + + PDFTextStripper stripper = new PDFTextStripper(); + stripper.setLineSeparator("\n"); + String text = stripper.getText(doc); + Assert.assertEquals("testMultipleFontFileReuse1\ntestMultipleFontFileReuse2\ntestMultipleFontFileReuse3", text.trim()); + + doc.close(); + } + + private byte[] testPDFBox3826createDoc(TrueTypeFont ttf) throws IOException + { + PDDocument doc = new PDDocument(); + + 
PDPage page = new PDPage(); + doc.addPage(page); + + // type 0 subset embedding + PDFont font = PDType0Font.load(doc, ttf, true); + PDPageContentStream cs = new PDPageContentStream(doc, page); + + cs.beginText(); + cs.newLineAtOffset(10, 700); + cs.setFont(font, 10); + cs.showText("testMultipleFontFileReuse1"); + cs.endText(); + + // type 0 full embedding + font = PDType0Font.load(doc, ttf, false); + + cs.beginText(); + cs.newLineAtOffset(10, 650); + cs.setFont(font, 10); + cs.showText("testMultipleFontFileReuse2"); + cs.endText(); + + // tt full embedding but only WinAnsiEncoding + font = PDTrueTypeFont.load(doc, ttf, WinAnsiEncoding.INSTANCE); + + cs.beginText(); + cs.newLineAtOffset(10, 600); + cs.setFont(font, 10); + cs.showText("testMultipleFontFileReuse3"); + cs.endText(); + + cs.close(); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + doc.save(baos); + doc.close(); + return baos.toByteArray(); + } + + /** + * Check that font can be deleted after usage. + * + * @throws IOException + */ + @Test + public void testDeleteFont() throws IOException + { + File tempFontFile = new File(OUT_DIR, "LiberationSans-Regular.ttf"); + File tempPdfFile = new File(OUT_DIR, "testDeleteFont.pdf"); + String text = "Test PDFBOX-4823"; + + InputStream is = PDFont.class.getResourceAsStream( + "/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"); + + OutputStream os = new FileOutputStream(tempFontFile); + IOUtils.copy(is, os); + is.close(); + os.close(); + + PDDocument doc = new PDDocument(); + PDPage page = new PDPage(); + doc.addPage(page); + PDPageContentStream cs = new PDPageContentStream(doc, page); + PDFont font1 = PDType0Font.load(doc, tempFontFile); + cs.beginText(); + cs.setFont(font1, 50); + cs.newLineAtOffset(50, 700); + cs.showText(text); + cs.endText(); + cs.close(); + doc.save(tempPdfFile); + doc.close(); + + Assert.assertTrue(tempFontFile.delete()); + + doc = PDDocument.load(tempPdfFile); + PDFTextStripper stripper = new PDFTextStripper(); + 
String extractedText = stripper.getText(doc); + Assert.assertEquals(text, extractedText.trim()); + doc.close(); + + Assert.assertTrue(tempPdfFile.delete()); + } + + /** + * PDFBOX-5115: U+00AD (soft hyphen) should work with WinAnsiEncoding. + */ + @Test + public void testSoftHyphen() throws IOException + { + String text = "- \u00AD"; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PDDocument doc = new PDDocument(); + PDPage page = new PDPage(); + doc.addPage(page); + PDFont font1 = PDType1Font.HELVETICA; + PDFont font2 = PDType0Font.load(doc, PDFontTest.class.getResourceAsStream( + "/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf")); + + Assert.assertEquals(font1.getStringWidth("-"), font1.getStringWidth("\u00AD"), 0); + Assert.assertEquals(font2.getStringWidth("-"), font2.getStringWidth("\u00AD"), 0); + + PDPageContentStream cs = new PDPageContentStream(doc, page); + cs.beginText(); + cs.newLineAtOffset(100, 500); + cs.setFont(font1, 10); + cs.showText(text); + cs.newLineAtOffset(0, 100); + cs.setFont(font2, 10); + cs.showText(text); + cs.endText(); + cs.close(); + doc.save(baos); + doc.close(); + + doc = PDDocument.load(baos.toByteArray()); + PDFTextStripper stripper = new PDFTextStripper(); + stripper.setLineSeparator("\n"); + String extractedText = stripper.getText(doc); + Assert.assertEquals(text + "\n" + text, extractedText.trim()); + doc.close(); + } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestFontEmbedding.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestFontEmbedding.java index 2e7f408240f..87c91449af9 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestFontEmbedding.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestFontEmbedding.java @@ -17,20 +17,37 @@ package org.apache.pdfbox.pdmodel.font; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.util.HashSet; +import java.util.Map; 
+import java.util.Set; + import junit.framework.TestCase; + +import org.apache.fontbox.ttf.OS2WindowsMetricsTable; +import org.apache.fontbox.ttf.TTFParser; +import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode; import org.apache.pdfbox.text.PDFTextStripper; +import static org.mockito.BDDMockito.given; +import org.mockito.Mockito; + /** * Tests font embedding. * * @author John Hewson + * @author Tilman Hausherr */ public class TestFontEmbedding extends TestCase { @@ -58,14 +75,164 @@ public void testCIDFontType2Subset() throws Exception validateCIDFontType2(true); } + /** + * Embed a monospace TTF as vertical CIDFontType2 with subsetting. 
+ * + * @throws IOException + */ + public void testCIDFontType2VerticalSubsetMonospace() throws IOException + { + String text = "「ABC」"; + String expectedExtractedtext = "「\nA\nB\nC\n」"; + File pdf = new File(OUT_DIR, "CIDFontType2VM.pdf"); + + PDDocument document = new PDDocument(); + PDPage page = new PDPage(PDRectangle.A4); + document.addPage(page); + + File ipafont = new File("target/fonts/ipag00303", "ipag.ttf"); + PDType0Font vfont = PDType0Font.loadVertical(document, ipafont); + + PDPageContentStream contentStream = new PDPageContentStream(document, page); + contentStream.beginText(); + contentStream.setFont(vfont, 20); + contentStream.newLineAtOffset(50, 700); + contentStream.showText(text); + contentStream.endText(); + contentStream.close(); + + // Check the font substitution + byte[] encode = vfont.encode(text); + int cid = ((encode[0] & 0xFF) << 8) + (encode[1] & 0xFF); + assertEquals(7392, cid); // it's 441 without substitution + + // Check the dictionaries + COSDictionary fontDict = vfont.getCOSObject(); + assertEquals(COSName.IDENTITY_V, fontDict.getDictionaryObject(COSName.ENCODING)); + + document.save(pdf); + + // Vertical metrics are fixed during subsetting, so do this after calling save() + COSDictionary descFontDict = vfont.getDescendantFont().getCOSObject(); + COSArray dw2 = (COSArray) descFontDict.getDictionaryObject(COSName.DW2); + assertNull(dw2); // This font uses default values for DW2 + COSArray w2 = (COSArray) descFontDict.getDictionaryObject(COSName.W2); + assertEquals(0, w2.size()); // Monospaced font has no entries + + document.close(); + + // Check text extraction + String extracted = getUnicodeText(pdf); + assertEquals(expectedExtractedtext, extracted.replaceAll("\r", "").trim()); + } + + /** + * Embed a proportional TTF as vertical CIDFontType2 with subsetting. 
+ * + * @throws IOException + */ + public void testCIDFontType2VerticalSubsetProportional() throws IOException + { + String text = "「ABC」"; + String expectedExtractedtext = "「\nA\nB\nC\n」"; + File pdf = new File(OUT_DIR, "CIDFontType2VP.pdf"); + + PDDocument document = new PDDocument(); + + PDPage page = new PDPage(PDRectangle.A4); + document.addPage(page); + File ipafont = new File("target/fonts/ipagp00303", "ipagp.ttf"); + PDType0Font vfont = PDType0Font.loadVertical(document, ipafont); + PDPageContentStream contentStream = new PDPageContentStream(document, page); + + contentStream.beginText(); + contentStream.setFont(vfont, 20); + contentStream.newLineAtOffset(50, 700); + contentStream.showText(text); + contentStream.endText(); + contentStream.close(); + + // Check the font substitution + byte[] encode = vfont.encode(text); + int cid = ((encode[0] & 0xFF) << 8) + (encode[1] & 0xFF); + assertEquals(12607, cid); // it's 12461 without substitution + // Check the dictionaries + COSDictionary fontDict = vfont.getCOSObject(); + assertEquals(COSName.IDENTITY_V, fontDict.getDictionaryObject(COSName.ENCODING)); + + document.save(pdf); + + // Vertical metrics are fixed during subsetting, so do this after calling save() + COSDictionary descFontDict = vfont.getDescendantFont().getCOSObject(); + COSArray dw2 = (COSArray) descFontDict.getDictionaryObject(COSName.DW2); + assertNull(dw2); // This font uses default values for DW2 + // c [ w1_1y v_1x v_1y ... 
w1_ny v_nx v_ny ] + COSArray w2 = (COSArray) descFontDict.getDictionaryObject(COSName.W2); + assertEquals(2, w2.size()); + assertEquals(12607, w2.getInt(0)); // Start CID + COSArray metrics = (COSArray) w2.getObject(1); + int i = 0; + for (int n : new int[] {-570, 500, 450, -570, 500, 880}) + { + assertEquals(n, metrics.getInt(i++)); + } + document.close(); + + // Check text extraction + String extracted = getUnicodeText(pdf); + assertEquals(expectedExtractedtext, extracted.replaceAll("\r", "").trim()); + } + + /** + * Test corner case of PDFBOX-4302. + * + * @throws java.io.IOException + */ + public void testMaxEntries() throws IOException + { + File file; + String text; + text = "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをん" + + "アイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワヲン" + + "12345678"; + + // The test must have MAX_ENTRIES_PER_OPERATOR unique characters + Set set = new HashSet(ToUnicodeWriter.MAX_ENTRIES_PER_OPERATOR); + for (int i = 0; i < text.length(); ++i) + { + set.add(text.charAt(i)); + } + assertEquals(ToUnicodeWriter.MAX_ENTRIES_PER_OPERATOR, set.size()); + + PDDocument document = new PDDocument(); + PDPage page = new PDPage(PDRectangle.A0); + document.addPage(page); + File ipafont = new File("target/fonts/ipag00303", "ipag.ttf"); + PDType0Font font = PDType0Font.load(document, ipafont); + PDPageContentStream contentStream = new PDPageContentStream(document, page); + contentStream.beginText(); + contentStream.setFont(font, 20); + contentStream.newLineAtOffset(50, 3000); + contentStream.showText(text); + contentStream.endText(); + contentStream.close(); + file = new File(OUT_DIR, "PDFBOX-4302-test.pdf"); + document.save(file); + document.close(); + + // check that the extracted text matches what we wrote + String extracted = getUnicodeText(file); + assertEquals(text, extracted.trim()); + } + private void validateCIDFontType2(boolean useSubset) throws Exception { PDDocument document = new PDDocument(); PDPage page = new PDPage(PDRectangle.A4); 
document.addPage(page); - InputStream input = TestFontEmbedding.class.getClassLoader().getResourceAsStream( - "org/apache/pdfbox/ttf/LiberationSans-Regular.ttf"); + InputStream input = PDFont.class.getResourceAsStream( + "/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"); PDType0Font font = PDType0Font.load(document, input, useSubset); PDPageContentStream stream = new PDPageContentStream(document, page); @@ -93,6 +260,165 @@ private String getUnicodeText(File file) throws IOException { PDDocument document = PDDocument.load(file); PDFTextStripper stripper = new PDFTextStripper(); - return stripper.getText(document); + String text = stripper.getText(document); + document.close(); + return text; + } + + /** + * Test that an embedded and subsetted font can be reused. + * + * @throws IOException + */ + public void testReuseEmbeddedSubsettedFont() throws IOException + { + String text1 = "The quick brown fox"; + String text2 = "xof nworb kciuq ehT"; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PDDocument document = new PDDocument(); + PDPage page = new PDPage(); + document.addPage(page); + InputStream input = PDFont.class.getResourceAsStream( + "/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"); + PDType0Font font = PDType0Font.load(document, input); + PDPageContentStream stream = new PDPageContentStream(document, page); + stream.beginText(); + stream.setFont(font, 20); + stream.newLineAtOffset(50, 600); + stream.showText(text1); + stream.endText(); + stream.close(); + document.save(baos); + document.close(); + // Append, while reusing the font subset + document = PDDocument.load(baos.toByteArray()); + page = document.getPage(0); + font = (PDType0Font) page.getResources().getFont(COSName.getPDFName("F1")); + stream = new PDPageContentStream(document, page, AppendMode.APPEND, true); + stream.beginText(); + stream.setFont(font, 20); + stream.newLineAtOffset(250, 600); + stream.showText(text2); + stream.endText(); + stream.close(); + 
baos.reset(); + document.save(baos); + document.close(); + // Test that both texts are there + document = PDDocument.load(baos.toByteArray()); + PDFTextStripper stripper = new PDFTextStripper(); + String extractedText = stripper.getText(document); + assertEquals(text1 + " " + text2, extractedText.trim()); + document.close(); + } + + private class TrueTypeEmbedderTester extends TrueTypeEmbedder + { + + /** + * Common functionality for testing the TrueTypeFontEmbedder + * + */ + TrueTypeEmbedderTester(PDDocument document, COSDictionary dict, TrueTypeFont ttf, boolean embedSubset) + throws IOException + { + super(document, dict, ttf, embedSubset); + } + + @Override + protected void buildSubset(InputStream ttfSubset, String tag, Map gidToCid) + throws IOException + { + // no-op. Need to define method to extend abstract class, but + // this method is not currently needed for testing + } + } + + /** + * Test that we validate embedding permissions properly for all legal permissions combinations + * + * @throws IOException + */ + public void testIsEmbeddingPermittedMultipleVersions() throws IOException + { + // SETUP + PDDocument doc = new PDDocument(); + COSDictionary cosDictionary = new COSDictionary(); + InputStream input = PDFont.class.getResourceAsStream("/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"); + TrueTypeFont ttf = new TTFParser().parseEmbedded(input); + TrueTypeEmbedderTester tester = new TrueTypeEmbedderTester(doc, cosDictionary, ttf, true); + TrueTypeFont mockTtf = Mockito.mock(TrueTypeFont.class); + OS2WindowsMetricsTable mockOS2 = Mockito.mock(OS2WindowsMetricsTable.class); + given(mockTtf.getOS2Windows()).willReturn(mockOS2); + Boolean embeddingIsPermitted; + + // TEST 1: 0000 -- Installable embedding versions 0-3+ + given(mockTtf.getOS2Windows().getFsType()).willReturn((short) 0x0000); + embeddingIsPermitted = tester.isEmbeddingPermitted(mockTtf); + + // VERIFY + assertTrue(embeddingIsPermitted); + + // no test for 0001, since bit 0 is 
permanently reserved, and its use is deprecated + // TEST 2: 0010 -- Restricted License embedding versions 0-3+ + given(mockTtf.getOS2Windows().getFsType()).willReturn((short) 0x0002); + embeddingIsPermitted = tester.isEmbeddingPermitted(mockTtf); + + // VERIFY + assertFalse(embeddingIsPermitted); + + // no test for 0011 + // TEST 3: 0100 -- Preview & Print embedding versions 0-3+ + given(mockTtf.getOS2Windows().getFsType()).willReturn((short) 0x0004); + embeddingIsPermitted = tester.isEmbeddingPermitted(mockTtf); + + // VERIFY + assertTrue(embeddingIsPermitted); + + // no test for 0101 + // TEST 4: 0110 -- Restricted License embedding AND Preview & Print embedding versions 0-2 + // -- illegal permissions combination for versions 3+ + given(mockTtf.getOS2Windows().getFsType()).willReturn((short) 0x0006); + embeddingIsPermitted = tester.isEmbeddingPermitted(mockTtf); + + // VERIFY + assertTrue(embeddingIsPermitted); + + // no test for 0111 + // TEST 5: 1000 -- Editable embedding versions 0-3+ + given(mockTtf.getOS2Windows().getFsType()).willReturn((short) 0x0008); + embeddingIsPermitted = tester.isEmbeddingPermitted(mockTtf); + + // VERIFY + assertTrue(embeddingIsPermitted); + + // no test for 1001 + // TEST 6: 1010 -- Restricted License embedding AND Editable embedding versions 0-2 + // -- illegal permissions combination for versions 3+ + given(mockTtf.getOS2Windows().getFsType()).willReturn((short) 0x000A); + embeddingIsPermitted = tester.isEmbeddingPermitted(mockTtf); + + // VERIFY + assertTrue(embeddingIsPermitted); + + // no test for 1011 + // TEST 7: 1100 -- Editable embedding AND Preview & Print embedding versions 0-2 + // -- illegal permissions combination for versions 3+ + given(mockTtf.getOS2Windows().getFsType()).willReturn((short) 0x000C); + embeddingIsPermitted = tester.isEmbeddingPermitted(mockTtf); + + // VERIFY + assertTrue(embeddingIsPermitted); + + // no test for 1101 + // TEST 8: 1110 Editable embedding AND Preview & Print embedding AND Restricted 
License embedding versions 0-2 + // -- illegal permissions combination for versions 3+ + given(mockTtf.getOS2Windows().getFsType()).willReturn((short) 0x000E); + embeddingIsPermitted = tester.isEmbeddingPermitted(mockTtf); + + // VERIFY + assertTrue(embeddingIsPermitted); + + // no test for 1111 } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestFontEncoding.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestFontEncoding.java new file mode 100644 index 00000000000..985a6a07236 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestFontEncoding.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.pdmodel.font; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import junit.framework.TestCase; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSInteger; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding; +import org.apache.pdfbox.pdmodel.font.encoding.MacRomanEncoding; +import org.apache.pdfbox.pdmodel.font.encoding.WinAnsiEncoding; +import org.apache.pdfbox.text.PDFTextStripper; + +/** + * Tests font encoding. + * + */ +public class TestFontEncoding extends TestCase +{ + /** + * Test the add method of a font encoding. + */ + public void testAdd() throws Exception + { + // see PDFBOX-3332: both built-in encodings must map the name "space" to code 32 + int codeForSpace = WinAnsiEncoding.INSTANCE.getNameToCodeMap().get("space"); + assertEquals(32, codeForSpace); + + codeForSpace = MacRomanEncoding.INSTANCE.getNameToCodeMap().get("space"); + assertEquals(32, codeForSpace); + } + + public void testOverwrite() throws Exception + { + // see PDFBOX-3332: a /Differences entry for code 32 must replace the base encoding's "space" name + COSDictionary dictEncodingDict = new COSDictionary(); + dictEncodingDict.setItem(COSName.TYPE, COSName.ENCODING); + dictEncodingDict.setItem(COSName.BASE_ENCODING, COSName.WIN_ANSI_ENCODING); + COSArray differences = new COSArray(); + differences.add(COSInteger.get(32)); + differences.add(COSName.getPDFName("a")); + dictEncodingDict.setItem(COSName.DIFFERENCES, differences); + DictionaryEncoding dictEncoding = new DictionaryEncoding(dictEncodingDict, false, null); + assertNull(dictEncoding.getNameToCodeMap().get("space")); + assertEquals(32, dictEncoding.getNameToCodeMap().get("a").intValue()); + } + + /** + * PDFBOX-3826: Some unicodes are reached by several names in glyphlist.txt, e.g. tilde and + * ilde. 
+ * + * @throws IOException + */ + public void testPDFBox3884() throws IOException + { + PDDocument doc = new PDDocument(); + PDPage page = new PDPage(); + doc.addPage(page); + PDPageContentStream cs = new PDPageContentStream(doc, page); + cs.setFont(PDType1Font.HELVETICA, 20); + cs.beginText(); + cs.newLineAtOffset(100, 700); + // first tilde is "asciitilde" (from the keyboard), 2nd tilde is "tilde" + // using ˜ would bring IllegalArgumentException prior to bugfix + cs.showText("~˜"); + cs.endText(); + cs.close(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + doc.save(baos); + doc.close(); + + // verify + doc = PDDocument.load(baos.toByteArray()); + PDFTextStripper stripper = new PDFTextStripper(); + String text = stripper.getText(doc); + assertEquals("~˜", text.trim()); + doc.close(); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java index bc0b1a54064..f0fcd65059e 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java @@ -41,8 +41,8 @@ public class TestTTFParser @Test public void testPostTable() throws IOException { - InputStream input = TestTTFParser.class.getClassLoader().getResourceAsStream( - "org/apache/pdfbox/ttf/LiberationSans-Regular.ttf"); + InputStream input = PDFont.class.getResourceAsStream( + "/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"); Assert.assertNotNull(input); TTFParser parser = new TTFParser(); diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/PDLineDashPatternTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/PDLineDashPatternTest.java new file mode 100644 index 00000000000..ab35ff5491f --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/PDLineDashPatternTest.java @@ -0,0 +1,55 @@ 
+/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + *******************************************************************************/ + +package org.apache.pdfbox.pdmodel.graphics; + +import static junit.framework.TestCase.assertEquals; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSFloat; +import org.apache.pdfbox.cos.COSInteger; +import org.junit.Test; + +/** + * + * @author Tilman Hausherr + */ +public class PDLineDashPatternTest +{ + + /** + * Test of getCOSObject method, of class PDLineDashPattern. 
+ */ + @Test + public void testGetCOSObject() + { + COSArray ar = new COSArray(); + ar.add(COSInteger.ONE); + ar.add(COSInteger.TWO); + PDLineDashPattern dash = new PDLineDashPattern(ar, 3); + COSArray dashBase = (COSArray) dash.getCOSObject(); + COSArray dashArray = (COSArray) dashBase.getObject(0); + assertEquals(2, dashBase.size()); + assertEquals(2, dashArray.size()); + assertEquals(new COSFloat(1), dashArray.get(0)); + assertEquals(new COSFloat(2), dashArray.get(1)); + assertEquals(COSInteger.THREE, dashBase.get(1)); + System.out.println(dash); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/blend/BlendModeTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/blend/BlendModeTest.java new file mode 100644 index 00000000000..0b20b26659a --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/blend/BlendModeTest.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.graphics.blend; + +import static junit.framework.TestCase.assertEquals; +import org.apache.pdfbox.cos.COSName; +import org.junit.Test; + +/** + * + * @author Tilman Hausherr + */ +public class BlendModeTest +{ + public BlendModeTest() + { + } + + /** + * Check that BlendMode.* constant instances are not null. This could happen if the declaration + * sequence is changed. + */ + @Test + public void testInstances() + { + assertEquals(BlendMode.NORMAL, BlendMode.getInstance(COSName.NORMAL)); + assertEquals(BlendMode.NORMAL, BlendMode.getInstance(COSName.COMPATIBLE)); + assertEquals(BlendMode.MULTIPLY, BlendMode.getInstance(COSName.MULTIPLY)); + assertEquals(BlendMode.SCREEN, BlendMode.getInstance(COSName.SCREEN)); + assertEquals(BlendMode.OVERLAY, BlendMode.getInstance(COSName.OVERLAY)); + assertEquals(BlendMode.DARKEN, BlendMode.getInstance(COSName.DARKEN)); + assertEquals(BlendMode.LIGHTEN, BlendMode.getInstance(COSName.LIGHTEN)); + assertEquals(BlendMode.COLOR_DODGE, BlendMode.getInstance(COSName.COLOR_DODGE)); + assertEquals(BlendMode.COLOR_BURN, BlendMode.getInstance(COSName.COLOR_BURN)); + assertEquals(BlendMode.HARD_LIGHT, BlendMode.getInstance(COSName.HARD_LIGHT)); + assertEquals(BlendMode.SOFT_LIGHT, BlendMode.getInstance(COSName.SOFT_LIGHT)); + assertEquals(BlendMode.DIFFERENCE, BlendMode.getInstance(COSName.DIFFERENCE)); + assertEquals(BlendMode.EXCLUSION, BlendMode.getInstance(COSName.EXCLUSION)); + assertEquals(BlendMode.HUE, BlendMode.getInstance(COSName.HUE)); + assertEquals(BlendMode.SATURATION, BlendMode.getInstance(COSName.SATURATION)); + assertEquals(BlendMode.LUMINOSITY, BlendMode.getInstance(COSName.LUMINOSITY)); + assertEquals(BlendMode.COLOR, BlendMode.getInstance(COSName.COLOR)); + } + + /** + * Check that COSName constants returned for BlendMode.* instances are not null. This could + * happen if the declaration sequence is changed. 
+ */ + @Test + public void testCOSNames() + { + assertEquals(COSName.NORMAL, BlendMode.getCOSName(BlendMode.NORMAL)); + assertEquals(COSName.NORMAL, BlendMode.getCOSName(BlendMode.COMPATIBLE)); + assertEquals(COSName.MULTIPLY, BlendMode.getCOSName(BlendMode.MULTIPLY)); + assertEquals(COSName.SCREEN, BlendMode.getCOSName(BlendMode.SCREEN)); + assertEquals(COSName.OVERLAY, BlendMode.getCOSName(BlendMode.OVERLAY)); + assertEquals(COSName.DARKEN, BlendMode.getCOSName(BlendMode.DARKEN)); + assertEquals(COSName.LIGHTEN, BlendMode.getCOSName(BlendMode.LIGHTEN)); + assertEquals(COSName.COLOR_DODGE, BlendMode.getCOSName(BlendMode.COLOR_DODGE)); + assertEquals(COSName.COLOR_BURN, BlendMode.getCOSName(BlendMode.COLOR_BURN)); + assertEquals(COSName.HARD_LIGHT, BlendMode.getCOSName(BlendMode.HARD_LIGHT)); + assertEquals(COSName.SOFT_LIGHT, BlendMode.getCOSName(BlendMode.SOFT_LIGHT)); + assertEquals(COSName.DIFFERENCE, BlendMode.getCOSName(BlendMode.DIFFERENCE)); + assertEquals(COSName.EXCLUSION, BlendMode.getCOSName(BlendMode.EXCLUSION)); + assertEquals(COSName.HUE, BlendMode.getCOSName(BlendMode.HUE)); + assertEquals(COSName.SATURATION, BlendMode.getCOSName(BlendMode.SATURATION)); + assertEquals(COSName.LUMINOSITY, BlendMode.getCOSName(BlendMode.LUMINOSITY)); + assertEquals(COSName.COLOR, BlendMode.getCOSName(BlendMode.COLOR)); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactoryTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactoryTest.java index 9052f9a1542..bec1d0f1d1e 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactoryTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactoryTest.java @@ -17,11 +17,17 @@ import java.awt.image.BufferedImage; import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import 
java.util.Arrays; import javax.imageio.ImageIO; import javax.imageio.ImageReader; import javax.imageio.stream.ImageInputStream; import junit.framework.TestCase; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; @@ -31,6 +37,7 @@ import static org.apache.pdfbox.pdmodel.graphics.image.ValidateXImage.checkIdent; import static org.apache.pdfbox.pdmodel.graphics.image.ValidateXImage.validate; +import org.junit.Assert; /** * Unit tests for CCITTFactory @@ -81,7 +88,7 @@ public void testCreateFromRandomAccessSingle() throws IOException document.save(testResultsDir + "/singletiff.pdf"); document.close(); - document = PDDocument.load(new File(testResultsDir, "singletiff.pdf"), (String)null); + document = PDDocument.load(new File(testResultsDir, "singletiff.pdf")); assertEquals(2, document.getNumberOfPages()); document.close(); @@ -136,4 +143,128 @@ public void testCreateFromRandomAccessMulti() throws IOException document.close(); imageReader.dispose(); } + + public void testCreateFromBufferedImage() throws IOException + { + String tiffG4Path = "src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg4.tif"; + + PDDocument document = new PDDocument(); + BufferedImage bim = ImageIO.read(new File(tiffG4Path)); + PDImageXObject ximage3 = CCITTFactory.createFromImage(document, bim); + validate(ximage3, 1, 344, 287, "tiff", PDDeviceGray.INSTANCE.getName()); + checkIdent(bim, ximage3.getOpaqueImage()); + + PDPage page = new PDPage(PDRectangle.A4); + document.addPage(page); + PDPageContentStream contentStream = new PDPageContentStream(document, page, AppendMode.APPEND, false); + contentStream.drawImage(ximage3, 0, 0, ximage3.getWidth(), ximage3.getHeight()); + contentStream.close(); + + document.save(testResultsDir + "/singletifffrombi.pdf"); + document.close(); + + document = PDDocument.load(new File(testResultsDir, "singletifffrombi.pdf")); + 
assertEquals(1, document.getNumberOfPages()); + + document.close(); + } + + public void testCreateFromBufferedChessImage() throws IOException + { + PDDocument document = new PDDocument(); + BufferedImage bim = new BufferedImage(343, 287, BufferedImage.TYPE_BYTE_BINARY); + Assert.assertNotEquals((bim.getWidth() / 8) * 8, bim.getWidth()); // not mult of 8 + int col = 0; + for (int x = 0; x < bim.getWidth(); ++x) + { + for (int y = 0; y < bim.getHeight(); ++y) + { + bim.setRGB(x, y, col & 0xFFFFFF); + col = ~col; + } + } + + PDImageXObject ximage3 = CCITTFactory.createFromImage(document, bim); + validate(ximage3, 1, 343, 287, "tiff", PDDeviceGray.INSTANCE.getName()); + checkIdent(bim, ximage3.getOpaqueImage()); + + PDPage page = new PDPage(PDRectangle.A4); + document.addPage(page); + PDPageContentStream contentStream = new PDPageContentStream(document, page, AppendMode.APPEND, false); + contentStream.drawImage(ximage3, 0, 0, ximage3.getWidth(), ximage3.getHeight()); + contentStream.close(); + + document.save(testResultsDir + "/singletifffromchessbi.pdf"); + document.close(); + + document = PDDocument.load(new File(testResultsDir, "singletifffromchessbi.pdf")); + assertEquals(1, document.getNumberOfPages()); + + document.close(); + } + + /** + * Tests that CCITTFactory#createFromFile(PDDocument document, File file) doesn't lock the + * source file + */ + public void testCreateFromFileLock() throws IOException + { + // copy the source file to a temp directory, as we will be deleting it + String tiffG3Path = "src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3.tif"; + File copiedTiffFile = new File(testResultsDir, "ccittg3.tif"); + copyFile(new File(tiffG3Path), copiedTiffFile); + PDDocument document = new PDDocument(); + CCITTFactory.createFromFile(document, copiedTiffFile); + assertTrue(copiedTiffFile.delete()); + } + + /** + * Tests that CCITTFactory#createFromFile(PDDocument document, File file, int number) doesn't + * lock the source file + */ + 
public void testCreateFromFileNumberLock() throws IOException + { + // copy the source file to a temp directory, as we will be deleting it + String tiffG3Path = "src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3.tif"; + File copiedTiffFile = new File(testResultsDir, "ccittg3n.tif"); + copyFile(new File(tiffG3Path), copiedTiffFile); + PDDocument document = new PDDocument(); + CCITTFactory.createFromFile(document, copiedTiffFile, 0); + assertTrue(copiedTiffFile.delete()); + } + + private void copyFile(File source, File dest) throws IOException + { + InputStream is = null; + OutputStream os = null; + try + { + is = new FileInputStream(source); + os = new FileOutputStream(dest); + IOUtils.copy(is, os); + } + finally + { + IOUtils.closeQuietly(is); // null-safe: a failed open must not turn into a NullPointerException here + IOUtils.closeQuietly(os); // always reached, even if closing the input stream failed + } + } + + /** + * Tests that byte/short tag values are read correctly (ignoring possible garbage in remaining + * bytes). + */ + public void testByteShortPaddedWithGarbage() throws IOException + { + PDDocument document = new PDDocument(); + String basePath = "src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields"; + for (String ext : Arrays.asList(".tif", "-bigendian.tif")) + { + String tiffPath = basePath + ext; + PDImageXObject ximage3 = CCITTFactory.createFromFile(document, new File(tiffPath)); + validate(ximage3, 1, 344, 287, "tiff", PDDeviceGray.INSTANCE.getName()); + } + document.close(); + } + + } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/JPEGFactoryTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/JPEGFactoryTest.java index 3c638b872e5..7c855732db6 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/JPEGFactoryTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/JPEGFactoryTest.java @@ -17,8 +17,10 @@ import java.awt.Graphics; import java.awt.image.BufferedImage; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; 
+import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.Arrays; @@ -27,6 +29,7 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceCMYK; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; @@ -66,6 +69,21 @@ public void testCreateFromStream() throws IOException checkJpegStream(testResultsDir, "jpegrgbstream.pdf", JPEGFactoryTest.class.getResourceAsStream("jpeg.jpg")); } + /* + * Tests JPEGFactory#createFromStream(PDDocument document, InputStream + * stream) with CMYK color JPEG file + */ + public void testCreateFromStreamCMYK() throws IOException + { + PDDocument document = new PDDocument(); + InputStream stream = JPEGFactoryTest.class.getResourceAsStream("jpegcmyk.jpg"); + PDImageXObject ximage = JPEGFactory.createFromStream(document, stream); + validate(ximage, 8, 343, 287, "jpg", PDDeviceCMYK.INSTANCE.getName()); + + doWritePDF(document, ximage, testResultsDir, "jpegcmykstream.pdf"); + checkJpegStream(testResultsDir, "jpegcmykstream.pdf", JPEGFactoryTest.class.getResourceAsStream("jpegcmyk.jpg")); + } + /** * Tests JPEGFactory#createFromStream(PDDocument document, InputStream * stream) with gray JPEG file @@ -199,6 +217,71 @@ public void testCreateFromImage4BYTE_ABGR() throws IOException doWritePDF(document, ximage, testResultsDir, "jpeg-4bargb.pdf"); } + /** + * Tests USHORT_555_RGB JPEGFactory#createFromImage(PDDocument document, BufferedImage + * image), see also PDFBOX-4674. 
+ * @throws java.io.IOException + */ + public void testCreateFromImageUSHORT_555_RGB() throws IOException + { + // workaround Open JDK bug + // http://bugs.java.com/bugdatabase/view_bug.do?bug_id=7044758 + if (System.getProperty("java.runtime.name").equals("OpenJDK Runtime Environment") + && (System.getProperty("java.specification.version").equals("1.6") + || System.getProperty("java.specification.version").equals("1.7") + || System.getProperty("java.specification.version").equals("1.8"))) + { + return; + } + + PDDocument document = new PDDocument(); + BufferedImage image = ImageIO.read(JPEGFactoryTest.class.getResourceAsStream("jpeg.jpg")); + + // create an USHORT_555_RGB image + int width = image.getWidth(); + int height = image.getHeight(); + BufferedImage rgbImage = new BufferedImage(width, height, BufferedImage.TYPE_USHORT_555_RGB); + Graphics ag = rgbImage.getGraphics(); + ag.drawImage(image, 0, 0, null); + ag.dispose(); + + for (int x = 0; x < rgbImage.getWidth(); ++x) + { + for (int y = 0; y < rgbImage.getHeight(); ++y) + { + rgbImage.setRGB(x, y, (rgbImage.getRGB(x, y) & 0xFFFFFF) | ((y / 10 * 10) << 24)); + } + } + + PDImageXObject ximage = JPEGFactory.createFromImage(document, rgbImage); + validate(ximage, 8, width, height, "jpg", PDDeviceRGB.INSTANCE.getName()); + assertNull(ximage.getSoftMask()); + + doWritePDF(document, ximage, testResultsDir, "jpeg-ushort555rgb.pdf"); + } + + /** + * PDFBOX-5137 and PDFBOX-5196: check that numFrameComponents and not numScanComponents is used + * to determine the color space. 
+ * + * @throws IOException + */ + public void testPDFBox5137() throws IOException + { + InputStream is = new FileInputStream("target/imgs/PDFBOX-5196-lotus.jpg"); + byte[] ba = IOUtils.toByteArray(is); + is.close(); + + PDDocument document = new PDDocument(); + + PDImageXObject ximage = JPEGFactory.createFromByteArray(document, ba); + + validate(ximage, 8, 500, 500, "jpg", PDDeviceRGB.INSTANCE.getName()); + + doWritePDF(document, ximage, testResultsDir, "PDFBOX-5196-lotus.pdf"); + checkJpegStream(testResultsDir, "PDFBOX-5196-lotus.pdf", new ByteArrayInputStream(ba)); + } + // check whether it is possible to extract the jpeg stream exactly // as it was passed to createFromStream private void checkJpegStream(File testResultsDir, String filename, InputStream resourceStream) diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactoryTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactoryTest.java index ea632d68a4a..63d571cb441 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactoryTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/LosslessFactoryTest.java @@ -19,10 +19,21 @@ import java.awt.Graphics; import java.awt.Graphics2D; import java.awt.GraphicsConfiguration; +import java.awt.Point; import java.awt.Transparency; +import java.awt.color.ColorSpace; +import java.awt.color.ICC_ColorSpace; +import java.awt.color.ICC_Profile; import java.awt.image.BufferedImage; +import java.awt.image.ColorConvertOp; +import java.awt.image.ColorModel; +import java.awt.image.ComponentColorModel; +import java.awt.image.DataBuffer; +import java.awt.image.Raster; +import java.awt.image.WritableRaster; import java.io.File; import java.io.IOException; +import java.util.Hashtable; import java.util.Random; import javax.imageio.ImageIO; import junit.framework.TestCase; @@ -37,6 +48,7 @@ import static org.apache.pdfbox.pdmodel.graphics.image.ValidateXImage.doWritePDF; 
import static org.apache.pdfbox.pdmodel.graphics.image.ValidateXImage.validate; import org.apache.pdfbox.rendering.PDFRenderer; +import org.junit.Assert; /** * Unit tests for LosslessFactory @@ -82,7 +94,7 @@ public void testCreateLosslessFromImageRGB() throws IOException BufferedImage bitonalImage = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY); // avoid multiple of 8 to test padding - assertFalse(bitonalImage.getWidth() % 8 == 0); + Assert.assertNotEquals(0, bitonalImage.getWidth() % 8); g = bitonalImage.getGraphics(); g.drawImage(image, 0, 0, null); @@ -158,7 +170,7 @@ public void testCreateLosslessFromImageINT_ARGB() throws IOException */ public void testCreateLosslessFromImageBITMASK_INT_ARGB() throws IOException { + /* NOTE(review): the test name and the PDF name still say INT_ARGB while the added line passes TYPE_4BYTE_ABGR; confirm that this swap is intended */ - doBitmaskTransparencyTest(BufferedImage.TYPE_INT_ARGB, "bitmaskintargb.pdf"); + doBitmaskTransparencyTest(BufferedImage.TYPE_4BYTE_ABGR, "bitmaskintargb.pdf"); } /** @@ -221,6 +233,45 @@ public void testCreateLosslessFromImage4BYTE_ABGR() throws IOException doWritePDF(document, ximage, testResultsDir, "4babgr.pdf"); } + /** + * Tests USHORT_555_RGB LosslessFactoryTest#createFromImage(PDDocument document, BufferedImage + * image). This should create an 8-bit-image; prevent the problems from PDFBOX-4674 in case + * image creation is modified in the future.
+ * + * @throws java.io.IOException + */ + public void testCreateLosslessFromImageUSHORT_555_RGB() throws IOException + { + PDDocument document = new PDDocument(); + BufferedImage source = ImageIO.read(this.getClass().getResourceAsStream("png.png")); + int w = source.getWidth(); + int h = source.getHeight(); + + /* paint the PNG into a USHORT_555_RGB buffer */ + BufferedImage rgbImage = new BufferedImage(w, h, BufferedImage.TYPE_USHORT_555_RGB); + Graphics painter = rgbImage.getGraphics(); + painter.drawImage(source, 0, 0, null); + painter.dispose(); + + /* keep the low 24 bits of every pixel and put a value derived from the row into the top byte */ + int columns = rgbImage.getWidth(); + int rows = rgbImage.getHeight(); + for (int x = 0; x < columns; ++x) + { + for (int y = 0; y < rows; ++y) + { + int lowBits = rgbImage.getRGB(x, y) & 0xFFFFFF; + int topByte = (y / 10 * 10) << 24; + rgbImage.setRGB(x, y, lowBits | topByte); + } + } + + PDImageXObject ximage = LosslessFactory.createFromImage(document, rgbImage); + + validate(ximage, 8, w, h, "png", PDDeviceRGB.INSTANCE.getName()); + checkIdent(rgbImage, ximage.getImage()); + checkIdentRGB(rgbImage, ximage.getOpaqueImage()); + + /* no soft mask is expected for this image type */ + assertNull(ximage.getSoftMask()); + + doWritePDF(document, ximage, testResultsDir, "ushort555rgb.pdf"); + } + /** * Tests LosslessFactoryTest#createFromImage(PDDocument document, * BufferedImage image) with transparent GIF @@ -249,6 +300,52 @@ public void testCreateLosslessFromTransparentGIF() throws IOException doWritePDF(document, ximage, testResultsDir, "gif.pdf"); } + /** + * Tests LosslessFactoryTest#createFromImage(PDDocument document, + * BufferedImage image) with a transparent 1 bit GIF. (PDFBOX-4672) + * This ends up as RGB because the 1 bit fast path doesn't support transparency.
+ * + * @throws java.io.IOException + */ + public void testCreateLosslessFromTransparent1BitGIF() throws IOException + { + PDDocument document = new PDDocument(); + BufferedImage gif = ImageIO.read(this.getClass().getResourceAsStream("gif-1bit-transparent.gif")); + + /* precondition: the decoder delivers a TYPE_BYTE_BINARY image with bitmask transparency */ + assertEquals(Transparency.BITMASK, gif.getColorModel().getTransparency()); + assertEquals(BufferedImage.TYPE_BYTE_BINARY, gif.getType()); + + PDImageXObject ximage = LosslessFactory.createFromImage(document, gif); + + int width = gif.getWidth(); + int height = gif.getHeight(); + validate(ximage, 8, width, height, "png", PDDeviceRGB.INSTANCE.getName()); + checkIdent(gif, ximage.getImage()); + checkIdentRGB(gif, ximage.getOpaqueImage()); + + /* the transparency has to show up as a 1 bit gray soft mask with two values */ + assertNotNull(ximage.getSoftMask()); + validate(ximage.getSoftMask(), 1, width, height, "png", PDDeviceGray.INSTANCE.getName()); + assertEquals(2, colorCount(ximage.getSoftMask().getImage())); + + doWritePDF(document, ximage, testResultsDir, "gif-1bit-transparent.pdf"); + } + + /** + * Test file that had a predictor encoding bug in PDFBOX-4184. + * + * @throws java.io.IOException + */ + public void testCreateLosslessFromGovdocs032163() throws IOException + { + PDDocument document = new PDDocument(); + File jpegFile = new File("target/imgs", "PDFBOX-4184-032163.jpg"); + BufferedImage jpeg = ImageIO.read(jpegFile); + PDImageXObject ximage = LosslessFactory.createFromImage(document, jpeg); + validate(ximage, 8, jpeg.getWidth(), jpeg.getHeight(), "png", PDDeviceRGB.INSTANCE.getName()); + checkIdent(jpeg, ximage.getImage()); + + doWritePDF(document, ximage, testResultsDir, "PDFBOX-4184-032163.pdf"); + } + /** * Check whether the RGB part of images are identical. * @@ -276,6 +373,57 @@ private void checkIdentRGB(BufferedImage expectedImage, BufferedImage actualImag } } + /** + * Check whether the raw data of images are identical.
+ * @param expectedImage + * @param actualImage + */ + static void checkIdentRaw(BufferedImage expectedImage, PDImageXObject actualImage) + throws IOException + { + WritableRaster wanted = expectedImage.getRaster(); + WritableRaster found = actualImage.getRawRaster(); + int width = wanted.getWidth(); + int height = wanted.getHeight(); + assertEquals(width, found.getWidth()); + assertEquals(height, found.getHeight()); + assertEquals(wanted.getDataBuffer().getDataType(), found.getDataBuffer().getDataType()); + + /* the alpha channel is stored extra, so it is left out of the comparison */ + int elementCount = wanted.getNumDataElements(); + boolean hasAlpha = expectedImage.getAlphaRaster() != null; + int comparedCount = hasAlpha ? elementCount - 1 : elementCount; + assertEquals(comparedCount, found.getNumDataElements()); + + int[] wantedPixel = new int[elementCount]; + int[] foundPixel = new int[elementCount]; + for (int row = 0; row < height; ++row) + { + for (int col = 0; col < width; ++col) + { + wanted.getPixel(col, row, wantedPixel); + found.getPixel(col, row, foundPixel); + for (int channel = 0; channel < comparedCount; channel++) + { + if (wantedPixel[channel] == foundPixel[channel]) + { + continue; + } + /* the message is only built for the first mismatch, which also fails the test */ + String errMsg = String.format("(%d,%d) Channel %d %04X != %04X", col, row, channel, + wantedPixel[channel], foundPixel[channel]); + assertEquals(errMsg, wantedPixel[channel], foundPixel[channel]); + } + } + } + } + private void doBitmaskTransparencyTest(int imageType, String pdfFilename) throws IOException { PDDocument document = new PDDocument(); @@ -342,8 +490,8 @@ private void doBitmaskTransparencyTest(int imageType, String pdfFilename) throws BufferedImage maskImage = ximage.getSoftMask().getImage(); // avoid multiple of 8 to test padding - assertFalse(maskImage.getWidth() %
8 == 0); - + Assert.assertNotEquals(0, maskImage.getWidth() % 8); + assertEquals(Transparency.OPAQUE, maskImage.getTransparency()); for (int x = 0; x < width; ++x) { @@ -385,4 +533,115 @@ private void doBitmaskTransparencyTest(int imageType, String pdfFilename) throws document.close(); } + + /** + * Test lossless encoding of CMYK images: the PNG test image is converted with the + * ISOcoated_v2_300_bas ICC profile and the result is validated as an 8 bit ICCBased image. + * + * @throws java.io.IOException + */ + public void testCreateLosslessFromImageCMYK() throws IOException + { + PDDocument document = new PDDocument(); + BufferedImage image = ImageIO.read(this.getClass().getResourceAsStream("png.png")); + + final ColorSpace targetCS = new ICC_ColorSpace(ICC_Profile + .getInstance(this.getClass().getResourceAsStream("/org/apache/pdfbox/resources/icc/ISOcoated_v2_300_bas.icc"))); + ColorConvertOp op = new ColorConvertOp(image.getColorModel().getColorSpace(), targetCS, null); + BufferedImage imageCMYK = op.filter(image, null); + + PDImageXObject ximage = LosslessFactory.createFromImage(document, imageCMYK); + validate(ximage, 8, imageCMYK.getWidth(), imageCMYK.getHeight(), "png", "ICCBased"); + + doWritePDF(document, ximage, testResultsDir, "cmyk.pdf"); + + // still slight difference of 1 color level + //checkIdent(imageCMYK, ximage.getImage()); + } + + /** + * Tests LosslessFactory#createFromImage(PDDocument document, BufferedImage image) with an + * opaque sRGB image whose raster uses DataBuffer.TYPE_USHORT samples. + * + * @throws java.io.IOException + */ + public void testCreateLosslessFrom16Bit() throws IOException + { + PDDocument document = new PDDocument(); + BufferedImage image = ImageIO.read(this.getClass().getResourceAsStream("png.png")); + + ColorSpace targetCS = ColorSpace.getInstance(ColorSpace.CS_sRGB); + int dataBufferType = DataBuffer.TYPE_USHORT; + final ColorModel colorModel = new ComponentColorModel(targetCS, false, false, + ColorModel.OPAQUE, dataBufferType); + WritableRaster targetRaster = Raster.createInterleavedRaster(dataBufferType, image.getWidth(), image.getHeight(), + targetCS.getNumComponents(), new Point(0, 0)); + BufferedImage img16Bit = new BufferedImage(colorModel, targetRaster, false, new Hashtable()); + ColorConvertOp op = new ColorConvertOp(image.getColorModel().getColorSpace(), targetCS, null); +
op.filter(image, img16Bit); + + PDImageXObject ximage = LosslessFactory.createFromImage(document, img16Bit); + validate(ximage, 16, img16Bit.getWidth(), img16Bit.getHeight(), "png", PDDeviceRGB.INSTANCE.getName()); + checkIdent(image, ximage.getImage()); + doWritePDF(document, ximage, testResultsDir, "misc-16bit.pdf"); + } + + /** + * Tests LosslessFactory#createFromImage(PDDocument document, BufferedImage image) with a + * TYPE_INT_BGR copy of the PNG test image. + * + * @throws java.io.IOException + */ + public void testCreateLosslessFromImageINT_BGR() throws IOException + { + PDDocument document = new PDDocument(); + BufferedImage image = ImageIO.read(this.getClass().getResourceAsStream("png.png")); + + BufferedImage imgBgr = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_INT_BGR); + Graphics2D graphics = imgBgr.createGraphics(); + graphics.drawImage(image, 0, 0, null); + /* the graphics context is no longer needed once the copy is painted */ + graphics.dispose(); + + PDImageXObject ximage = LosslessFactory.createFromImage(document, imgBgr); + validate(ximage, 8, imgBgr.getWidth(), imgBgr.getHeight(), "png", PDDeviceRGB.INSTANCE.getName()); + checkIdent(image, ximage.getImage()); + + /* no PDF is written in this test, so the document is closed explicitly */ + document.close(); + } + + /** + * Tests LosslessFactory#createFromImage(PDDocument document, BufferedImage image) with a + * TYPE_INT_RGB copy of the PNG test image. + * + * @throws java.io.IOException + */ + public void testCreateLosslessFromImageINT_RGB() throws IOException + { + PDDocument document = new PDDocument(); + BufferedImage image = ImageIO.read(this.getClass().getResourceAsStream("png.png")); + + BufferedImage imgRgb = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_INT_RGB); + Graphics2D graphics = imgRgb.createGraphics(); + graphics.drawImage(image, 0, 0, null); + /* the graphics context is no longer needed once the copy is painted */ + graphics.dispose(); + + PDImageXObject ximage = LosslessFactory.createFromImage(document, imgRgb); + validate(ximage, 8, imgRgb.getWidth(), imgRgb.getHeight(), "png", PDDeviceRGB.INSTANCE.getName()); + checkIdent(image, ximage.getImage()); + + /* no PDF is written in this test, so the document is closed explicitly */ + document.close(); + } + + /** + * Tests LosslessFactory#createFromImage(PDDocument document, BufferedImage image) with a + * TYPE_3BYTE_BGR copy of the PNG test image. + * + * @throws java.io.IOException + */ + public void testCreateLosslessFromImageBYTE_3BGR() throws IOException + { + PDDocument document = new PDDocument(); + BufferedImage image = ImageIO.read(this.getClass().getResourceAsStream("png.png")); + + BufferedImage imgRgb = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_3BYTE_BGR); + Graphics2D graphics = imgRgb.createGraphics(); + graphics.drawImage(image, 0, 0, null); + /* the graphics context is no longer needed once the copy is painted */ + graphics.dispose(); + +
PDImageXObject ximage = LosslessFactory.createFromImage(document, imgRgb); + validate(ximage, 8, imgRgb.getWidth(), imgRgb.getHeight(), "png", PDDeviceRGB.INSTANCE.getName()); + checkIdent(image, ximage.getImage()); + + /* no PDF is written in this test, so the document is closed explicitly */ + document.close(); + } + + /** + * Tests LosslessFactory#createFromImage(PDDocument document, BufferedImage image) with the + * PDFBOX-4184 16 bit PNG. The preconditions assert a translucent image with four unsigned + * short data elements per pixel; the result is validated as a 16 bit image with a 16 bit + * soft mask. + * + * @throws java.io.IOException + */ + public void testCreateLosslessFrom16BitPNG() throws IOException + { + PDDocument document = new PDDocument(); + BufferedImage image = ImageIO.read(new File("target/imgs", "PDFBOX-4184-16bit.png")); + + assertEquals(64, image.getColorModel().getPixelSize()); + assertEquals(Transparency.TRANSLUCENT, image.getColorModel().getTransparency()); + assertEquals(4, image.getRaster().getNumDataElements()); + assertEquals(java.awt.image.DataBuffer.TYPE_USHORT, image.getRaster().getDataBuffer().getDataType()); + + PDImageXObject ximage = LosslessFactory.createFromImage(document, image); + + int w = image.getWidth(); + int h = image.getHeight(); + validate(ximage, 16, w, h, "png", PDDeviceRGB.INSTANCE.getName()); + checkIdent(image, ximage.getImage()); + checkIdentRGB(image, ximage.getOpaqueImage()); + checkIdentRaw(image, ximage); + + assertNotNull(ximage.getSoftMask()); + validate(ximage.getSoftMask(), 16, w, h, "png", PDDeviceGray.INSTANCE.getName()); + assertEquals(35, colorCount(ximage.getSoftMask().getImage())); + + doWritePDF(document, ximage, testResultsDir, "png16bit.pdf"); + } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/PDImageXObjectTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/PDImageXObjectTest.java new file mode 100644 index 00000000000..93bea884263 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/PDImageXObjectTest.java @@ -0,0 +1,323 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.graphics.image; + +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.net.URISyntaxException; +import javax.imageio.ImageIO; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.junit.Assert; +import org.junit.Test; + +/** + * Tests that the convenience methods of PDImageXObject do what is expected and produce the + * same images as the more focused factory classes. + * + * @author Tilman Hausherr + */ +public class PDImageXObjectTest +{ + + public PDImageXObjectTest() + { + } + + /** + * Test of createFromFileByExtension method, of class PDImageXObject.
+ */ + @Test + public void testCreateFromFileByExtension() throws Exception + { + /* each helper compares the convenience method with the factory named in the helper */ + testCompareCreatedFileByExtensionWithCreatedByCCITTFactory("ccittg4.tif"); + + testCompareCreatedFileByExtensionWithCreatedByJPEGFactory("jpeg.jpg"); + testCompareCreatedFileByExtensionWithCreatedByJPEGFactory("jpegcmyk.jpg"); + + testCompareCreatedFileByExtensionWithCreatedByLosslessFactory("gif.gif"); + testCompareCreatedFileByExtensionWithCreatedByLosslessFactory("gif-1bit-transparent.gif"); + testCompareCreatedFileByExtensionWithCreatedByLosslessFactory("png_indexed_8bit_alpha.png"); + testCompareCreatedFileByExtensionWithCreatedByLosslessFactory("png.png"); + } + + /** + * Test of createFromFile method, of class PDImageXObject. Every sample file is loaded by + * its absolute path and compared with the image built by CCITTFactory, JPEGFactory or + * LosslessFactory. + */ + @Test + public void testCreateFromFile() throws Exception + { + /* each helper compares the convenience method with the factory named in the helper */ + testCompareCreatedFileWithCreatedByCCITTFactory("ccittg4.tif"); + + testCompareCreatedFileWithCreatedByJPEGFactory("jpeg.jpg"); + testCompareCreatedFileWithCreatedByJPEGFactory("jpegcmyk.jpg"); + + testCompareCreatedFileWithCreatedByLosslessFactory("gif.gif"); + testCompareCreatedFileWithCreatedByLosslessFactory("gif-1bit-transparent.gif"); + testCompareCreatedFileWithCreatedByLosslessFactory("png_indexed_8bit_alpha.png"); + testCompareCreatedFileWithCreatedByLosslessFactory("png.png"); + } + + + /** + * Test of createFromFileByContent method, of class PDImageXObject.
+ */ + @Test + public void testCreateFromFileByContent() throws Exception + { + /* each helper compares the convenience method with the factory named in the helper */ + testCompareCreateByContentWithCreatedByCCITTFactory("ccittg4.tif"); + + testCompareCreatedByContentWithCreatedByJPEGFactory("jpeg.jpg"); + testCompareCreatedByContentWithCreatedByJPEGFactory("jpegcmyk.jpg"); + + testCompareCreatedByContentWithCreatedByLosslessFactory("gif.gif"); + testCompareCreatedByContentWithCreatedByLosslessFactory("gif-1bit-transparent.gif"); + testCompareCreatedByContentWithCreatedByLosslessFactory("png_indexed_8bit_alpha.png"); + testCompareCreatedByContentWithCreatedByLosslessFactory("png.png"); + } + + + /** + * Test of createFromByteArray method, of class PDImageXObject. The helpers read each sample + * file into a byte array and pass null as the third argument of createFromByteArray. + */ + @Test + public void testCreateFromByteArray() throws Exception + { + /* each helper compares the convenience method with the factory named in the helper */ + testCompareCreatedFromByteArrayWithCreatedByCCITTFactory("ccittg4.tif"); + + testCompareCreatedFromByteArrayWithCreatedByJPEGFactory("jpeg.jpg"); + testCompareCreatedFromByteArrayWithCreatedByJPEGFactory("jpegcmyk.jpg"); + + testCompareCreatedFromByteArrayWithCreatedByLosslessFactory("gif.gif"); + testCompareCreatedFromByteArrayWithCreatedByLosslessFactory("gif-1bit-transparent.gif"); + testCompareCreatedFromByteArrayWithCreatedByLosslessFactory("png_indexed_8bit_alpha.png"); + testCompareCreatedFromByteArrayWithCreatedByLosslessFactory("png.png"); + } + + /** + * Loads the resource with PDImageXObject.createFromFileByExtension and checks that suffix + * and pixels match the image that LosslessFactory creates from the ImageIO decoded resource. + * + * @param filename name of a resource that is resolved relative to this test class + */ + private void testCompareCreatedFileByExtensionWithCreatedByLosslessFactory(String filename) + throws IOException, URISyntaxException + { + PDDocument doc = new PDDocument(); + File file = new File(PDImageXObjectTest.class.getResource(filename).toURI()); + PDImageXObject image = PDImageXObject.createFromFileByExtension(file, doc); + + BufferedImage bim = ImageIO.read(PDImageXObjectTest.class.getResourceAsStream(filename)); + PDImageXObject expectedImage = LosslessFactory.createFromImage(doc, bim); + + Assert.assertEquals(expectedImage.getSuffix(), image.getSuffix()); + checkIdentARGB(image.getImage(), expectedImage.getImage()); + + doc.close(); + } + + private void
testCompareCreatedFileByExtensionWithCreatedByCCITTFactory(String filename) + throws IOException, URISyntaxException + { + PDDocument doc = new PDDocument(); + File file = new File(PDImageXObjectTest.class.getResource(filename).toURI()); + PDImageXObject image = PDImageXObject.createFromFileByExtension(file, doc); + + PDImageXObject expectedImage = CCITTFactory.createFromFile(doc, file); + + Assert.assertEquals(expectedImage.getSuffix(), image.getSuffix()); + checkIdentARGB(image.getImage(), expectedImage.getImage()); + + doc.close(); + } + + /** + * Loads the resource with PDImageXObject.createFromFileByExtension and checks that suffix + * and pixels match the image that JPEGFactory creates from a stream over the same file. + * + * @param filename name of a resource that is resolved relative to this test class + */ + private void testCompareCreatedFileByExtensionWithCreatedByJPEGFactory(String filename) + throws IOException, URISyntaxException + { + PDDocument doc = new PDDocument(); + File file = new File(PDImageXObjectTest.class.getResource(filename).toURI()); + PDImageXObject image = PDImageXObject.createFromFileByExtension(file, doc); + + PDImageXObject expectedImage; + FileInputStream jpegStream = new FileInputStream(file); + try + { + expectedImage = JPEGFactory.createFromStream(doc, jpegStream); + } + finally + { + /* this method opened the stream, so it also closes it */ + jpegStream.close(); + } + + Assert.assertEquals(expectedImage.getSuffix(), image.getSuffix()); + checkIdentARGB(image.getImage(), expectedImage.getImage()); + + doc.close(); + } + + private void testCompareCreatedFileWithCreatedByLosslessFactory(String filename) + throws IOException, URISyntaxException + { + PDDocument doc = new PDDocument(); + File file = new File(PDImageXObjectTest.class.getResource(filename).toURI()); + PDImageXObject image = PDImageXObject.createFromFile(file.getAbsolutePath(), doc); + + BufferedImage bim = ImageIO.read(PDImageXObjectTest.class.getResourceAsStream(filename)); + PDImageXObject expectedImage = LosslessFactory.createFromImage(doc, bim); + + Assert.assertEquals(expectedImage.getSuffix(), image.getSuffix()); + checkIdentARGB(image.getImage(), expectedImage.getImage()); + + doc.close(); + } + + private void testCompareCreatedFileWithCreatedByCCITTFactory(String filename) + throws IOException, URISyntaxException + { + PDDocument doc = new PDDocument(); + File file = new
File(PDImageXObjectTest.class.getResource(filename).toURI()); + PDImageXObject image = PDImageXObject.createFromFile(file.getAbsolutePath(), doc); + + PDImageXObject expectedImage = CCITTFactory.createFromFile(doc, file); + + Assert.assertEquals(expectedImage.getSuffix(), image.getSuffix()); + checkIdentARGB(image.getImage(), expectedImage.getImage()); + + doc.close(); + } + + /** + * Loads the resource with PDImageXObject.createFromFile and checks that suffix and pixels + * match the image that JPEGFactory creates from a stream over the same file. + * + * @param filename name of a resource that is resolved relative to this test class + */ + private void testCompareCreatedFileWithCreatedByJPEGFactory(String filename) + throws IOException, URISyntaxException + { + PDDocument doc = new PDDocument(); + File file = new File(PDImageXObjectTest.class.getResource(filename).toURI()); + PDImageXObject image = PDImageXObject.createFromFile(file.getAbsolutePath(), doc); + + PDImageXObject expectedImage; + FileInputStream jpegStream = new FileInputStream(file); + try + { + expectedImage = JPEGFactory.createFromStream(doc, jpegStream); + } + finally + { + /* this method opened the stream, so it also closes it */ + jpegStream.close(); + } + + Assert.assertEquals(expectedImage.getSuffix(), image.getSuffix()); + checkIdentARGB(image.getImage(), expectedImage.getImage()); + + doc.close(); + } + + private void testCompareCreatedByContentWithCreatedByLosslessFactory(String filename) + throws IOException, URISyntaxException + { + PDDocument doc = new PDDocument(); + File file = new File(PDImageXObjectTest.class.getResource(filename).toURI()); + PDImageXObject image = PDImageXObject.createFromFileByContent(file, doc); + + BufferedImage bim = ImageIO.read(PDImageXObjectTest.class.getResourceAsStream(filename)); + PDImageXObject expectedImage = LosslessFactory.createFromImage(doc, bim); + + Assert.assertEquals(expectedImage.getSuffix(), image.getSuffix()); + checkIdentARGB(image.getImage(), expectedImage.getImage()); + + doc.close(); + } + + private void testCompareCreateByContentWithCreatedByCCITTFactory(String filename) + throws IOException, URISyntaxException + { + PDDocument doc = new PDDocument(); + File file = new File(PDImageXObjectTest.class.getResource(filename).toURI()); + PDImageXObject image = PDImageXObject.createFromFileByContent(file, doc); + + PDImageXObject expectedImage = CCITTFactory.createFromFile(doc,
file); + + Assert.assertEquals(expectedImage.getSuffix(), image.getSuffix()); + checkIdentARGB(image.getImage(), expectedImage.getImage()); + + doc.close(); + } + + /** + * Loads the resource with PDImageXObject.createFromFileByContent and checks that suffix and + * pixels match the image that JPEGFactory creates from a stream over the same file. + * + * @param filename name of a resource that is resolved relative to this test class + */ + private void testCompareCreatedByContentWithCreatedByJPEGFactory(String filename) + throws IOException, URISyntaxException + { + PDDocument doc = new PDDocument(); + File file = new File(PDImageXObjectTest.class.getResource(filename).toURI()); + PDImageXObject image = PDImageXObject.createFromFileByContent(file, doc); + + PDImageXObject expectedImage; + FileInputStream jpegStream = new FileInputStream(file); + try + { + expectedImage = JPEGFactory.createFromStream(doc, jpegStream); + } + finally + { + /* this method opened the stream, so it also closes it */ + jpegStream.close(); + } + + Assert.assertEquals(expectedImage.getSuffix(), image.getSuffix()); + checkIdentARGB(image.getImage(), expectedImage.getImage()); + + doc.close(); + } + + + + + /** + * Reads the resource into a byte array, passes it to PDImageXObject.createFromByteArray and + * checks that suffix and pixels match the image that LosslessFactory creates from the + * ImageIO decoded resource. + * + * @param filename name of a resource that is resolved relative to this test class + */ + private void testCompareCreatedFromByteArrayWithCreatedByLosslessFactory(String filename) + throws IOException, URISyntaxException + { + PDDocument doc = new PDDocument(); + File file = new File(PDImageXObjectTest.class.getResource(filename).toURI()); + byte[] byteArray; + FileInputStream fileStream = new FileInputStream(file); + try + { + byteArray = IOUtils.toByteArray(fileStream); + } + finally + { + /* this method opened the stream, so it also closes it */ + fileStream.close(); + } + PDImageXObject image = PDImageXObject.createFromByteArray(doc, byteArray, null); + + BufferedImage bim = ImageIO.read(PDImageXObjectTest.class.getResourceAsStream(filename)); + PDImageXObject expectedImage = LosslessFactory.createFromImage(doc, bim); + + Assert.assertEquals(expectedImage.getSuffix(), image.getSuffix()); + checkIdentARGB(image.getImage(), expectedImage.getImage()); + + doc.close(); + } + + private void testCompareCreatedFromByteArrayWithCreatedByCCITTFactory(String filename) + throws IOException, URISyntaxException + { + PDDocument doc = new PDDocument(); + File file = new File(PDImageXObjectTest.class.getResource(filename).toURI()); + byte[] byteArray; + FileInputStream fileStream = new FileInputStream(file); + try + { + byteArray = IOUtils.toByteArray(fileStream); + } + finally + { + /* this method opened the stream, so it also closes it */ + fileStream.close(); + } + PDImageXObject image = PDImageXObject.createFromByteArray(doc, byteArray, null); + + PDImageXObject expectedImage = CCITTFactory.createFromFile(doc, file); + +
Assert.assertEquals(expectedImage.getSuffix(), image.getSuffix()); + checkIdentARGB(image.getImage(), expectedImage.getImage()); + + doc.close(); + } + + /** + * Reads the resource into a byte array, passes it to PDImageXObject.createFromByteArray and + * checks that suffix and pixels match the image that JPEGFactory creates from a stream over + * the same file. + * + * @param filename name of a resource that is resolved relative to this test class + */ + private void testCompareCreatedFromByteArrayWithCreatedByJPEGFactory(String filename) + throws IOException, URISyntaxException + { + PDDocument doc = new PDDocument(); + File file = new File(PDImageXObjectTest.class.getResource(filename).toURI()); + byte[] byteArray; + FileInputStream fileStream = new FileInputStream(file); + try + { + byteArray = IOUtils.toByteArray(fileStream); + } + finally + { + /* this method opened the stream, so it also closes it */ + fileStream.close(); + } + PDImageXObject image = PDImageXObject.createFromByteArray(doc, byteArray, null); + + PDImageXObject expectedImage; + FileInputStream jpegStream = new FileInputStream(file); + try + { + expectedImage = JPEGFactory.createFromStream(doc, jpegStream); + } + finally + { + /* this method opened the stream, so it also closes it */ + jpegStream.close(); + } + + Assert.assertEquals(expectedImage.getSuffix(), image.getSuffix()); + checkIdentARGB(image.getImage(), expectedImage.getImage()); + + doc.close(); + } + + /** + * Asserts that both images have the same size and identical getRGB values for every pixel. + * + * @param expectedImage image that supplies the reference pixels + * @param actualImage image that is compared against the reference + */ + private void checkIdentARGB(BufferedImage expectedImage, BufferedImage actualImage) + { + int w = expectedImage.getWidth(); + int h = expectedImage.getHeight(); + Assert.assertEquals(w, actualImage.getWidth()); + Assert.assertEquals(h, actualImage.getHeight()); + for (int y = 0; y < h; ++y) + { + for (int x = 0; x < w; ++x) + { + int expectedArgb = expectedImage.getRGB(x, y); + int actualArgb = actualImage.getRGB(x, y); + if (expectedArgb != actualArgb) + { + /* getRGB returns 32 bit ARGB values, so all eight hex digits are printed */ + String errMsg = String.format("(%d,%d) %08X != %08X", x, y, expectedArgb, actualArgb); + Assert.assertEquals(errMsg, expectedArgb, actualArgb); + } + } + } + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/PNGConverterTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/PNGConverterTest.java new file mode 100644 index 00000000000..8643e7f4e96 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/PNGConverterTest.java @@ -0,0 +1,389 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.graphics.image; + +import java.awt.Color; +import java.awt.color.ColorSpace; +import java.awt.color.ICC_ColorSpace; +import java.awt.color.ICC_Profile; +import java.awt.image.BufferedImage; +import java.awt.image.ColorModel; +import java.awt.image.ComponentColorModel; +import java.awt.image.WritableRaster; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Hashtable; +import javax.imageio.ImageIO; + +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.graphics.color.PDICCBased; +import org.apache.pdfbox.pdmodel.graphics.color.PDIndexed; + +import static org.apache.pdfbox.pdmodel.graphics.image.LosslessFactoryTest.checkIdentRaw; +import static org.apache.pdfbox.pdmodel.graphics.image.ValidateXImage.checkIdent; +import org.junit.Assert; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static 
org.junit.Assert.assertNull; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.junit.Before; +import org.junit.Test; + +/** + * Unit tests for PNGConverter. + */ +public class PNGConverterTest +{ + + @Before + public void setup() + { + //noinspection ResultOfMethodCallIgnored + parentDir.mkdirs(); + } + + /** + * This "test" just dumps the list of constants for the PNGConverter CHUNK_??? types, so that + * it can just be copy&pasted into the PNGConverter class. + * + * NOTE(review): "kBKG" is not a chunk name of the PNG specification, which defines "bKGD"; + * confirm against the constants in PNGConverter before changing it here. + */ + //@Test + public void dumpChunkTypes() + { + final String[] chunkTypes = { "IHDR", "IDAT", "PLTE", "IEND", "tRNS", "cHRM", "gAMA", + "iCCP", "sBIT", "sRGB", "tEXt", "zTXt", "iTXt", "kBKG", "hIST", "pHYs", "sPLT", + "tIME" }; + + /* the names are ASCII letters; the dumped values must not depend on the platform charset */ + final java.nio.charset.Charset ascii = java.nio.charset.Charset.forName("US-ASCII"); + for (String chunkType : chunkTypes) + { + byte[] bytes = chunkType.getBytes(ascii); + assertEquals(4, bytes.length); + System.out.println(String.format("\tprivate static final int CHUNK_" + chunkType + + " = 0x%02X%02X%02X%02X; // %s: %d %d %d %d", (int) bytes[0] & 0xFF, + (int) bytes[1] & 0xFF, (int) bytes[2] & 0xFF, (int) bytes[3] & 0xFF, chunkType, + (int) bytes[0] & 0xFF, (int) bytes[1] & 0xFF, (int) bytes[2] & 0xFF, + (int) bytes[3] & 0xFF)); + } + } + + @Test + public void testImageConversionRGB() throws IOException + { + checkImageConvert("png.png"); + } + + @Test + public void testImageConversionRGBGamma() throws IOException + { + checkImageConvert("png_rgb_gamma.png"); + } + + @Test + public void testImageConversionRGB16BitICC() throws IOException + { + checkImageConvert("png_rgb_romm_16bit.png"); + } + + @Test + public void testImageConversionRGBIndexed() throws IOException + { + checkImageConvert("png_indexed.png"); + } + + @Test + public void testImageConversionRGBIndexedAlpha1Bit() throws IOException + { + checkImageConvert("png_indexed_1bit_alpha.png"); + } + + @Test + public void testImageConversionRGBIndexedAlpha2Bit() throws IOException + { + checkImageConvert("png_indexed_2bit_alpha.png"); + } + + @Test + public void
testImageConversionRGBIndexedAlpha4Bit() throws IOException + { + checkImageConvert("png_indexed_4bit_alpha.png"); + } + + @Test + public void testImageConversionRGBIndexedAlpha8Bit() throws IOException + { + checkImageConvert("png_indexed_8bit_alpha.png"); + } + + @Test + public void testImageConversionRGBAlpha() throws IOException + { + // We can't handle Alpha RGB + checkImageConvertFail("png_alpha_rgb.png"); + } + + @Test + public void testImageConversionGrayAlpha() throws IOException + { + // We can't handle Alpha Gray + checkImageConvertFail("png_alpha_gray.png"); + } + + @Test + public void testImageConversionGray() throws IOException + { + /* the converter is expected to return null for this gray PNG */ + checkImageConvertFail("png_gray.png"); + } + + @Test + public void testImageConversionGrayGamma() throws IOException + { + /* the converter is expected to return null for this gray PNG */ + checkImageConvertFail("png_gray_with_gama.png"); + } + + private final File parentDir = new File("target/test-output/graphics/graphics"); + + /** + * Asserts that PNGConverter.convertPNGImage returns null for the given resource. + * + * @param name name of a PNG resource that is resolved relative to this test class + * @throws IOException if reading the resource or closing the document fails + */ + private void checkImageConvertFail(String name) throws IOException + { + PDDocument doc = new PDDocument(); + byte[] imageBytes = IOUtils.toByteArray(PNGConverterTest.class.getResourceAsStream(name)); + PDImageXObject pdImageXObject = PNGConverter.convertPNGImage(doc, imageBytes); + assertNull(pdImageXObject); + doc.close(); + } + + /** + * Converts the given PNG resource with PNGConverter.convertPNGImage, draws the result onto a + * page, saves the PDF below parentDir and compares the decoded image with what ImageIO reads + * from the same bytes. + * + * @param name name of a PNG resource that is resolved relative to this test class + * @throws IOException if reading, converting or saving fails + */ + private void checkImageConvert(String name) throws IOException + { + PDDocument doc = new PDDocument(); + byte[] imageBytes = IOUtils.toByteArray(PNGConverterTest.class.getResourceAsStream(name)); + + PDImageXObject pdImageXObject = PNGConverter.convertPNGImage(doc, imageBytes); + assertNotNull(pdImageXObject); + + ICC_Profile imageProfile = null; + if (pdImageXObject.getColorSpace() instanceof PDICCBased) + { + // Make sure that ICC profile is a valid one + PDICCBased iccColorSpace = (PDICCBased) pdImageXObject.getColorSpace(); + imageProfile = ICC_Profile.getInstance(iccColorSpace.getPDStream().toByteArray()); + } + PDPage page = new PDPage(); + doc.addPage(page); + PDPageContentStream contentStream = new PDPageContentStream(doc,
page); + /* fill the whole crop box with pink before the image is drawn on top of it */ + contentStream.setNonStrokingColor(Color.PINK); + contentStream.addRect(0, 0, page.getCropBox().getWidth(), page.getCropBox().getHeight()); + contentStream.fill(); + + contentStream.drawImage(pdImageXObject, 0, 0, pdImageXObject.getWidth(), + pdImageXObject.getHeight()); + contentStream.close(); + doc.save(new File(parentDir, name + ".pdf")); + BufferedImage image = pdImageXObject.getImage(); + + assertNotNull(pdImageXObject.getRawRaster()); + + BufferedImage expectedImage = ImageIO.read(new ByteArrayInputStream(imageBytes)); + if (imageProfile != null && expectedImage.getColorModel().getColorSpace().isCS_sRGB()) + { + // The image has an embedded ICC Profile, but the default java PNG + // reader does not correctly read that. + expectedImage = getImageWithProfileData(expectedImage, imageProfile); + } + + checkIdent(expectedImage, image); + + BufferedImage rawImage = pdImageXObject.getRawImage(); + if (rawImage != null) + { + assertEquals(rawImage.getWidth(), pdImageXObject.getWidth()); + assertEquals(rawImage.getHeight(), pdImageXObject.getHeight()); + // We compare the raw data + checkIdentRaw(expectedImage, pdImageXObject); + } + + doc.close(); + } + + /** + * Creates an image that reuses the raster and the properties of sourceImage but whose + * colour model is a ComponentColorModel built on the given ICC profile. + * + * @param sourceImage image to copy from; its colour model is cast to ComponentColorModel + * @param realProfile ICC profile for the colour space of the new colour model + * @return a new BufferedImage that wraps the raster of sourceImage + */ + public static BufferedImage getImageWithProfileData(BufferedImage sourceImage, + ICC_Profile realProfile) + { + Hashtable properties = new Hashtable(); + String[] propertyNames = sourceImage.getPropertyNames(); + if (propertyNames != null) + { + for (String propertyName : propertyNames) + { + properties.put(propertyName, sourceImage.getProperty(propertyName)); + } + } + ComponentColorModel oldColorModel = (ComponentColorModel) sourceImage.getColorModel(); + boolean hasAlpha = oldColorModel.hasAlpha(); + int transparency = oldColorModel.getTransparency(); + boolean alphaPremultiplied = oldColorModel.isAlphaPremultiplied(); + WritableRaster raster = sourceImage.getRaster(); + int dataType = raster.getDataBuffer().getDataType(); + int[] componentSize = oldColorModel.getComponentSize(); + final ColorModel
colorModel = new ComponentColorModel(new ICC_ColorSpace(realProfile), + componentSize, hasAlpha, alphaPremultiplied, transparency, dataType); + return new BufferedImage(colorModel, raster, sourceImage.isAlphaPremultiplied(), + properties); + } + + @Test + public void testCheckConverterState() + { + assertFalse(PNGConverter.checkConverterState(null)); + PNGConverter.PNGConverterState state = new PNGConverter.PNGConverterState(); + assertFalse(PNGConverter.checkConverterState(state)); + + PNGConverter.Chunk invalidChunk = new PNGConverter.Chunk(); + invalidChunk.bytes = new byte[0]; + assertFalse(PNGConverter.checkChunkSane(invalidChunk)); + + // Valid Dummy Chunk + PNGConverter.Chunk validChunk = new PNGConverter.Chunk(); + validChunk.bytes = new byte[16]; + validChunk.start = 4; + validChunk.length = 8; + validChunk.crc = 2077607535; + assertTrue(PNGConverter.checkChunkSane(validChunk)); + + state.IHDR = invalidChunk; + assertFalse(PNGConverter.checkConverterState(state)); + state.IDATs = Collections.singletonList(validChunk); + assertFalse(PNGConverter.checkConverterState(state)); + state.IHDR = validChunk; + assertTrue(PNGConverter.checkConverterState(state)); + state.IDATs = new ArrayList(); + assertFalse(PNGConverter.checkConverterState(state)); + state.IDATs = Collections.singletonList(validChunk); + assertTrue(PNGConverter.checkConverterState(state)); + + state.PLTE = invalidChunk; + assertFalse(PNGConverter.checkConverterState(state)); + state.PLTE = validChunk; + assertTrue(PNGConverter.checkConverterState(state)); + + state.cHRM = invalidChunk; + assertFalse(PNGConverter.checkConverterState(state)); + state.cHRM = validChunk; + assertTrue(PNGConverter.checkConverterState(state)); + + state.tRNS = invalidChunk; + assertFalse(PNGConverter.checkConverterState(state)); + state.tRNS = validChunk; + assertTrue(PNGConverter.checkConverterState(state)); + + state.iCCP = invalidChunk; + assertFalse(PNGConverter.checkConverterState(state)); + state.iCCP = 
validChunk; + assertTrue(PNGConverter.checkConverterState(state)); + + state.sRGB = invalidChunk; + assertFalse(PNGConverter.checkConverterState(state)); + state.sRGB = validChunk; + assertTrue(PNGConverter.checkConverterState(state)); + + state.gAMA = invalidChunk; + assertFalse(PNGConverter.checkConverterState(state)); + state.gAMA = validChunk; + assertTrue(PNGConverter.checkConverterState(state)); + + state.IDATs = Arrays.asList(validChunk, invalidChunk); + assertFalse(PNGConverter.checkConverterState(state)); + } + + @Test + public void testChunkSane() + { + PNGConverter.Chunk chunk = new PNGConverter.Chunk(); + assertTrue(PNGConverter.checkChunkSane(null)); + chunk.bytes = "IHDRsomedummyvaluesDummyValuesAtEnd".getBytes(); + chunk.length = 19; + assertEquals(35, chunk.bytes.length); + + assertEquals("IHDRsomedummyvalues", new String(chunk.getData())); + + assertFalse(PNGConverter.checkChunkSane(chunk)); + chunk.start = 4; + assertEquals("somedummyvaluesDumm", new String(chunk.getData())); + assertFalse(PNGConverter.checkChunkSane(chunk)); + chunk.crc = -1729802258; + assertTrue(PNGConverter.checkChunkSane(chunk)); + chunk.start = 6; + assertFalse(PNGConverter.checkChunkSane(chunk)); + chunk.length = 60; + assertFalse(PNGConverter.checkChunkSane(chunk)); + } + + @Test + public void testCRCImpl() + { + byte[] b1 = "Hello World!".getBytes(); + assertEquals(472456355, PNGConverter.crc(b1, 0, b1.length)); + assertEquals(-632335482, PNGConverter.crc(b1, 2, b1.length - 4)); + } + + @Test + public void testMapPNGRenderIntent() + { + assertEquals(COSName.PERCEPTUAL, PNGConverter.mapPNGRenderIntent(0)); + assertEquals(COSName.RELATIVE_COLORIMETRIC, PNGConverter.mapPNGRenderIntent(1)); + assertEquals(COSName.SATURATION, PNGConverter.mapPNGRenderIntent(2)); + assertEquals(COSName.ABSOLUTE_COLORIMETRIC, PNGConverter.mapPNGRenderIntent(3)); + assertNull(PNGConverter.mapPNGRenderIntent(-1)); + assertNull(PNGConverter.mapPNGRenderIntent(4)); + } + + /** + * Test code coverage 
for /Intent /Perceptual and for sRGB icc profile in indexed colorspace. + * + * @throws IOException + */ + @Test + public void testImageConversionIntentIndexed() throws IOException + { + checkImageConvert("929316.png"); + + PDDocument doc = new PDDocument(); + + byte[] imageBytes = IOUtils.toByteArray(PNGConverterTest.class.getResourceAsStream("929316.png")); + PDImageXObject pdImageXObject = PNGConverter.convertPNGImage(doc, imageBytes); + assertEquals(COSName.PERCEPTUAL, pdImageXObject.getCOSObject().getItem(COSName.INTENT)); + + // Check that this image gets an indexed colorspace with sRGB ICC based colorspace + PDIndexed indexedColorspace = (PDIndexed) pdImageXObject.getColorSpace(); + + PDICCBased iccColorspace = (PDICCBased) indexedColorspace.getBaseColorSpace(); + // validity of ICC CS is tested in checkImageConvert + + // should be an sRGB profile. Or at least, the data that is in ColorSpace.CS_sRGB and + // that was assigned in PNGConvert. + // (PDICCBased.is_sRGB() fails in openjdk on that data, maybe it is not a "real" sRGB) + ICC_Profile rgbProfile = ICC_Profile.getInstance(ColorSpace.CS_sRGB); + byte[] sRGB_bytes = rgbProfile.getData(); + Assert.assertArrayEquals(sRGB_bytes, iccColorspace.getPDStream().toByteArray()); + + doc.close(); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/ValidateXImage.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/ValidateXImage.java index b7c057a02c6..e6b0a0203e7 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/ValidateXImage.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/ValidateXImage.java @@ -15,13 +15,24 @@ */ package org.apache.pdfbox.pdmodel.graphics.image; +import java.awt.Point; +import java.awt.color.ColorSpace; import java.awt.image.BufferedImage; -import java.io.ByteArrayOutputStream; +import java.awt.image.ColorConvertOp; +import java.awt.image.DataBuffer; +import java.awt.image.DirectColorModel; +import 
java.awt.image.Raster; +import java.awt.image.WritableRaster; import java.io.File; import java.io.IOException; +import java.io.OutputStream; import java.util.HashSet; +import java.util.Hashtable; import java.util.Set; import javax.imageio.ImageIO; +import javax.imageio.ImageWriter; +import javax.imageio.spi.ImageWriterSpi; +import static junit.framework.TestCase.assertEquals; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.PDDocument; @@ -30,7 +41,6 @@ import org.apache.pdfbox.rendering.PDFRenderer; import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode; -import static junit.framework.TestCase.assertEquals; import static junit.framework.TestCase.assertNotNull; import static junit.framework.TestCase.assertTrue; @@ -60,14 +70,45 @@ public static void validate(PDImageXObject ximage, int bpc, int width, int heigh assertNotNull(ximage.getImage()); assertEquals(ximage.getWidth(), ximage.getImage().getWidth()); assertEquals(ximage.getHeight(), ximage.getImage().getHeight()); + WritableRaster rawRaster = ximage.getRawRaster(); + assertNotNull(rawRaster); + assertEquals(rawRaster.getWidth(), ximage.getWidth()); + assertEquals(rawRaster.getHeight(), ximage.getHeight()); + if (colorSpaceName.equals("ICCBased")) + { + BufferedImage rawImage = ximage.getRawImage(); + assertNotNull(rawImage); + assertEquals(rawImage.getWidth(), ximage.getWidth()); + assertEquals(rawImage.getHeight(), ximage.getHeight()); + } - boolean writeOk = ImageIO.write(ximage.getImage(), - format, new ByteArrayOutputStream()); - assertTrue(writeOk); - writeOk = ImageIO.write(ximage.getOpaqueImage(), - format, new ByteArrayOutputStream()); + boolean canEncode = true; + boolean writeOk; + // jdk11+ no longer encodes ARGB jpg + // https://bugs.openjdk.java.net/browse/JDK-8211748 + if ("jpg".equals(format) && + ximage.getImage().getType() == BufferedImage.TYPE_INT_ARGB) + { + ImageWriter writer = 
ImageIO.getImageWritersBySuffix(format).next(); + ImageWriterSpi originatingProvider = writer.getOriginatingProvider(); + canEncode = originatingProvider.canEncodeImage(ximage.getImage()); + } + if (canEncode) + { + writeOk = ImageIO.write(ximage.getImage(), format, new NullOutputStream()); + assertTrue(writeOk); + } + writeOk = ImageIO.write(ximage.getOpaqueImage(), format, new NullOutputStream()); assertTrue(writeOk); } + + private static class NullOutputStream extends OutputStream + { + @Override + public void write(int b) throws IOException + { + } + } static int colorCount(BufferedImage bim) { @@ -133,6 +174,9 @@ public static void checkIdent(BufferedImage expectedImage, BufferedImage actualI { String errMsg = ""; + expectedImage = convertToSRGB(expectedImage); + actualImage = convertToSRGB(actualImage); + int w = expectedImage.getWidth(); int h = expectedImage.getHeight(); assertEquals(w, actualImage.getWidth()); @@ -143,13 +187,79 @@ public static void checkIdent(BufferedImage expectedImage, BufferedImage actualI { if (expectedImage.getRGB(x, y) != actualImage.getRGB(x, y)) { - errMsg = String.format("(%d,%d) %08X != %08X", x, y, expectedImage.getRGB(x, y), actualImage.getRGB(x, y)); + errMsg = String.format("(%d,%d) expected: <%08X> but was: <%08X>; ", x, y, expectedImage.getRGB(x, y), actualImage.getRGB(x, y)); } assertEquals(errMsg, expectedImage.getRGB(x, y), actualImage.getRGB(x, y)); } } } - + public static BufferedImage convertToSRGB(BufferedImage image) + { + // The image is already sRGB - we don't need to do anything + if (image.getColorModel().getColorSpace().isCS_sRGB()) + { + return image; + } + // 16-Bit images need to converted to 8 bit first, to avoid rounding differences + if (image.getRaster().getDataBuffer().getDataType() == DataBuffer.TYPE_USHORT) + { + final int width = image.getWidth(); + final boolean hasAlpha = image.getColorModel().hasAlpha(); + + final DirectColorModel colorModel = new DirectColorModel( + 
image.getColorModel().getColorSpace(), 32, 0xFF, 0xFF00, 0xFF0000, 0xFF000000, + false, DataBuffer.TYPE_INT); + WritableRaster targetRaster = Raster + .createPackedRaster(DataBuffer.TYPE_INT, image.getWidth(), image.getHeight(), + colorModel.getMasks(), new Point(0, 0)); + + BufferedImage image8Bit = new BufferedImage(colorModel, targetRaster, false, + new Hashtable()); + + WritableRaster sourceRaster = image.getRaster(); + + final int numShortPixelElements = hasAlpha ? 4 : 3; + // 3 or 4 short per pixel + short[] pixelShort = new short[numShortPixelElements * width]; + // Packed RGB + int[] pixelInt = new int[width]; + for (int y = 0; y < image.getHeight(); y++) + { + sourceRaster.getDataElements(0, y, width, 1, pixelShort); + int ptrShort = 0; + for (int x = 0; x < width; x++) + { + int r = pixelShort[ptrShort++] & 0xFFFF; + int g = pixelShort[ptrShort++] & 0xFFFF; + int b = pixelShort[ptrShort++] & 0xFFFF; + if (hasAlpha) + ptrShort++; + + // We divide using a float exactly the same way as SampledImageReader + // to get from 16 bit to 8 bit sample values + int r8bit = convert16To8Bit(r); + int g8bit = convert16To8Bit(g); + int b8bit = convert16To8Bit(b); + int v = r8bit | (g8bit << 8) | (b8bit << 16) | 0xFF000000; + pixelInt[x] = v; + } + targetRaster.setDataElements(0, y, width, 1, pixelInt); + + } + image = image8Bit; + + } + BufferedImage destination = new BufferedImage(image.getWidth(), image.getHeight(), + BufferedImage.TYPE_INT_RGB); + ColorConvertOp op = new ColorConvertOp(ColorSpace.getInstance(ColorSpace.CS_sRGB), null); + return op.filter(image, destination); + } + + private static int convert16To8Bit(int v) + { + float output = (float) v / (float) 0xFFFF; + return Math.round(output * 0xFF); + } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/TestOptionalContentGroups.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/TestOptionalContentGroups.java index 23186265f4d..de7cdb7d2d7 100644 
--- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/TestOptionalContentGroups.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/optionalcontent/TestOptionalContentGroups.java @@ -17,12 +17,20 @@ package org.apache.pdfbox.pdmodel.graphics.optionalcontent; import java.awt.Color; +import java.awt.image.BufferedImage; +import java.awt.image.DataBufferInt; import java.io.File; +import java.io.IOException; import java.util.Arrays; import java.util.Collection; +import java.util.HashSet; +import java.util.List; import java.util.Set; +import javax.imageio.ImageIO; import junit.framework.TestCase; +import static junit.framework.TestCase.assertFalse; +import static junit.framework.TestCase.assertTrue; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; @@ -31,16 +39,22 @@ import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode; +import org.apache.pdfbox.pdmodel.PageMode; +import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentProperties.BaseState; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.text.PDFMarkedContentExtractor; +import org.apache.pdfbox.text.TextPosition; +import org.junit.Assert; /** * Tests optional content group functionality (also called layers). 
*/ public class TestOptionalContentGroups extends TestCase { - private File testResultsDir = new File("target/test-output"); + private final File testResultsDir = new File("target/test-output"); @Override protected void setUp() throws Exception @@ -188,8 +202,119 @@ public void testOCGConsumption() throws Exception assertNull(ocgs.getGroup("inexistent")); Collection coll = ocgs.getOptionalContentGroups(); - coll.contains(background); + assertEquals(3, coll.size()); + Set nameSet = new HashSet(); + for (PDOptionalContentGroup ocg2 : coll) + { + nameSet.add(ocg2.getName()); + } + assertTrue(nameSet.contains("background")); + assertTrue(nameSet.contains("enabled")); + assertTrue(nameSet.contains("disabled")); + + PDFMarkedContentExtractor extractor = new PDFMarkedContentExtractor(); + extractor.processPage(page); + List markedContents = extractor.getMarkedContents(); + assertEquals("oc1", markedContents.get(0).getTag()); + assertEquals("PDF 1.5: Optional Content Groups" + + "You should see a green textline, but no red text line.", + textPositionListToString(markedContents.get(0).getContents())); + assertEquals("oc2", markedContents.get(1).getTag()); + assertEquals("This is from an enabled layer. If you see this, that's good.", + textPositionListToString(markedContents.get(1).getContents())); + assertEquals("oc3", markedContents.get(2).getTag()); + assertEquals("This is from a disabled layer. If you see this, that's NOT good!", + textPositionListToString(markedContents.get(2).getContents())); + } + finally + { + doc.close(); + } + } + + /** + * Convert a list of TextPosition objects to a string. + * + * @param contents list of TextPosition objects. 
+ * @return + */ + private String textPositionListToString(List contents) + { + StringBuilder sb = new StringBuilder(); + for (Object o : contents) + { + TextPosition tp = (TextPosition) o; + sb.append(tp.getUnicode()); + } + return sb.toString(); + } + + public void testOCGsWithSameNameCanHaveDifferentVisibility() throws Exception + { + PDDocument doc = new PDDocument(); + try + { + //Create new page + PDPage page = new PDPage(); + doc.addPage(page); + PDResources resources = page.getResources(); + if( resources == null ) + { + resources = new PDResources(); + page.setResources( resources ); + } + + //Prepare OCG functionality + PDOptionalContentProperties ocprops = new PDOptionalContentProperties(); + doc.getDocumentCatalog().setOCProperties(ocprops); + //ocprops.setBaseState(BaseState.ON); //ON=default + + //Create visible OCG + PDOptionalContentGroup visible = new PDOptionalContentGroup("layer"); + ocprops.addGroup(visible); + assertTrue(ocprops.isGroupEnabled(visible)); + + //Create invisible OCG + PDOptionalContentGroup invisible = new PDOptionalContentGroup("layer"); + ocprops.addGroup(invisible); + assertFalse(ocprops.setGroupEnabled(invisible, false)); + assertFalse(ocprops.isGroupEnabled(invisible)); + + //Check that visible layer is still visible + assertTrue(ocprops.isGroupEnabled(visible)); + + //Setup page content stream and paint background/title + PDPageContentStream contentStream = new PDPageContentStream(doc, page, AppendMode.OVERWRITE, false); + PDFont font = PDType1Font.HELVETICA_BOLD; + contentStream.beginMarkedContent(COSName.OC, visible); + contentStream.beginText(); + contentStream.setFont(font, 14); + contentStream.newLineAtOffset(80, 700); + contentStream.showText("PDF 1.5: Optional Content Groups"); + contentStream.endText(); + font = PDType1Font.HELVETICA; + contentStream.beginText(); + contentStream.setFont(font, 12); + contentStream.newLineAtOffset(80, 680); + contentStream.showText("You should see this text, but no red text line."); + 
contentStream.endText(); + contentStream.endMarkedContent(); + + //Paint disabled layer + contentStream.beginMarkedContent(COSName.OC, invisible); + contentStream.setNonStrokingColor(Color.RED); + contentStream.beginText(); + contentStream.setFont(font, 12); + contentStream.newLineAtOffset(80, 500); + contentStream.showText( + "This is from a disabled layer. If you see this, that's NOT good!"); + contentStream.endText(); + contentStream.endMarkedContent(); + + contentStream.close(); + File targetFile = new File(testResultsDir, "ocg-generation-same-name.pdf"); + doc.save(targetFile.getAbsolutePath()); } finally { @@ -197,4 +322,165 @@ public void testOCGConsumption() throws Exception } } + /** + * PDFBOX-4496: setGroupEnabled(String, boolean) must catch all OCGs of a name even when several + * names are identical. + * + * @throws IOException + */ + public void testOCGGenerationSameNameCanHaveSameVisibilityOff() throws IOException + { + BufferedImage expectedImage; + BufferedImage actualImage; + + PDDocument doc = new PDDocument(); + try + { + //Create new page + PDPage page = new PDPage(); + doc.addPage(page); + PDResources resources = page.getResources(); + if (resources == null) + { + resources = new PDResources(); + page.setResources(resources); + } + + //Prepare OCG functionality + PDOptionalContentProperties ocprops = new PDOptionalContentProperties(); + doc.getDocumentCatalog().setOCProperties(ocprops); + //ocprops.setBaseState(BaseState.ON); //ON=default + + //Create OCG for background + PDOptionalContentGroup background = new PDOptionalContentGroup("background"); + ocprops.addGroup(background); + assertTrue(ocprops.isGroupEnabled("background")); + + //Create OCG for enabled + PDOptionalContentGroup enabled = new PDOptionalContentGroup("science"); + ocprops.addGroup(enabled); + assertFalse(ocprops.setGroupEnabled("science", true)); + assertTrue(ocprops.isGroupEnabled("science")); + + //Create OCG for disabled1 + PDOptionalContentGroup disabled1 = new 
PDOptionalContentGroup("alternative"); + ocprops.addGroup(disabled1); + + //Create OCG for disabled2 with same name as disabled1 + PDOptionalContentGroup disabled2 = new PDOptionalContentGroup("alternative"); + ocprops.addGroup(disabled2); + + assertFalse(ocprops.setGroupEnabled("alternative", false)); + assertFalse(ocprops.isGroupEnabled("alternative")); + + //Setup page content stream and paint background/title + PDPageContentStream contentStream = new PDPageContentStream(doc, page, AppendMode.OVERWRITE, false); + PDFont font = PDType1Font.HELVETICA_BOLD; + contentStream.beginMarkedContent(COSName.OC, background); + contentStream.beginText(); + contentStream.setFont(font, 14); + contentStream.newLineAtOffset(80, 700); + contentStream.showText("PDF 1.5: Optional Content Groups"); + contentStream.endText(); + contentStream.endMarkedContent(); + + font = PDType1Font.HELVETICA; + + //Paint enabled layer + contentStream.beginMarkedContent(COSName.OC, enabled); + contentStream.setNonStrokingColor(Color.GREEN); + contentStream.beginText(); + contentStream.setFont(font, 12); + contentStream.newLineAtOffset(80, 600); + contentStream.showText("The earth is a sphere"); + contentStream.endText(); + contentStream.endMarkedContent(); + + //Paint disabled layer1 + contentStream.beginMarkedContent(COSName.OC, disabled1); + contentStream.setNonStrokingColor(Color.RED); + contentStream.beginText(); + contentStream.setFont(font, 12); + contentStream.newLineAtOffset(80, 500); + contentStream.showText("Alternative 1: The earth is a flat circle"); + contentStream.endText(); + contentStream.endMarkedContent(); + + //Paint disabled layer2 + contentStream.beginMarkedContent(COSName.OC, disabled2); + contentStream.setNonStrokingColor(Color.BLUE); + contentStream.beginText(); + contentStream.setFont(font, 12); + contentStream.newLineAtOffset(80, 450); + contentStream.showText("Alternative 2: The earth is a flat parallelogram"); + contentStream.endText(); + contentStream.endMarkedContent(); 
+ + contentStream.close(); + + doc.getDocumentCatalog().setPageMode(PageMode.USE_OPTIONAL_CONTENT); + + File targetFile = new File(testResultsDir, "ocg-generation-same-name-off.pdf"); + doc.save(targetFile.getAbsolutePath()); + doc.close(); + + // render PDF with science disabled and alternatives with same name enabled + doc = PDDocument.load(new File(testResultsDir, "ocg-generation-same-name-off.pdf")); + doc.getDocumentCatalog().getOCProperties().setGroupEnabled("background", false); + doc.getDocumentCatalog().getOCProperties().setGroupEnabled("science", false); + doc.getDocumentCatalog().getOCProperties().setGroupEnabled("alternative", true); + actualImage = new PDFRenderer(doc).renderImage(0, 2); + ImageIO.write(actualImage, "png", new File(testResultsDir, "ocg-generation-same-name-off-actual.png")); + } + finally + { + doc.close(); + } + + // create PDF without OCGs to created expected rendering + PDDocument doc2 = new PDDocument(); + try + { + //Create new page + PDPage page = new PDPage(); + doc2.addPage(page); + PDResources resources = page.getResources(); + if (resources == null) + { + resources = new PDResources(); + page.setResources(resources); + } + + PDPageContentStream contentStream = new PDPageContentStream(doc2, page, AppendMode.OVERWRITE, false); + PDFont font = PDType1Font.HELVETICA; + + contentStream.setNonStrokingColor(Color.RED); + contentStream.beginText(); + contentStream.setFont(font, 12); + contentStream.newLineAtOffset(80, 500); + contentStream.showText("Alternative 1: The earth is a flat circle"); + contentStream.endText(); + + contentStream.setNonStrokingColor(Color.BLUE); + contentStream.beginText(); + contentStream.setFont(font, 12); + contentStream.newLineAtOffset(80, 450); + contentStream.showText("Alternative 2: The earth is a flat parallelogram"); + contentStream.endText(); + + contentStream.close(); + + expectedImage = new PDFRenderer(doc2).renderImage(0, 2); + ImageIO.write(expectedImage, "png", new File(testResultsDir, 
"ocg-generation-same-name-off-expected.png")); + } + finally + { + doc2.close(); + } + + // compare images + DataBufferInt expectedData = (DataBufferInt) expectedImage.getRaster().getDataBuffer(); + DataBufferInt actualData = (DataBufferInt) actualImage.getRaster().getDataBuffer(); + Assert.assertArrayEquals(expectedData.getData(), actualData.getData()); + } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/state/RenderingIntentTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/state/RenderingIntentTest.java new file mode 100644 index 00000000000..e4bf57c7e4e --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/state/RenderingIntentTest.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.graphics.state; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class RenderingIntentTest +{ + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void fromStringInputNotNullOutputNotNull() + { + // Arrange + final String value = "AbsoluteColorimetric"; + + // Act + final RenderingIntent retval = RenderingIntent.fromString(value); + + // Assert result + Assert.assertEquals(RenderingIntent.ABSOLUTE_COLORIMETRIC, retval); + } + + @Test + public void fromStringInputNotNullOutputNotNull2() + { + // Arrange + final String value = "RelativeColorimetric"; + + // Act + final RenderingIntent retval = RenderingIntent.fromString(value); + + // Assert result + Assert.assertEquals(RenderingIntent.RELATIVE_COLORIMETRIC, retval); + } + + @Test + public void fromStringInputNotNullOutputNotNull3() + { + // Arrange + final String value = "Perceptual"; + + // Act + final RenderingIntent retval = RenderingIntent.fromString(value); + + // Assert result + Assert.assertEquals(RenderingIntent.PERCEPTUAL, retval); + } + + @Test + public void fromStringInputNotNullOutputNotNull4() + { + // Arrange + final String value = "Saturation"; + + // Act + final RenderingIntent retval = RenderingIntent.fromString(value); + + // Assert result + Assert.assertEquals(RenderingIntent.SATURATION, retval); + } + + @Test + public void fromStringInputNotNullOutputNotNull5() + { + // Arrange + final String value = ""; + + // Act + final RenderingIntent retval = RenderingIntent.fromString(value); + + // Assert result + Assert.assertEquals(RenderingIntent.RELATIVE_COLORIMETRIC, retval); + } + + @Test + public void stringValueOutputNotNull() + { + // Arrange + final RenderingIntent objectUnderTest = RenderingIntent.ABSOLUTE_COLORIMETRIC; + + // Act + final String retval = objectUnderTest.stringValue(); + + // Assert result + 
Assert.assertEquals("AbsoluteColorimetric", retval); + } + + @Test + public void testIsFill() + { + // Arrange + final RenderingMode objectUnderTest = RenderingMode.FILL; + + // Act + final boolean retval = objectUnderTest.isFill(); + + // Assert result + Assert.assertEquals(true, retval); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java new file mode 100644 index 00000000000..395ee3f9183 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java @@ -0,0 +1,84 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.interactive.action; + +import java.io.IOException; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSString; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import org.junit.Test; + +/** + * + * @author Tilman Hausherr + */ +public class PDActionURITest +{ + /** + * PDFBOX-3913: Check that URIs encoded in UTF-8 are also supported. + * PDFBOX-3946: Check that there is no NPE if URI missing. 
+ */ + @Test + public void testUTF8URI() + { + PDActionURI actionURI = new PDActionURI(); + assertNull(actionURI.getURI()); + actionURI.setURI("http://経営承継.com/"); + assertEquals("http://経営承継.com/", actionURI.getURI()); + } + + /** + * PDFBOX-3913: Check that URIs encoded in UTF16 (BE) are also supported. + * + * @throws IOException + */ + @Test + public void testUTF16BEURI() throws IOException + { + PDActionURI actionURI = new PDActionURI(); + + // found in govdocs file 534948.pdf + COSString utf16URI = COSString.parseHex("FEFF0068007400740070003A002F002F00770077" + + "0077002E006E00610070002E006500640075002F0063006100740061006C006F006700" + + "2F00310031003100340030002E00680074006D006C"); + actionURI.getCOSObject().setItem(COSName.URI, utf16URI); + assertEquals("http://www.nap.edu/catalog/11140.html", actionURI.getURI()); + } + + /** + * PDFBOX-3913: Check that URIs encoded in UTF16 (LE) are also supported. + * + * @throws IOException + */ + @Test + public void testUTF16LEURI() throws IOException + { + PDActionURI actionURI = new PDActionURI(); + + COSString utf16URI = COSString.parseHex("FFFE68007400740070003A00"); + actionURI.getCOSObject().setItem(COSName.URI, utf16URI); + assertEquals("http:", actionURI.getURI()); + } + + @Test + public void testUTF7URI() + { + PDActionURI actionURI = new PDActionURI(); + actionURI.setURI("http://pdfbox.apache.org/"); + assertEquals("http://pdfbox.apache.org/", actionURI.getURI()); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/AcroFormsRotationTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/AcroFormsRotationTest.java new file mode 100644 index 00000000000..52a0eb31f24 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/AcroFormsRotationTest.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.pdmodel.interactive.form; + +import java.io.File; +import java.io.IOException; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.rendering.TestPDFToImage; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class AcroFormsRotationTest +{ + + private static final File OUT_DIR = new File("target/test-output"); + private static final File IN_DIR = new File("src/test/resources/org/apache/pdfbox/pdmodel/interactive/form"); + private static final String NAME_OF_PDF = "AcroFormsRotation.pdf"; + private static final String TEST_VALUE = "Lorem ipsum dolor sit amet, consetetur sadipscing elitr," + + " sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua."; + + private PDDocument document; + private PDAcroForm acroForm; + + @Before + public void setUp() throws IOException + { + document = PDDocument.load(new File(IN_DIR, NAME_OF_PDF)); + acroForm = document.getDocumentCatalog().getAcroForm(); + OUT_DIR.mkdirs(); + } + + @Test + public void fillFields() throws IOException + { + + // portrait page + // single line fields: each is filled with its own fully qualified name + PDField field = acroForm.getField("pdfbox.portrait.single.rotation0"); + field.setValue(field.getFullyQualifiedName()); + field = 
acroForm.getField("pdfbox.portrait.single.rotation90"); + field.setValue(field.getFullyQualifiedName()); + field = acroForm.getField("pdfbox.portrait.single.rotation180"); + field.setValue(field.getFullyQualifiedName()); + field = acroForm.getField("pdfbox.portrait.single.rotation270"); + field.setValue(field.getFullyQualifiedName()); + + // multiline fields: fully qualified name, a line break, then TEST_VALUE + field = acroForm.getField("pdfbox.portrait.multi.rotation0"); + field.setValue(field.getFullyQualifiedName() + "\n" + TEST_VALUE); + field = acroForm.getField("pdfbox.portrait.multi.rotation90"); + field.setValue(field.getFullyQualifiedName() + "\n" + TEST_VALUE); + field = acroForm.getField("pdfbox.portrait.multi.rotation180"); + field.setValue(field.getFullyQualifiedName() + "\n" + TEST_VALUE); + field = acroForm.getField("pdfbox.portrait.multi.rotation270"); + field.setValue(field.getFullyQualifiedName() + "\n" + TEST_VALUE); + + // 90 degrees rotated page + // single line fields: each is filled with the name it was looked up by + field = acroForm.getField("pdfbox.page90.single.rotation0"); + field.setValue("pdfbox.page90.single.rotation0"); + field = acroForm.getField("pdfbox.page90.single.rotation90"); + field.setValue("pdfbox.page90.single.rotation90"); + field = acroForm.getField("pdfbox.page90.single.rotation180"); + field.setValue("pdfbox.page90.single.rotation180"); + field = acroForm.getField("pdfbox.page90.single.rotation270"); + field.setValue("pdfbox.page90.single.rotation270"); + + // multiline fields: fully qualified name, a line break, then TEST_VALUE + field = acroForm.getField("pdfbox.page90.multi.rotation0"); + field.setValue(field.getFullyQualifiedName() + "\n" + TEST_VALUE); + field = acroForm.getField("pdfbox.page90.multi.rotation90"); + field.setValue(field.getFullyQualifiedName() + "\n" + TEST_VALUE); + field = acroForm.getField("pdfbox.page90.multi.rotation180"); + field.setValue(field.getFullyQualifiedName() + "\n" + TEST_VALUE); + field = acroForm.getField("pdfbox.page90.multi.rotation270"); + field.setValue(field.getFullyQualifiedName() + "\n" + TEST_VALUE); + + // compare 
rendering of the saved file against the expected rendering in IN_DIR + File file = new File(OUT_DIR, NAME_OF_PDF); + document.save(file); + TestPDFToImage testPDFToImage = new TestPDFToImage(TestPDFToImage.class.getName()); + if (!testPDFToImage.doTestFile(file, IN_DIR.getAbsolutePath(), OUT_DIR.getAbsolutePath())) + { + // don't fail, rendering is different on different systems, result + // must be viewed manually + System.err.println("Rendering of " + file + " failed or is not identical to expected rendering in " + IN_DIR + + " directory"); + } + } + + @After + public void tearDown() throws IOException + { + document.close(); + } + +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java new file mode 100644 index 00000000000..f3f8888a0df --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.pdfbox.pdmodel.interactive.form; + +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.pdfparser.PDFStreamParser; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test handling some special characters when setting a field's value. + * + * Compare the results of setting the values using PDFBox with setting the values + * via Acrobat using JavaScript and manual input. + * + * The JavaScript used for Acrobat is + * + * <pre>
    + * {@code
    + * this.getField("acrobat-nul").value = "NUL\0NUL";
    + * this.getField("acrobat-tab").value = "TAB\tTAB";
    + * this.getField("acrobat-space").value = "SPACE SPACE";
    + * this.getField("acrobat-cr").value = "CR\rCR";
    + * this.getField("acrobat-lf").value = "LF\nLF";
    + * this.getField("acrobat-crlf").value = "CRLF\r\nCRLF";
    + * this.getField("acrobat-lfcr").value = "LFCR\n\rLFCR";
    + * this.getField("acrobat-linebreak").value = "linebreak\u2028linebreak";
    + * this.getField("acrobat-paragraphbreak").value = "paragraphbreak\u2029paragraphbreak";
    + * }
    + * </pre>
    + * + * @see <a href="https://issues.apache.org/jira/browse/PDFBOX-3461">https://issues.apache.org/jira/browse/PDFBOX-3461</a> + * + */ +public class ControlCharacterTest { + private static final File IN_DIR = new File("src/test/resources/org/apache/pdfbox/pdmodel/interactive/form"); + private static final String NAME_OF_PDF = "ControlCharacters.pdf"; + + private PDDocument document; + private PDAcroForm acroForm; + + @Before + public void setUp() throws IOException + { + document = PDDocument.load(new File(IN_DIR, NAME_OF_PDF)); + acroForm = document.getDocumentCatalog().getAcroForm(); + } + + @Test(expected=IllegalArgumentException.class) + public void characterNUL() throws IOException + { + acroForm.getField("pdfbox-nul").setValue("NUL\0NUL"); + } + + @Test + public void characterTAB() throws IOException + { + acroForm.getField("pdfbox-tab").setValue("TAB\tTAB"); + } + + @Test + public void characterSPACE() throws IOException + { + PDField field = acroForm.getField("pdfbox-space"); + field.setValue("SPACE SPACE"); + + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-space")); + + assertEquals(acrobatValues, pdfboxValues); + } + + @Test + public void characterCR() throws IOException + { + PDField field = acroForm.getField("pdfbox-cr"); + field.setValue("CR\rCR"); + + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-cr")); + + assertEquals(acrobatValues, pdfboxValues); + } + + @Test + public void characterLF() throws IOException + { + PDField field = acroForm.getField("pdfbox-lf"); + field.setValue("LF\nLF"); + + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-lf")); + + assertEquals(acrobatValues, pdfboxValues); + } + + @Test + public void characterCRLF() throws IOException + { + PDField field = acroForm.getField("pdfbox-crlf"); + field.setValue("CRLF\r\nCRLF"); + + List<String> pdfboxValues = 
getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-crlf")); + + assertEquals(acrobatValues, pdfboxValues); + } + + @Test + public void characterLFCR() throws IOException + { + PDField field = acroForm.getField("pdfbox-lfcr"); + field.setValue("LFCR\n\rLFCR"); + + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-lfcr")); + + assertEquals(acrobatValues, pdfboxValues); + } + + @Test + public void characterUnicodeLinebreak() throws IOException + { + PDField field = acroForm.getField("pdfbox-linebreak"); + field.setValue("linebreak\u2028linebreak"); + + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-linebreak")); + + assertEquals(acrobatValues, pdfboxValues); + } + + @Test + public void characterUnicodeParagraphbreak() throws IOException + { + PDField field = acroForm.getField("pdfbox-paragraphbreak"); + field.setValue("paragraphbreak\u2029paragraphbreak"); + + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-paragraphbreak")); + + assertEquals(acrobatValues, pdfboxValues); + } + + @After + public void tearDown() throws IOException + { + document.close(); + } + + private List<String> getStringsFromStream(PDField field) throws IOException + { + PDAnnotationWidget widget = field.getWidgets().get(0); + PDFStreamParser parser = new PDFStreamParser(widget.getNormalAppearanceStream()); + + Object token = parser.parseNextToken(); + + List<String> stringValues = new ArrayList<String>(); + + while (token != null) + { + if (token instanceof COSString) + { + // TODO: improve the string output to better match + // trimming as Acrobat adds spaces to strings + // where we don't + stringValues.add(((COSString) token).getString().trim()); + } + token = parser.parseNextToken(); + } + return stringValues; + } +} diff --git
a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/HandleDifferentDALevelsTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/HandleDifferentDALevelsTest.java new file mode 100644 index 00000000000..bc56ada80d2 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/HandleDifferentDALevelsTest.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.interactive.form; + +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class HandleDifferentDALevelsTest +{ + private static final File OUT_DIR = new File("target/test-output"); + private static final File IN_DIR = new File("src/test/resources/org/apache/pdfbox/pdmodel/interactive/form"); + private static final String NAME_OF_PDF = "DifferentDALevels.pdf"; + + private PDDocument document; + private PDAcroForm acroForm; + + @Before + public void setUp() throws IOException + { + document = PDDocument.load(new File(IN_DIR, NAME_OF_PDF)); + acroForm = document.getDocumentCatalog().getAcroForm(); + OUT_DIR.mkdirs(); + + // prefill the fields to generate the appearance streams + PDTextField field = (PDTextField) acroForm.getField("SingleAnnotation"); + field.setValue("single annotation"); + + field = (PDTextField) acroForm.getField("MultipeAnnotations-SameLayout"); + field.setValue("same layout"); + + field = (PDTextField) acroForm.getField("MultipleAnnotations-DifferentLayout"); + field.setValue("different layout"); + + File file = new File(OUT_DIR, NAME_OF_PDF); + document.save(file); + + } + + @Test + public void checkSingleAnnotation() throws IOException + { + PDTextField field = (PDTextField) acroForm.getField("SingleAnnotation"); + String fieldFontSetting = getFontSettingFromDA(field); + List<PDAnnotationWidget> widgets = field.getWidgets(); + for (PDAnnotationWidget widget : widgets) + { + String contentAsString = new String(widget.getNormalAppearanceStream().getContentStream().toByteArray()); + assertTrue("font setting in content stream shall be " + fieldFontSetting, contentAsString.indexOf(fieldFontSetting) > 0); + } + } + + @Test + public void checkSameLayout() throws IOException + { + 
PDTextField field = (PDTextField) acroForm.getField("MultipeAnnotations-SameLayout"); + String fieldFontSetting = getFontSettingFromDA(field); + List<PDAnnotationWidget> widgets = field.getWidgets(); + for (PDAnnotationWidget widget : widgets) + { + String contentAsString = new String(widget.getNormalAppearanceStream().getContentStream().toByteArray()); + assertTrue("font setting in content stream shall be " + fieldFontSetting, contentAsString.indexOf(fieldFontSetting) > 0); + } + } + + // TODO: enable the test after issue 3687 has been fixed + @Test + public void checkDifferentLayout() throws IOException + { + PDTextField field = (PDTextField) acroForm.getField("MultipleAnnotations-DifferentLayout"); + String fieldFontSetting = getFontSettingFromDA(field); + List<PDAnnotationWidget> widgets = field.getWidgets(); + for (PDAnnotationWidget widget : widgets) + { + String widgetFontSetting = getFontSettingFromDA(widget); + String fontSetting = widgetFontSetting == null ? fieldFontSetting : widgetFontSetting; + String contentAsString = new String(widget.getNormalAppearanceStream().getContentStream().toByteArray()); + assertTrue("font setting in content stream shall be " + fontSetting, contentAsString.indexOf(fontSetting) > 0); + } + } + + @After + public void tearDown() throws IOException + { + document.close(); + } + + private String getFontSettingFromDA(PDTextField field) + { + String defaultAppearance = field.getDefaultAppearance(); + // get the font setting from the default appearance string + return defaultAppearance.substring(0, defaultAppearance.lastIndexOf("Tf")+2); + } + + private String getFontSettingFromDA(PDAnnotationWidget widget) + { + String defaultAppearance = widget.getCOSObject().getString(COSName.DA); + if (defaultAppearance != null) + { + return defaultAppearance.substring(0, defaultAppearance.lastIndexOf("Tf")+2); + } + return defaultAppearance; + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/MultilineFieldsTest.java
b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/MultilineFieldsTest.java index f7cc7861058..4e9c7e21340 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/MultilineFieldsTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/MultilineFieldsTest.java @@ -17,9 +17,19 @@ package org.apache.pdfbox.pdmodel.interactive.form; +import static org.junit.Assert.assertEquals; + import java.io.File; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; import org.apache.pdfbox.rendering.TestPDFToImage; import org.junit.After; import org.junit.Before; @@ -94,6 +104,102 @@ public void fillFields() throws IOException System.err.println ("Rendering of " + file + " failed or is not identical to expected rendering in " + IN_DIR + " directory"); } } + + // Test for PDFBOX-3812 + @Test + public void testMultilineAuto() throws IOException + { + PDDocument document = PDDocument.load(new File(IN_DIR, "PDFBOX3812-acrobat-multiline-auto.pdf")); + PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(); + + // Get and store the font sizes in the original PDF + PDTextField fieldMultiline = (PDTextField) acroForm.getField("Multiline"); + float fontSizeMultiline = getFontSizeFromAppearanceStream(fieldMultiline); + + PDTextField fieldSingleline = (PDTextField) acroForm.getField("Singleline"); + float fontSizeSingleline = getFontSizeFromAppearanceStream(fieldSingleline); + + PDTextField fieldMultilineAutoscale = (PDTextField) acroForm.getField("MultilineAutoscale"); + float fontSizeMultilineAutoscale = getFontSizeFromAppearanceStream(fieldMultilineAutoscale); + + PDTextField fieldSinglelineAutoscale 
= (PDTextField) acroForm.getField("SinglelineAutoscale"); + float fontSizeSinglelineAutoscale = getFontSizeFromAppearanceStream(fieldSinglelineAutoscale); + + fieldMultiline.setValue("Multiline - Fixed"); + fieldSingleline.setValue("Singleline - Fixed"); + fieldMultilineAutoscale.setValue("Multiline - auto"); + fieldSinglelineAutoscale.setValue("Singleline - auto"); + + assertEquals(fontSizeMultiline, getFontSizeFromAppearanceStream(fieldMultiline), 0.001f); + assertEquals(fontSizeSingleline, getFontSizeFromAppearanceStream(fieldSingleline), 0.001f); + assertEquals(fontSizeMultilineAutoscale, getFontSizeFromAppearanceStream(fieldMultilineAutoscale), 0.001f); + assertEquals(fontSizeSinglelineAutoscale, getFontSizeFromAppearanceStream(fieldSinglelineAutoscale), 0.025f); + document.close(); + } + + // Test for PDFBOX-3835 + @Test + public void testMultilineBreak() throws IOException + { + final String TEST_PDF = "PDFBOX-3835-input-acrobat-wrap.pdf"; + PDDocument document = PDDocument.load(new File(IN_DIR, TEST_PDF)); + PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(); + + // Get the field value and the text lines of the appearance stream in the original PDF + PDTextField fieldInput = (PDTextField) acroForm.getField("filled"); + String fieldValue = fieldInput.getValue(); + List<String> acrobatLines = getTextLinesFromAppearanceStream(fieldInput); + fieldInput.setValue(fieldValue); + List<String> pdfboxLines = getTextLinesFromAppearanceStream(fieldInput); + assertEquals("Number of lines generated by PDFBox shall match Acrobat", acrobatLines.size(),pdfboxLines.size()); + for (int i = 0; i < acrobatLines.size(); i++) + { + assertEquals("Number of characters per lines generated by PDFBox shall match Acrobat", acrobatLines.get(i).length(), pdfboxLines.get(i).length()); + } + document.close(); + } + + private float getFontSizeFromAppearanceStream(PDField field) throws IOException + { + PDAnnotationWidget widget = field.getWidgets().get(0); + PDFStreamParser parser = new PDFStreamParser(widget.getNormalAppearanceStream()); + + 
Object token = parser.parseNextToken(); + + while (token != null) + { + if (token instanceof COSName && ((COSName) token).getName().equals("Helv")) + { + token = parser.parseNextToken(); + if (token != null && token instanceof COSNumber) + { + return ((COSNumber) token).floatValue(); + } + } + token = parser.parseNextToken(); + } + return 0; + } + + private List<String> getTextLinesFromAppearanceStream(PDField field) throws IOException + { + PDAnnotationWidget widget = field.getWidgets().get(0); + PDFStreamParser parser = new PDFStreamParser(widget.getNormalAppearanceStream()); + + Object token = parser.parseNextToken(); + + List<String> lines = new ArrayList<String>(); + + while (token != null) + { + if (token instanceof COSString) + { + lines.add(((COSString) token).getString()); + } + token = parser.parseNextToken(); + } + return lines; + } @After public void tearDown() throws IOException diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormFlattenTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormFlattenTest.java new file mode 100644 index 00000000000..ed94e863e27 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormFlattenTest.java @@ -0,0 +1,401 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.interactive.form; + +import static org.junit.Assert.assertTrue; + +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FilenameFilter; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URL; + +import javax.imageio.ImageIO; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.rendering.TestPDFToImage; +import static org.junit.Assert.fail; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests flattening of different forms: each PDF is downloaded, rendered, flattened and its rendering compared with the rendering before flattening. + * + * Some of the tests are currently disabled (their Test annotation is commented out) to not run within the CI environment + * as the test results need manual inspection. Enable as needed. + * + */ +public class PDAcroFormFlattenTest +{ + + private static final File IN_DIR = new File("target/test-output/flatten/in"); + private static final File OUT_DIR = new File("target/test-output/flatten/out"); + + @Before + public void setUp() + { + IN_DIR.mkdirs(); + OUT_DIR.mkdirs(); + } + + /* + * PDFBOX-142 Filled template. + */ + // @Test + public void testFlattenPDFBOX142() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12742551/Testformular1.pdf"; + String targetFileName = "Testformular1.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /* + * PDFBOX-563 Filled template. + */ + // Disabled as there is a minimal difference which can not be seen visually on ci-builds + // @Test + public void testFlattenPDFBOX563() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12425859/TestFax_56972.pdf"; + String targetFileName = "TestFax_56972.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /* + * PDFBOX-2469 Empty template.
+ */ + @Test + public void testFlattenPDFBOX2469Empty() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12682897/FormI-9-English.pdf"; + String targetFileName = "FormI-9-English.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /* + * PDFBOX-2469 Filled template. + */ + // Disabled as there is a minimal difference which can not be seen visually, see PDFBOX-5133 + // @Test + public void testFlattenPDFBOX2469Filled() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12678455/testPDF_acroForm.pdf"; + String targetFileName = "testPDF_acroForm.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /* + * PDFBOX-2586 Empty template. + */ + @Test + public void testFlattenPDFBOX2586() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12689788/test.pdf"; + String targetFileName = "test-2586.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /* + * PDFBOX-3083 Filled template rotated. + */ + // @Test + public void testFlattenPDFBOX3083() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12770263/mypdf.pdf"; + String targetFileName = "mypdf.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /* + * PDFBOX-3262 Hidden fields + */ + @Test + public void testFlattenPDFBOX3262() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12792007/hidden_fields.pdf"; + String targetFileName = "hidden_fields.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /* + * PDFBOX-3396 Signed Document 1. 
+ */ + @Test + public void testFlattenPDFBOX3396_1() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12816014/Signed-Document-1.pdf"; + String targetFileName = "Signed-Document-1.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /* + * PDFBOX-3396 Signed Document 2. + */ + @Test + public void testFlattenPDFBOX3396_2() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12816016/Signed-Document-2.pdf"; + String targetFileName = "Signed-Document-2.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /* + * PDFBOX-3396 Signed Document 3. + */ + @Test + public void testFlattenPDFBOX3396_3() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12821307/Signed-Document-3.pdf"; + String targetFileName = "Signed-Document-3.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /* + * PDFBOX-3396 Signed Document 4. + */ + @Test + public void testFlattenPDFBOX3396_4() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12821308/Signed-Document-4.pdf"; + String targetFileName = "Signed-Document-4.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /* + * PDFBOX-3587 Filled template. + */ + // @Test + public void testFlattenOpenOfficeFormFilled() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12840280/OpenOfficeForm_filled.pdf"; + String targetFileName = "OpenOfficeForm_filled.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /** + * PDFBOX-4157 Filled template. + */ + // @Test + public void testFlattenPDFBox4157() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12976553/PDFBOX-4157-filled.pdf"; + String targetFileName = "PDFBOX-4157-filled.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /** + * PDFBOX-4172 Filled template. 
+ */ + // @Test + public void testFlattenPDFBox4172() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12976552/PDFBOX-4172-filled.pdf"; + String targetFileName = "PDFBOX-4172-filled.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /** + * PDFBOX-4615 Filled template. + */ + // Disabled as there is a minimal difference which can not be seen visually on ci-builds + // @Test + public void testFlattenPDFBox4615() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12976452/resetboundingbox-filled.pdf"; + String targetFileName = "PDFBOX-4615-filled.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /** + * PDFBOX-4693: page is not rotated, but the appearance stream is. + */ + @Test + public void testFlattenPDFBox4693() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12986337/stenotypeTest-3_rotate_no_flatten.pdf"; + String targetFileName = "PDFBOX-4693-filled.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /** + * PDFBOX-4788: non-widget annotations are not to be removed on a page that has no widget + * annotations. + */ + @Test + public void testFlattenPDFBox4788() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12994791/flatten.pdf"; + String targetFileName = "PDFBOX-4788.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /** + * PDFBOX-4889: appearance streams with empty /BBox. + * + * @throws IOException + */ + @Test + public void testFlattenPDFBox4889() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/13005793/f1040sb%20test.pdf"; + String targetFileName = "PDFBOX-4889.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /** + * PDFBOX-4955: appearance streams with forms that are not used. 
+ * + * @throws IOException + */ + @Test + public void testFlattenPDFBox4955() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/13011410/PDFBOX-4955.pdf"; + String targetFileName = "PDFBOX-4955.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + // Disabled as there is a minimal difference which can not be seen visually on ci-builds + // @Test + public void testFlattenPDFBox4958() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/13012242/PDFBOX-4958.pdf"; + String targetFileName = "PDFBOX-4958-flattened.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + + /* + * Download the PDF, render sample images from it, flatten its AcroForm and compare the rendering of the flattened file with those samples; on a match the input file, output file and sample images are deleted. NOTE(review): testPdf is not closed if the assertion or the save throws. + */ + private static void flattenAndCompare(String sourceUrl, String targetFileName) throws IOException + { + generateSamples(sourceUrl,targetFileName); + + File inputFile = new File(IN_DIR, targetFileName); + File outputFile = new File(OUT_DIR, targetFileName); + + PDDocument testPdf = PDDocument.load(inputFile); + testPdf.getDocumentCatalog().getAcroForm().flatten(); + testPdf.setAllSecurityToBeRemoved(true); + assertTrue(testPdf.getDocumentCatalog().getAcroForm().getFields().isEmpty()); + testPdf.save(outputFile); + testPdf.close(); + + // compare rendering + TestPDFToImage testPDFToImage = new TestPDFToImage(TestPDFToImage.class.getName()); + if (!testPDFToImage.doTestFile(outputFile, IN_DIR.getAbsolutePath(), OUT_DIR.getAbsolutePath())) + { + fail("Rendering of " + outputFile + " failed or is not identical to expected rendering in " + IN_DIR + " directory"); + } + else + { + // cleanup input and output directory for matching files. + removeAllRenditions(inputFile); + inputFile.delete(); + outputFile.delete(); + } + } + + /* + * Generate the sample images to which the PDF will be compared after flatten: one PNG per page, rendered at 96 DPI into IN_DIR.
+ */ + private static void generateSamples(String sourceUrl, String targetFile) throws IOException + { + getFromUrl(sourceUrl, targetFile); + + File file = new File(IN_DIR,targetFile); + + PDDocument document = PDDocument.load(file, (String) null); + String outputPrefix = IN_DIR.getAbsolutePath() + '/' + file.getName() + "-"; + int numPages = document.getNumberOfPages(); + + PDFRenderer renderer = new PDFRenderer(document); + for (int i = 0; i < numPages; i++) + { + String fileName = outputPrefix + (i + 1) + ".png"; + BufferedImage image = renderer.renderImageWithDPI(i, 96); // Windows native DPI + ImageIO.write(image, "PNG", new File(fileName)); + } + document.close(); + } + + /* + * Get a PDF from URL and copy it to a file in IN_DIR for processing. NOTE(review): neither stream is closed if reading or writing throws. + */ + private static void getFromUrl(String sourceUrl, String targetFile) throws IOException + { + URL url = new URL(sourceUrl); + + InputStream is = url.openStream(); + OutputStream os = new FileOutputStream(new File(IN_DIR, targetFile)); + + byte[] b = new byte[2048]; + int length; + + while ((length = is.read(b)) != -1) + { + os.write(b, 0, length); + } + is.close(); + os.close(); + } + + /* + * Remove renditions for the PDF from the input directory. + * The output directory will have been cleaned by the TestPDFToImage utility.
+ */ + private static void removeAllRenditions(final File inputFile) + { + File[] testFiles = inputFile.getParentFile().listFiles(new FilenameFilter() + { + @Override + public boolean accept(File dir, String name) + { + return (name.startsWith(inputFile.getName()) && name.toLowerCase().endsWith(".png")); + } + }); + + for (File testFile : testFiles) + { + testFile.delete(); + } + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormFromAnnotsTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormFromAnnotsTest.java new file mode 100644 index 00000000000..983aae4f4d3 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormFromAnnotsTest.java @@ -0,0 +1,394 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.interactive.form; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; + +import java.io.IOException; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; + +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.fixup.AbstractFixup; +import org.apache.pdfbox.pdmodel.fixup.AcroFormDefaultFixup; +import org.apache.pdfbox.pdmodel.fixup.processor.AcroFormOrphanWidgetsProcessor; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.junit.Test; + +/** + * Tests for building AcroForm entries from Widget annotations. + * + */ +public class PDAcroFormFromAnnotsTest +{ + /** + * PDFBOX-4985 AcroForms entry but empty Fields array + * + * Using the default get acroform call with error correction + * + * @throws IOException + */ + @Test + public void testFromAnnots4985DefaultMode() throws IOException + { + + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/13013354/POPPLER-806.pdf"; + String acrobatSourceUrl = "https://issues.apache.org/jira/secure/attachment/13013384/POPPLER-806-acrobat.pdf"; + + int numFormFieldsByAcrobat = 0; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(acrobatSourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + PDAcroForm acroForm = catalog.getAcroForm(null); + numFormFieldsByAcrobat = acroForm.getFields().size(); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + // need to do a low level cos access as 
the PDModel access will build the AcroForm + COSDictionary cosAcroForm = (COSDictionary) catalog.getCOSObject().getDictionaryObject(COSName.ACRO_FORM); + COSArray cosFields = (COSArray) cosAcroForm.getDictionaryObject(COSName.FIELDS); + assertEquals("Initially there shall be 0 fields", 0, cosFields.size()); + PDAcroForm acroForm = catalog.getAcroForm(); + assertEquals("After rebuild there shall be " + numFormFieldsByAcrobat + " fields", numFormFieldsByAcrobat, acroForm.getFields().size()); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-4985 AcroForms entry but empty Fields array + * + * Using the acroform call with error correction + * + * @throws IOException + */ + @Test + public void testFromAnnots4985CorrectionMode() throws IOException + { + + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/13013354/POPPLER-806.pdf"; + String acrobatSourceUrl = "https://issues.apache.org/jira/secure/attachment/13013384/POPPLER-806-acrobat.pdf"; + + int numFormFieldsByAcrobat = 0; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(acrobatSourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + PDAcroForm acroForm = catalog.getAcroForm(null); + numFormFieldsByAcrobat = acroForm.getFields().size(); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + // need to do a low level cos access as the PDModel access will build the AcroForm + COSDictionary cosAcroForm = (COSDictionary) catalog.getCOSObject().getDictionaryObject(COSName.ACRO_FORM); + COSArray cosFields = (COSArray) cosAcroForm.getDictionaryObject(COSName.FIELDS); + assertEquals("Initially there shall be 0 fields", 0, cosFields.size()); + PDAcroForm acroForm = catalog.getAcroForm(new AcroFormDefaultFixup(testPdf)); + assertEquals("After rebuild there shall be " + 
numFormFieldsByAcrobat + " fields", numFormFieldsByAcrobat, acroForm.getFields().size()); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-4985 AcroForms entry but empty Fields array + * + * Using the acroform call without error correction + * + * @throws IOException + */ + @Test + public void testFromAnnots4985WithoutCorrectionMode() throws IOException + { + + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/13013354/POPPLER-806.pdf"; + + int numCosFormFields = 0; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + // need to do a low level cos access as the PDModel access will build the AcroForm + COSDictionary cosAcroForm = (COSDictionary) catalog.getCOSObject().getDictionaryObject(COSName.ACRO_FORM); + COSArray cosFields = (COSArray) cosAcroForm.getDictionaryObject(COSName.FIELDS); + numCosFormFields = cosFields.size(); + assertEquals("Initially there shall be 0 fields", 0, cosFields.size()); + PDAcroForm acroForm = catalog.getAcroForm(null); + assertEquals("After call without correction there shall be " + numCosFormFields + " fields", numCosFormFields, acroForm.getFields().size()); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-3891 AcroForm with empty fields entry + * + * With the default correction nothing shall be added + * + * @throws IOException + */ + @Test + public void testFromAnnots3891DontCreateFields() throws IOException + { + + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12881055/merge-test.pdf"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + // need to do a low level cos access as the PDModel access will build the AcroForm + COSDictionary cosAcroForm = (COSDictionary) 
catalog.getCOSObject().getDictionaryObject(COSName.ACRO_FORM); + COSArray cosFields = (COSArray) cosAcroForm.getDictionaryObject(COSName.FIELDS); + assertEquals("Initially there shall be 0 fields", 0, cosFields.size()); + PDAcroForm acroForm = catalog.getAcroForm(); + assertEquals("After call with default correction there shall be 0 fields", 0, acroForm.getFields().size()); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-3891 AcroForm with empty fields entry + * + * Special fixup to create fields + * + * @throws IOException + */ + @Test + public void testFromAnnots3891CreateFields() throws IOException + { + + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12881055/merge-test.pdf"; + String acrobatSourceUrl = "https://issues.apache.org/jira/secure/attachment/13014447/merge-test-na-acrobat.pdf"; + + int numFormFieldsByAcrobat = 0; + + // will build the expected fields using the acrobat source document + Map fieldsByName = new HashMap(); + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(acrobatSourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + PDAcroForm acroForm = catalog.getAcroForm(null); + numFormFieldsByAcrobat = acroForm.getFields().size(); + for (PDField field : acroForm.getFieldTree()) + { + fieldsByName.put(field.getFullyQualifiedName(), field); + } + } + finally + { + IOUtils.closeQuietly(testPdf); + } + + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + // need to do a low level cos access as the PDModel access will build the AcroForm + COSDictionary cosAcroForm = (COSDictionary) catalog.getCOSObject().getDictionaryObject(COSName.ACRO_FORM); + COSArray cosFields = (COSArray) cosAcroForm.getDictionaryObject(COSName.FIELDS); + assertEquals("Initially there shall be 0 fields", 0, cosFields.size()); + PDAcroForm acroForm = catalog.getAcroForm(new 
CreateFieldsFixup(testPdf)); + assertEquals("After rebuild there shall be " + numFormFieldsByAcrobat + " fields", numFormFieldsByAcrobat, acroForm.getFields().size()); + + // test that the fields found are contained in the map + for (PDField field : acroForm.getFieldTree()) + { + assertNotNull(fieldsByName.get(field.getFullyQualifiedName())); + } + + // test all fields in the map are also found in the AcroForm + for (String fieldName : fieldsByName.keySet()) + { + assertNotNull(acroForm.getField(fieldName)); + } + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-3891 AcroForm with empty fields entry + * + * Check if the font resources added by PDFBox match those added by Acrobat + * which are taken from the widget normal appearance resources + * + * @throws IOException + */ + @Test + public void testFromAnnots3891ValidateFont() throws IOException + { + + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12881055/merge-test.pdf"; + String acrobatSourceUrl = "https://issues.apache.org/jira/secure/attachment/13014447/merge-test-na-acrobat.pdf"; + + // will build the expected font resource names and font descriptor names using the acrobat source document + Map fontNames = new HashMap(); + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(acrobatSourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + PDAcroForm acroForm = catalog.getAcroForm(null); + PDResources acroFormResources = acroForm.getDefaultResources(); + if (acroFormResources != null) + { + for (COSName fontName : acroFormResources.getFontNames()) + { + try + { + PDFont font = acroFormResources.getFont(fontName); + font.getFontDescriptor().getFontName(); + fontNames.put(fontName.getName(), font.getName()); + } + catch (IOException ioe) + { + //ignoring + } + } + } + } + finally + { + IOUtils.closeQuietly(testPdf); + } + + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDDocumentCatalog catalog 
= testPdf.getDocumentCatalog(); + PDAcroForm acroForm = catalog.getAcroForm(new CreateFieldsFixup(testPdf)); + PDResources acroFormResources = acroForm.getDefaultResources(); + if (acroFormResources != null) + { + for (COSName fontName : acroFormResources.getFontNames()) + { + try + { + PDFont font = acroFormResources.getFont(fontName); + String pdfBoxFontName = font.getFontDescriptor().getFontName(); + assertEquals("font resource added by Acrobat shall match font resource added by PDFBox", fontNames.get(fontName.getName()), pdfBoxFontName); + } + catch (IOException ioe) + { + //ignoring + } + } + } + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-3891 null PDFieldFactory.createField + * @throws IOException + */ + @Test + public void testFromAnnots3891NullField() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/13016993/poppler-14433-0.pdf"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + boolean thrown = false; + try + { + catalog.getAcroForm(new CreateFieldsFixup(testPdf)); + } + catch (Exception e) + { + thrown = true; + } + assertFalse("There shall be no exception when getting the AcroForm", thrown); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + + + /* + * Create fields from widget annotations + */ + class CreateFieldsFixup extends AbstractFixup + { + CreateFieldsFixup(PDDocument document) + { + super(document); + } + + @Override + public void apply() { + new AcroFormOrphanWidgetsProcessor(document).process(); + + } + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormGenerateAppearancesTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormGenerateAppearancesTest.java new file mode 100644 index 00000000000..dc763ae6d47 --- /dev/null +++ 
b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormGenerateAppearancesTest.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.pdmodel.interactive.form; + +import static org.junit.Assert.assertFalse; + +import java.io.IOException; +import java.net.URL; + +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import org.junit.Test; + +public class PDAcroFormGenerateAppearancesTest { + + /** + * PDFBOX-5041 Missing font descriptor + * + * @throws IOException + */ + @Test + public void test5041MissingFontDescriptor() throws IOException + { + + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/13016941/REDHAT-1301016-0.pdf"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + boolean thrown = false; + try + { + catalog.getAcroForm(); + } + catch (Exception e) + { + thrown = true; + } + assertFalse("There shall be no exception when getting the AcroForm", thrown); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * 
PDFBOX-4086 Character missing for encoding + * @throws IOException + */ + @Test + public void test4086CharNotEncodable() throws IOException + { + + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12908175/AML1.PDF"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + boolean thrown = false; + try + { + catalog.getAcroForm(); + } + catch (Exception e) + { + thrown = true; + } + assertFalse("There shall be no exception when getting the AcroForm", thrown); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-5043 PaperMetaData + * @throws IOException + */ + @Test + public void test5043PaperMetaData() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/13016992/PDFBOX-3891-5.pdf"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + boolean thrown = false; + try + { + catalog.getAcroForm(); + } + catch (Exception e) + { + thrown = true; + } + assertFalse("There shall be no exception when getting the AcroForm", thrown); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormTest.java index baee95f6059..4a417d56828 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormTest.java @@ -17,15 +17,32 @@ package org.apache.pdfbox.pdmodel.interactive.form; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static 
org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.fail; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; import org.apache.pdfbox.rendering.TestPDFToImage; import org.junit.After; import org.junit.Before; @@ -58,7 +75,7 @@ public void testFieldsEntry() // the /Fields entry has been created with the AcroForm // as this is a required entry assertNotNull(acroForm.getFields()); - assertEquals(acroForm.getFields().size(),0); + assertEquals(0, acroForm.getFields().size()); // there shouldn't be an exception if there is no such field assertNull(acroForm.getField("foo")); @@ -69,7 +86,7 @@ public void testFieldsEntry() // ensure there is always an empty collection returned assertNotNull(acroForm.getFields()); - assertEquals(acroForm.getFields().size(),0); + assertEquals(0, acroForm.getFields().size()); // there shouldn't be an exception if there is no such field assertNull(acroForm.getField("foo")); @@ -80,7 +97,7 @@ public void testAcroFormProperties() { assertTrue(acroForm.getDefaultAppearance().isEmpty()); acroForm.setDefaultAppearance("/Helv 0 Tf 0 g"); - assertEquals(acroForm.getDefaultAppearance(),"/Helv 0 Tf 0 g"); + assertEquals("/Helv 0 Tf 0 g", acroForm.getDefaultAppearance()); } @Test @@ 
-101,11 +118,365 @@ public void testFlatten() throws IOException } + /* + * Same as above but remove the page reference from the widget annotation + * before doing the flatten() to ensure that the widgets page reference is properly looked up + * (PDFBOX-3301) + */ + @Test + public void testFlattenWidgetNoRef() throws IOException + { + PDDocument testPdf = PDDocument.load(new File(IN_DIR, "AlignmentTests.pdf")); + PDAcroForm acroForm = testPdf.getDocumentCatalog().getAcroForm(); + for (PDField field : acroForm.getFieldTree()) { + for (PDAnnotationWidget widget : field.getWidgets()) { + widget.getCOSObject().removeItem(COSName.P); + } + } + testPdf.getDocumentCatalog().getAcroForm().flatten(); + + // 36 non widget annotations shall not be flattened + assertEquals(36, testPdf.getPage(0).getAnnotations().size()); + + assertTrue(testPdf.getDocumentCatalog().getAcroForm().getFields().isEmpty()); + File file = new File(OUT_DIR, "AlignmentTests-flattened-noRef.pdf"); + testPdf.save(file); + // compare rendering + TestPDFToImage testPDFToImage = new TestPDFToImage(TestPDFToImage.class.getName()); + if (!testPDFToImage.doTestFile(file, IN_DIR.getAbsolutePath(), OUT_DIR.getAbsolutePath())) + { + // don't fail, rendering is different on different systems, result must be viewed manually + System.out.println("Rendering of " + file + " failed or is not identical to expected rendering in " + IN_DIR + " directory"); + } + } + + @Test + public void testFlattenSpecificFieldsOnly() throws IOException + { + File file = new File(OUT_DIR, "AlignmentTests-flattened-specificFields.pdf"); + + List fieldsToFlatten = new ArrayList(); + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new File(IN_DIR, "AlignmentTests.pdf")); + PDAcroForm acroFormToFlatten = testPdf.getDocumentCatalog().getAcroForm(); + int numFieldsBeforeFlatten = acroFormToFlatten.getFields().size(); + int numWidgetsBeforeFlatten = countWidgets(testPdf); + + 
fieldsToFlatten.add(acroFormToFlatten.getField("AlignLeft-Border_Small-Filled")); + fieldsToFlatten.add(acroFormToFlatten.getField("AlignLeft-Border_Medium-Filled")); + fieldsToFlatten.add(acroFormToFlatten.getField("AlignLeft-Border_Wide-Filled")); + fieldsToFlatten.add(acroFormToFlatten.getField("AlignLeft-Border_Wide_Clipped-Filled")); + + acroFormToFlatten.flatten(fieldsToFlatten, true); + int numFieldsAfterFlatten = acroFormToFlatten.getFields().size(); + int numWidgetsAfterFlatten = countWidgets(testPdf); + + assertEquals(numFieldsBeforeFlatten, numFieldsAfterFlatten + fieldsToFlatten.size()); + assertEquals(numWidgetsBeforeFlatten, numWidgetsAfterFlatten + fieldsToFlatten.size()); + + testPdf.save(file); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /* + * Test that we do not modify an AcroForm with missing resource information + * when loading the document only. + * (PDFBOX-3752) + */ + @Test + public void testDontAddMissingInformationOnDocumentLoad() + { + try + { + byte[] pdfBytes = createAcroFormWithMissingResourceInformation(); + PDDocument pdfDocument = PDDocument.load(pdfBytes); + + // do a low level access to the AcroForm to avoid the generation of missing entries + PDDocumentCatalog documentCatalog = pdfDocument.getDocumentCatalog(); + COSDictionary catalogDictionary = documentCatalog.getCOSObject(); + COSDictionary acroFormDictionary = (COSDictionary) catalogDictionary.getDictionaryObject(COSName.ACRO_FORM); + + // ensure that the missing information has not been generated + assertNull(acroFormDictionary.getDictionaryObject(COSName.DA)); + assertNull(acroFormDictionary.getDictionaryObject(COSName.RESOURCES)); + + pdfDocument.close(); + } + catch (IOException e) + { + System.err.println("Couldn't create test document, test skipped"); + return; + } + } + + /* + * Test that we add missing resource information to an AcroForm + * when accessing the AcroForm on the PD level + * (PDFBOX-3752) + */ + @Test + public void 
testAddMissingInformationOnAcroFormAccess() + { + try + { + byte[] pdfBytes = createAcroFormWithMissingResourceInformation(); + PDDocument pdfDocument = PDDocument.load(pdfBytes); + PDDocumentCatalog documentCatalog = pdfDocument.getDocumentCatalog(); + + // this call shall trigger the generation of missing information + PDAcroForm theAcroForm = documentCatalog.getAcroForm(); + + // ensure that the missing information has been generated + // DA entry + assertEquals("/Helv 0 Tf 0 g ", theAcroForm.getDefaultAppearance()); + assertNotNull(theAcroForm.getDefaultResources()); + + // DR entry + PDResources acroFormResources = theAcroForm.getDefaultResources(); + assertNotNull(acroFormResources.getFont(COSName.getPDFName("Helv"))); + assertEquals("Helvetica", acroFormResources.getFont(COSName.getPDFName("Helv")).getName()); + assertNotNull(acroFormResources.getFont(COSName.getPDFName("ZaDb"))); + assertEquals("ZapfDingbats", acroFormResources.getFont(COSName.getPDFName("ZaDb")).getName()); + + pdfDocument.close(); + } + catch (IOException e) + { + System.err.println("Couldn't create test document, test skipped"); + return; + } + } + + /* + * Test that we don't add missing resource information to an AcroForm + * when accessing the AcroForm on the PD level with fix ups being set to + * false + * (PDFBOX-5000) + */ + @Test + public void testDontAddMissingInformationOnAcroFormAccess() + { + try + { + byte[] pdfBytes = createAcroFormWithMissingResourceInformation(); + PDDocument pdfDocument = PDDocument.load(pdfBytes); + PDDocumentCatalog documentCatalog = pdfDocument.getDocumentCatalog(); + + // this call shall skip triggering the generation of missing information + PDAcroForm theAcroForm = documentCatalog.getAcroForm(null); + + // ensure that the missing information has not been generated + // DA entry + assertEquals("", theAcroForm.getDefaultAppearance()); + // Resources + assertNull(theAcroForm.getDefaultResources()); + pdfDocument.close(); + } + catch (IOException e) + { 
+ System.err.println("Couldn't create test document, test skipped"); + return; + } + } + + + + /** + * PDFBOX-4235: a bad /DA string should not result in an NPE. + * + * @throws IOException + */ + @Test + public void testBadDA() throws IOException + { + PDDocument doc = new PDDocument(); + + PDPage page = new PDPage(); + doc.addPage(page); + + PDAcroForm acroForm = new PDAcroForm(document); + doc.getDocumentCatalog().setAcroForm(acroForm); + acroForm.setDefaultResources(new PDResources()); + + PDTextField textBox = new PDTextField(acroForm); + textBox.setPartialName("SampleField"); + + // https://stackoverflow.com/questions/50609478/ + // "tf" is a typo, should have been "Tf" and this results that no font is chosen + textBox.setDefaultAppearance("/Helv 0 tf 0 g"); + acroForm.getFields().add(textBox); + + PDAnnotationWidget widget = textBox.getWidgets().get(0); + PDRectangle rect = new PDRectangle(50, 750, 200, 20); + widget.setRectangle(rect); + widget.setPage(page); + + page.getAnnotations().add(widget); + + try + { + textBox.setValue("huhu"); + } + catch (IllegalArgumentException ex) + { + return; + } + finally + { + doc.close(); + } + fail("IllegalArgumentException should have been thrown"); + } + + /** + * PDFBOX-3732, PDFBOX-4303, PDFBOX-4393: Test whether /Helv and /ZaDb get added, but only if + * they don't exist. 
+ */ + @Test + public void testAcroFormDefaultFonts() throws IOException + { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + PDDocument doc = new PDDocument(); + + PDPage page = new PDPage(PDRectangle.A4); + doc.addPage(page); + PDAcroForm acroForm2 = new PDAcroForm(doc); + doc.getDocumentCatalog().setAcroForm(acroForm2); + PDResources defaultResources = acroForm2.getDefaultResources(); + assertNull(defaultResources); + defaultResources = new PDResources(); + acroForm2.setDefaultResources(defaultResources); + assertNull(defaultResources.getFont(COSName.HELV)); + assertNull(defaultResources.getFont(COSName.ZA_DB)); + + // getting AcroForm sets the two fonts + acroForm2 = doc.getDocumentCatalog().getAcroForm(); + defaultResources = acroForm2.getDefaultResources(); + assertNotNull(defaultResources.getFont(COSName.HELV)); + assertNotNull(defaultResources.getFont(COSName.ZA_DB)); + + // repeat with a new AcroForm (to delete AcroForm cache) and thus missing /DR + doc.getDocumentCatalog().setAcroForm(new PDAcroForm(doc)); + acroForm2 = doc.getDocumentCatalog().getAcroForm(); + defaultResources = acroForm2.getDefaultResources(); + PDFont helv = defaultResources.getFont(COSName.HELV); + PDFont zadb = defaultResources.getFont(COSName.ZA_DB); + assertNotNull(helv); + assertNotNull(zadb); + doc.save(baos); + doc.close(); + + doc = PDDocument.load(baos.toByteArray()); + acroForm2 = doc.getDocumentCatalog().getAcroForm(); + defaultResources = acroForm2.getDefaultResources(); + helv = defaultResources.getFont(COSName.HELV); + zadb = defaultResources.getFont(COSName.ZA_DB); + assertNotNull(helv); + assertNotNull(zadb); + // make sure that font wasn't overwritten + assertNotEquals(PDType1Font.HELVETICA, helv); + assertNotEquals(PDType1Font.ZAPF_DINGBATS, zadb); + doc.close(); + } + + /** + * PDFBOX-3777 Illegal Fields definition COSDictionary instead of Array + * + * @throws IOException + */ + @Test + public void testIllegalFieldsDefinition() throws IOException + { 
+ String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12866226/D1790B.PDF"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDDocumentCatalog catalog = testPdf.getDocumentCatalog(); + boolean thrown = false; + try + { + catalog.getAcroForm(); + } + catch (Exception e) + { + thrown = true; + } + assertFalse("There shall be no exception when getting the AcroForm", thrown); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + + @After public void tearDown() throws IOException { document.close(); } + private byte[] createAcroFormWithMissingResourceInformation() throws IOException + { + PDDocument document = new PDDocument(); + PDPage page = new PDPage(); + document.addPage(page); + + PDAcroForm newAcroForm = new PDAcroForm(document); + document.getDocumentCatalog().setAcroForm(newAcroForm); + + PDTextField textBox = new PDTextField(newAcroForm); + textBox.setPartialName("SampleField"); + newAcroForm.getFields().add(textBox); + + PDAnnotationWidget widget = textBox.getWidgets().get(0); + PDRectangle rect = new PDRectangle(50, 750, 200, 20); + widget.setRectangle(rect); + widget.setPage(page); + + page.getAnnotations().add(widget); + + // acroForm.setNeedAppearances(true); + // acroForm.getField("SampleField").getCOSObject().setString(COSName.V, "content"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + document.save(baos); // this is a working PDF + document.close(); + return baos.toByteArray(); + } + + private int countWidgets(PDDocument documentToTest) + { + int count = 0; + for (PDPage page : documentToTest.getPages()) + { + try + { + for (PDAnnotation annotation : page.getAnnotations()) + { + if (annotation instanceof PDAnnotationWidget) + { + count ++; + } + } + } + catch (IOException e) + { + // ignoring + } + } + return count; + } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDButtonTest.java 
b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDButtonTest.java index 3d6982d701a..db83c4bb6a0 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDButtonTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDButtonTest.java @@ -20,13 +20,17 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; + import org.junit.After; +import static org.junit.Assert.assertNotEquals; import org.junit.Before; import org.junit.Test; @@ -36,11 +40,11 @@ */ public class PDButtonTest { - - + private static final File IN_DIR = new File("src/test/resources/org/apache/pdfbox/pdmodel/interactive/form"); private static final String NAME_OF_PDF = "AcroFormsBasicFields.pdf"; - + private static final File TARGET_PDF_DIR = new File("target/pdfs"); + private PDDocument document; private PDAcroForm acroForm; @@ -64,7 +68,7 @@ public void createCheckBox() PDButton buttonField = new PDCheckBox(acroForm); assertEquals(buttonField.getFieldType(), buttonField.getCOSObject().getNameAsString(COSName.FT)); - assertEquals(buttonField.getFieldType(), "Btn"); + assertEquals("Btn", buttonField.getFieldType()); assertFalse(buttonField.isPushButton()); assertFalse(buttonField.isRadioButton()); } @@ -75,7 +79,7 @@ public void createPushButton() PDButton buttonField = new PDPushButton(acroForm); assertEquals(buttonField.getFieldType(), buttonField.getCOSObject().getNameAsString(COSName.FT)); - assertEquals(buttonField.getFieldType(), "Btn"); + assertEquals("Btn", buttonField.getFieldType()); assertTrue(buttonField.isPushButton()); assertFalse(buttonField.isRadioButton()); } @@ -86,18 +90,152 @@ public void 
createRadioButton() PDButton buttonField = new PDRadioButton(acroForm); assertEquals(buttonField.getFieldType(), buttonField.getCOSObject().getNameAsString(COSName.FT)); - assertEquals(buttonField.getFieldType(), "Btn"); + assertEquals("Btn", buttonField.getFieldType()); + assertTrue(buttonField.isRadioButton()); + assertFalse(buttonField.isPushButton()); + } + + @Test + public void changeRadioButtonToPushButton() + { + PDButton buttonField = new PDRadioButton(acroForm); + + assertEquals(buttonField.getFieldType(), buttonField.getCOSObject().getNameAsString(COSName.FT)); + assertEquals("Btn", buttonField.getFieldType()); assertTrue(buttonField.isRadioButton()); assertFalse(buttonField.isPushButton()); + + // change to push button + buttonField.setPushButton(true); + assertFalse(buttonField.isRadioButton()); + assertTrue(buttonField.isPushButton()); + } + + @Test + public void changePushButtonToRadioButton() + { + PDButton buttonField = new PDPushButton(acroForm); + + assertEquals(buttonField.getFieldType(), buttonField.getCOSObject().getNameAsString(COSName.FT)); + assertEquals("Btn", buttonField.getFieldType()); + assertTrue(buttonField.isPushButton()); + assertFalse(buttonField.isRadioButton()); + + // change to radio button + buttonField.setRadioButton(true); + assertFalse(buttonField.isPushButton()); + assertTrue(buttonField.isRadioButton()); + } + + @Test + /** + * PDFBOX-3656 + * + * Test a radio button with options. + * This was causing an ArrayIndexOutOfBoundsException when trying to set to "Off", as this + * wasn't treated as a valid option. 
+ * + * @throws IOException + */ + public void testRadioButtonWithOptions() + { + File file; + PDDocument pdfDocument = null; + + try + { + file = new File(TARGET_PDF_DIR, "PDFBOX-3656.pdf"); + + pdfDocument = PDDocument.load(file); + + PDRadioButton radioButton = (PDRadioButton) pdfDocument.getDocumentCatalog().getAcroForm().getField("Checking/Savings"); + radioButton.setValue("Off"); + for (PDAnnotationWidget widget : radioButton.getWidgets()) + { + assertEquals("The widget should be set to Off", COSName.Off, widget.getCOSObject().getItem(COSName.AS)); + } + + } + catch (IOException e) + { + fail("Unexpected IOException " + e.getMessage()); + } + finally + { + if (pdfDocument != null) + { + try + { + pdfDocument.close(); + } + catch (IOException e) + { + e.printStackTrace(); + } + } + } } + + @Test + /** + * PDFBOX-3682 + * + * Test a radio button with options. + * Special handling for a radio button with /Opt and the On state not being named + * after the index. + * + * @throws IOException + */ + public void testOptionsAndNamesNotNumbers() + { + File file; + PDDocument pdfDocument = null; + try + { + file = new File(TARGET_PDF_DIR, "PDFBOX-3682.pdf"); + + pdfDocument = PDDocument.load(file); + + pdfDocument.getDocumentCatalog().getAcroForm().getField("RadioButton").setValue("c"); + PDRadioButton radioButton = (PDRadioButton) pdfDocument.getDocumentCatalog().getAcroForm().getField("RadioButton"); + radioButton.setValue("c"); + + // test that the old behavior is now invalid + assertNotEquals("This shall no longer be 2", "2", radioButton.getValueAsString()); + assertNotEquals("This shall no longer be 2", "2", radioButton.getWidgets().get(2).getCOSObject().getNameAsString(COSName.AS)); + + // test for the correct behavior + assertEquals("This shall be c", "c", radioButton.getValueAsString()); + assertEquals("This shall be c", "c", radioButton.getWidgets().get(2).getCOSObject().getNameAsString(COSName.AS)); + } + catch (IOException e) + { + fail("Unexpected 
IOException " + e.getMessage()); + } + finally + { + if (pdfDocument != null) + { + try + { + pdfDocument.close(); + } + catch (IOException e) + { + e.printStackTrace(); + } + } + } + } + @Test public void retrieveAcrobatCheckBoxProperties() throws IOException { PDCheckBox checkbox = (PDCheckBox) acrobatAcroForm.getField("Checkbox"); assertNotNull(checkbox); - assertEquals(checkbox.getOnValue(), "Yes"); - assertEquals(checkbox.getOnValues().size(), 1); + assertEquals("Yes", checkbox.getOnValue()); + assertEquals(1, checkbox.getOnValues().size()); assertTrue(checkbox.getOnValues().contains("Yes")); } @@ -105,7 +243,7 @@ public void retrieveAcrobatCheckBoxProperties() throws IOException public void testAcrobatCheckBoxProperties() throws IOException { PDCheckBox checkbox = (PDCheckBox) acrobatAcroForm.getField("Checkbox"); - assertEquals(checkbox.getValue(), ""); + assertEquals(checkbox.getValue(), "Off"); assertEquals(checkbox.isChecked(), false); checkbox.check(); @@ -136,55 +274,55 @@ public void setValueForAbstractedAcrobatCheckBox() throws IOException checkbox.setValue("Yes"); assertEquals(checkbox.getValueAsString(), ((PDCheckBox) checkbox).getOnValue()); - assertEquals(((PDCheckBox) checkbox).isChecked(), true); - assertEquals(checkbox.getCOSObject().getDictionaryObject(COSName.AS), COSName.YES); + assertEquals(true, ((PDCheckBox) checkbox).isChecked()); + assertEquals(COSName.YES, checkbox.getCOSObject().getDictionaryObject(COSName.AS)); checkbox.setValue("Off"); - assertEquals(checkbox.getValueAsString(), COSName.Off.getName()); - assertEquals(((PDCheckBox) checkbox).isChecked(), false); - assertEquals(checkbox.getCOSObject().getDictionaryObject(COSName.AS), COSName.Off); + assertEquals(COSName.Off.getName(), checkbox.getValueAsString()); + assertEquals(false, ((PDCheckBox) checkbox).isChecked()); + assertEquals(COSName.Off, checkbox.getCOSObject().getDictionaryObject(COSName.AS)); } @Test public void testAcrobatCheckBoxGroupProperties() throws IOException { 
PDCheckBox checkbox = (PDCheckBox) acrobatAcroForm.getField("CheckboxGroup"); - assertEquals(checkbox.getValue(), ""); - assertEquals(checkbox.isChecked(), false); + assertEquals("Off", checkbox.getValue()); + assertEquals(false, checkbox.isChecked()); checkbox.check(); assertEquals(checkbox.getValue(), checkbox.getOnValue()); - assertEquals(checkbox.isChecked(), true); - - assertEquals(checkbox.getOnValues().size(), 3); + assertEquals(true, checkbox.isChecked()); + + assertEquals(3, checkbox.getOnValues().size()); assertTrue(checkbox.getOnValues().contains("Option1")); assertTrue(checkbox.getOnValues().contains("Option2")); assertTrue(checkbox.getOnValues().contains("Option3")); - + // test a value which sets one of the individual checkboxes within the group checkbox.setValue("Option1"); - assertEquals("Option1",checkbox.getValue()); - assertEquals("Option1",checkbox.getValueAsString()); + assertEquals("Option1", checkbox.getValue()); + assertEquals("Option1", checkbox.getValueAsString()); // ensure that for the widgets representing the individual checkboxes // the AS entry has been set - assertEquals("Option1",checkbox.getWidgets().get(0).getAppearanceState().getName()); - assertEquals("Off",checkbox.getWidgets().get(1).getAppearanceState().getName()); - assertEquals("Off",checkbox.getWidgets().get(2).getAppearanceState().getName()); - assertEquals("Off",checkbox.getWidgets().get(3).getAppearanceState().getName()); - + assertEquals("Option1", checkbox.getWidgets().get(0).getAppearanceState().getName()); + assertEquals("Off", checkbox.getWidgets().get(1).getAppearanceState().getName()); + assertEquals("Off", checkbox.getWidgets().get(2).getAppearanceState().getName()); + assertEquals("Off", checkbox.getWidgets().get(3).getAppearanceState().getName()); + // test a value which sets two of the individual chekboxes within the group // as the have the same name entry for being checked checkbox.setValue("Option3"); - assertEquals("Option3",checkbox.getValue()); - 
assertEquals("Option3",checkbox.getValueAsString()); - + assertEquals("Option3", checkbox.getValue()); + assertEquals("Option3", checkbox.getValueAsString()); + // ensure that for both widgets representing the individual checkboxes // the AS entry has been set - assertEquals("Off",checkbox.getWidgets().get(0).getAppearanceState().getName()); - assertEquals("Off",checkbox.getWidgets().get(1).getAppearanceState().getName()); - assertEquals("Option3",checkbox.getWidgets().get(2).getAppearanceState().getName()); - assertEquals("Option3",checkbox.getWidgets().get(3).getAppearanceState().getName()); + assertEquals("Off", checkbox.getWidgets().get(0).getAppearanceState().getName()); + assertEquals("Off", checkbox.getWidgets().get(1).getAppearanceState().getName()); + assertEquals("Option3", checkbox.getWidgets().get(2).getAppearanceState().getName()); + assertEquals("Option3", checkbox.getWidgets().get(3).getAppearanceState().getName()); } @Test @@ -253,7 +391,7 @@ public void retrieveAcrobatRadioButtonProperties() throws IOException { PDRadioButton radioButton = (PDRadioButton) acrobatAcroForm.getField("RadioButtonGroup"); assertNotNull(radioButton); - assertEquals(radioButton.getOnValues().size(), 2); + assertEquals(2, radioButton.getOnValues().size()); assertTrue(radioButton.getOnValues().contains("RadioButton01")); assertTrue(radioButton.getOnValues().contains("RadioButton02")); } @@ -265,21 +403,21 @@ public void testAcrobatRadioButtonProperties() throws IOException // Set value so that first radio button option is selected radioButton.setValue("RadioButton01"); - assertEquals(radioButton.getValue(), "RadioButton01"); + assertEquals("RadioButton01", radioButton.getValue()); // First option shall have /RadioButton01, second shall have /Off - assertEquals(radioButton.getWidgets().get(0).getCOSObject().getDictionaryObject(COSName.AS), - COSName.getPDFName("RadioButton01")); - assertEquals(radioButton.getWidgets().get(1).getCOSObject().getDictionaryObject(COSName.AS), - 
COSName.Off); + assertEquals(COSName.getPDFName("RadioButton01"), + radioButton.getWidgets().get(0).getCOSObject().getDictionaryObject(COSName.AS)); + assertEquals(COSName.Off, + radioButton.getWidgets().get(1).getCOSObject().getDictionaryObject(COSName.AS)); // Set value so that second radio button option is selected radioButton.setValue("RadioButton02"); - assertEquals(radioButton.getValue(), "RadioButton02"); + assertEquals("RadioButton02", radioButton.getValue()); // First option shall have /Off, second shall have /RadioButton02 - assertEquals(radioButton.getWidgets().get(0).getCOSObject().getDictionaryObject(COSName.AS), - COSName.Off); - assertEquals(radioButton.getWidgets().get(1).getCOSObject().getDictionaryObject(COSName.AS), - COSName.getPDFName("RadioButton02")); + assertEquals(COSName.Off, + radioButton.getWidgets().get(0).getCOSObject().getDictionaryObject(COSName.AS)); + assertEquals(COSName.getPDFName("RadioButton02"), + radioButton.getWidgets().get(1).getCOSObject().getDictionaryObject(COSName.AS)); } @Test @@ -289,21 +427,21 @@ public void setValueForAbstractedAcrobatRadioButton() throws IOException // Set value so that first radio button option is selected radioButton.setValue("RadioButton01"); - assertEquals(radioButton.getValueAsString(), "RadioButton01"); + assertEquals("RadioButton01", radioButton.getValueAsString()); // First option shall have /RadioButton01, second shall have /Off - assertEquals(radioButton.getWidgets().get(0).getCOSObject().getDictionaryObject(COSName.AS), - COSName.getPDFName("RadioButton01")); - assertEquals(radioButton.getWidgets().get(1).getCOSObject().getDictionaryObject(COSName.AS), - COSName.Off); + assertEquals(COSName.getPDFName("RadioButton01"), + radioButton.getWidgets().get(0).getCOSObject().getDictionaryObject(COSName.AS)); + assertEquals(COSName.Off, + radioButton.getWidgets().get(1).getCOSObject().getDictionaryObject(COSName.AS)); // Set value so that second radio button option is selected 
radioButton.setValue("RadioButton02"); - assertEquals(radioButton.getValueAsString(), "RadioButton02"); + assertEquals("RadioButton02", radioButton.getValueAsString()); // First option shall have /Off, second shall have /RadioButton02 - assertEquals(radioButton.getWidgets().get(0).getCOSObject().getDictionaryObject(COSName.AS), - COSName.Off); - assertEquals(radioButton.getWidgets().get(1).getCOSObject().getDictionaryObject(COSName.AS), - COSName.getPDFName("RadioButton02")); + assertEquals(COSName.Off, + radioButton.getWidgets().get(0).getCOSObject().getDictionaryObject(COSName.AS)); + assertEquals(COSName.getPDFName("RadioButton02"), + radioButton.getWidgets().get(1).getCOSObject().getDictionaryObject(COSName.AS)); } @Test(expected=IllegalArgumentException.class) diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDChoiceTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDChoiceTest.java index 71ee0eaa32d..0c746e09d6a 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDChoiceTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDChoiceTest.java @@ -20,7 +20,12 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.Before; import org.junit.Test; @@ -33,12 +38,18 @@ public class PDChoiceTest { private PDDocument document; private PDAcroForm acroForm; + private List options; + @Before public void setUp() { document = new PDDocument(); acroForm = new PDAcroForm(document); + options = new ArrayList(); + options.add(" "); + options.add("A"); + options.add("B"); } @Test @@ -47,7 +58,7 @@ public void createListBox() PDChoice choiceField = new PDListBox(acroForm); 
assertEquals(choiceField.getFieldType(), choiceField.getCOSObject().getNameAsString(COSName.FT)); - assertEquals(choiceField.getFieldType(), "Ch"); + assertEquals("Ch", choiceField.getFieldType()); assertFalse(choiceField.isCombo()); } @@ -57,9 +68,82 @@ public void createComboBox() PDChoice choiceField = new PDComboBox(acroForm); assertEquals(choiceField.getFieldType(), choiceField.getCOSObject().getNameAsString(COSName.FT)); - assertEquals(choiceField.getFieldType(), "Ch"); + assertEquals("Ch", choiceField.getFieldType()); assertTrue(choiceField.isCombo()); } + @Test + public void getOptionsFromStrings() + { + PDChoice choiceField = new PDComboBox(acroForm); + COSArray choiceFieldOptions = new COSArray(); + choiceFieldOptions.add(new COSString(" ")); + choiceFieldOptions.add(new COSString("A")); + choiceFieldOptions.add(new COSString("B")); + + // add the options using the low level COS model as the PD model will + // abstract the COSArray + choiceField.getCOSObject().setItem(COSName.OPT, choiceFieldOptions); + + assertEquals(options, choiceField.getOptions()); + } + + @Test + public void getOptionsFromCOSArray() + { + PDChoice choiceField = new PDComboBox(acroForm); + COSArray choiceFieldOptions = new COSArray(); + + // add entry to options + COSArray entry = new COSArray(); + entry.add(new COSString(" ")); + choiceFieldOptions.add(entry); + + // add entry to options + entry = new COSArray(); + entry.add(new COSString("A")); + choiceFieldOptions.add(entry); + + // add entry to options + entry = new COSArray(); + entry.add(new COSString("B")); + choiceFieldOptions.add(entry); + + // add the options using the low level COS model as the PD model will + // abstract the COSArray + choiceField.getCOSObject().setItem(COSName.OPT, choiceFieldOptions); + + assertEquals(options, choiceField.getOptions()); + } + + /* + * Get the entries form a moxed values array. 
See PDFBOX-4185 + */ + @Test + public void getOptionsFromMixed() + { + PDChoice choiceField = new PDComboBox(acroForm); + COSArray choiceFieldOptions = new COSArray(); + + // add string entry to options + choiceFieldOptions.add(new COSString(" ")); + + // add array entry to options + COSArray entry = new COSArray(); + entry.add(new COSString("A")); + choiceFieldOptions.add(entry); + + // add array entry to options + entry = new COSArray(); + entry.add(new COSString("B")); + choiceFieldOptions.add(entry); + + // add the options using the low level COS model as the PD model will + // abstract the COSArray + choiceField.getCOSObject().setItem(COSName.OPT, choiceFieldOptions); + + assertEquals(options, choiceField.getOptions()); + } + } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDDefaultAppearanceStringTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDDefaultAppearanceStringTest.java index ce8cc25881e..3526aeb1034 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDDefaultAppearanceStringTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDDefaultAppearanceStringTest.java @@ -46,7 +46,7 @@ public void setUp() @Test public void testParseDAString() throws IOException { - COSString sampleString = new COSString("/" + fontResourceName.getName() + " 12 Tf 0.019 0.305 0.627 rg 0g"); + COSString sampleString = new COSString("/" + fontResourceName.getName() + " 12 Tf 0.019 0.305 0.627 rg"); PDDefaultAppearanceString defaultAppearanceString = new PDDefaultAppearanceString(sampleString, resources); @@ -61,14 +61,14 @@ public void testParseDAString() throws IOException @Test(expected=IOException.class) public void testFontResourceUnavailable() throws IOException { - COSString sampleString = new COSString("/Helvetica 12 Tf 0.019 0.305 0.627 rg 0g"); + COSString sampleString = new COSString("/Helvetica 12 Tf 0.019 0.305 0.627 rg"); new 
PDDefaultAppearanceString(sampleString, resources); } @Test(expected=IOException.class) public void testWrongNumberOfColorArguments() throws IOException { - COSString sampleString = new COSString("/Helvetica 12 Tf 0.305 0.627 rg 0g"); + COSString sampleString = new COSString("/Helvetica 12 Tf 0.305 0.627 rg"); new PDDefaultAppearanceString(sampleString, resources); } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDFieldTreeTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDFieldTreeTest.java new file mode 100644 index 00000000000..6e0da58616f --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDFieldTreeTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.pdmodel.interactive.form; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; + +import org.junit.Assert; +import org.junit.Test; + +/** + * + * @author Tilman Hausherr + */ +public class PDFieldTreeTest +{ + + /** + * PDFBOX-5044 stack overflow + * + * @throws IOException + */ + @Test + public void test5044() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/13016994/PDFBOX-4131-0.pdf"; + + InputStream is = new URL(sourceUrl).openStream(); + PDDocument doc = PDDocument.load(is); + PDDocumentCatalog catalog = doc.getDocumentCatalog(); + PDAcroForm acroForm = catalog.getAcroForm(); + int count = 0; + for (PDField field : acroForm.getFieldTree()) + { + ++count; + } + Assert.assertEquals(4, count); + is.close(); + doc.close(); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDSignatureFieldTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDSignatureFieldTest.java index d4262041aff..bc7bdc58345 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDSignatureFieldTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDSignatureFieldTest.java @@ -16,9 +16,7 @@ */ package org.apache.pdfbox.pdmodel.interactive.form; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; - +import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -26,7 +24,11 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; +import org.apache.pdfbox.util.Charsets; +import static org.junit.Assert.assertEquals; 
+import static org.junit.Assert.assertNotNull; import org.junit.Before; import org.junit.Test; @@ -53,7 +55,7 @@ public void createDefaultSignatureField() throws IOException sigField.setPartialName("SignatureField"); assertEquals(sigField.getFieldType(), sigField.getCOSObject().getNameAsString(COSName.FT)); - assertEquals(sigField.getFieldType(), "Sig"); + assertEquals("Sig", sigField.getFieldType()); assertEquals(COSName.ANNOT, sigField.getCOSObject().getItem(COSName.TYPE)); assertEquals(PDAnnotationWidget.SUB_TYPE, sigField.getCOSObject().getNameAsString(COSName.SUBTYPE)); @@ -74,4 +76,20 @@ public void setValueForAbstractedSignatureField() throws IOException ((PDField) sigField).setValue("Can't set value using String"); } + + /** + * PDFBOX-4822: test get the signature contents. + * + * @throws IOException + */ + @Test + public void testGetContents() throws IOException + { + // Normally, range0 + range1 = position of "<", and range2 = position after ">" + PDSignature signature = new PDSignature(); + signature.setByteRange(new int[]{ 0, 10, 30, 10}); + byte[] by = "AAAAAAAAAA<313233343536373839>BBBBBBBBBB".getBytes(Charsets.ISO_8859_1); + assertEquals("123456789", new String(signature.getContents(by))); + assertEquals("123456789", new String(signature.getContents(new ByteArrayInputStream(by)))); + } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDTextFieldTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDTextFieldTest.java index b1f441d064f..8a02fd049f8 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDTextFieldTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDTextFieldTest.java @@ -48,7 +48,7 @@ public void createDefaultTextField() PDField textField = new PDTextField(acroForm); assertEquals(textField.getFieldType(), textField.getCOSObject().getNameAsString(COSName.FT)); - assertEquals(textField.getFieldType(), "Tx"); + assertEquals("Tx", 
textField.getFieldType()); } @Test diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java new file mode 100644 index 00000000000..f750ce89b21 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.interactive.form; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +/** + * Test if a sequence of linebreak/paragraph characters produces the same + * number of paragraphs as Adobe Acrobat produces when setting the value + * via JavaScript. 
+ * + */ +public class PlainTextTest { + @Test + public void characterCR() + { + PlainText text = new PlainText("CR\rCR"); + assertEquals(2,text.getParagraphs().size()); + } + + @Test + public void characterLF() + { + PlainText text = new PlainText("LF\nLF"); + assertEquals(2,text.getParagraphs().size()); + } + + @Test + public void characterCRLF() + { + PlainText text = new PlainText("CRLF\r\nCRLF"); + assertEquals(2,text.getParagraphs().size()); + } + + @Test + public void characterLFCR() + { + PlainText text = new PlainText("LFCR\n\rLFCR"); + assertEquals(3,text.getParagraphs().size()); + } + + @Test + public void characterUnicodeLinebreak() + { + PlainText text = new PlainText("linebreak\u2028linebreak"); + assertEquals(2,text.getParagraphs().size()); + } + + @Test + public void characterUnicodeParagraphbreak() + { + PlainText text = new PlainText("paragraphbreak\u2029paragraphbreak"); + assertEquals(2,text.getParagraphs().size()); + } + +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestCheckBox.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestCheckBox.java index 56328aa4fd8..eafdc0541d8 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestCheckBox.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestCheckBox.java @@ -25,11 +25,19 @@ import junit.framework.TestSuite; import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceCharacteristicsDictionary; +import 
org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary; /** - * This will test the functionality of Radio Buttons in PDFBox. + * This will test the functionality of checkboxes in PDFBox. */ public class TestCheckBox extends TestCase { @@ -66,7 +74,7 @@ public static void main( String[] args ) } /** - * This will test the radio button PDModel. + * This will test the checkbox PDModel. * * @throws IOException If there is an error creating the field. */ @@ -106,7 +114,7 @@ public void testCheckboxPDModel() throws IOException checkBox.setExportValues(null); assertNull(checkBox.getCOSObject().getItem(COSName.OPT)); // if there is no Opt entry an empty List shall be returned - assertEquals(checkBox.getExportValues(), new ArrayList()); + assertTrue(checkBox.getExportValues().isEmpty()); } finally { @@ -116,4 +124,40 @@ public void testCheckboxPDModel() throws IOException } } } + + /** + * PDFBOX-4366: Create and test a checkbox with no /AP. The created file works with Adobe Reader! 
+ * + * @throws IOException + */ + public void testCheckBoxNoAppearance() throws IOException + { + PDDocument doc = new PDDocument(); + PDPage page = new PDPage(); + doc.addPage(page); + PDAcroForm acroForm = new PDAcroForm(doc); + acroForm.setNeedAppearances(true); // need this or it won't appear on Adobe Reader + doc.getDocumentCatalog().setAcroForm(acroForm); + List fields = new ArrayList(); + PDCheckBox checkBox = new PDCheckBox(acroForm); + checkBox.setPartialName("checkbox"); + PDAnnotationWidget widget = checkBox.getWidgets().get(0); + widget.setRectangle(new PDRectangle(50, 600, 100, 100)); + PDBorderStyleDictionary bs = new PDBorderStyleDictionary(); + bs.setStyle(PDBorderStyleDictionary.STYLE_SOLID); + bs.setWidth(1); + COSDictionary acd = new COSDictionary(); + PDAppearanceCharacteristicsDictionary ac = new PDAppearanceCharacteristicsDictionary(acd); + ac.setBackground(new PDColor(new float[] { 1, 1, 0 }, PDDeviceRGB.INSTANCE)); + ac.setBorderColour(new PDColor(new float[] { 1, 0, 0 }, PDDeviceRGB.INSTANCE)); + ac.setNormalCaption("4"); // 4 is checkmark, 8 is cross + widget.setAppearanceCharacteristics(ac); + widget.setBorderStyle(bs); + checkBox.setValue("Off"); + fields.add(checkBox); + page.getAnnotations().add(widget); + acroForm.setFields(fields); + assertEquals("Off", checkBox.getValue()); + doc.close(); + } } \ No newline at end of file diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestFields.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestFields.java index 99142b7c978..1e1539b6145 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestFields.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestFields.java @@ -24,6 +24,7 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; /** * This will 
test the form fields in PDFBox. @@ -192,4 +193,41 @@ public void testAcroFormsBasicFields() throws IOException } } } + + /** + * This will test the handling of a widget with a missing (required) /Rect entry. + * + * @throws IOException If there is an error loading the form or the field. + */ + public void testWidgetMissingRect() throws IOException + { + PDDocument doc = null; + + try + { + doc = PDDocument.load(new File(PATH_OF_PDF)); + + PDAcroForm form = doc.getDocumentCatalog().getAcroForm(); + + PDTextField textField = (PDTextField)form.getField("TextField-DefaultValue"); + PDAnnotationWidget widget = textField.getWidgets().get(0); + + // initially there is an Appearance Entry in the form + assertNotNull(widget.getCOSObject().getDictionaryObject(COSName.AP)); + widget.getCOSObject().removeItem(COSName.RECT); + textField.setValue("field value"); + + // There shall be no appearance entry if there is no /Rect to + // behave as Adobe Acrobat does + assertNull(widget.getCOSObject().getDictionaryObject(COSName.AP)); + + } + finally + { + if( doc != null ) + { + doc.close(); + } + } + } } \ No newline at end of file diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestListBox.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestListBox.java index abf4a3ebd99..cb7a332f885 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestListBox.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestListBox.java @@ -67,15 +67,15 @@ public static void main( String[] args ) } /** - * This will test the radio button PDModel. + * This will test the list box PDModel. * * @throws IOException If there is an error creating the field. 
*/ - public void testChoicePDModel() throws IOException + public void testListboxPDModel() throws IOException { /* - * Set up two data list which will be used for the tests + * Set up two data lists which will be used for the tests */ // export values @@ -96,7 +96,7 @@ public void testChoicePDModel() throws IOException { doc = new PDDocument(); PDAcroForm form = new PDAcroForm( doc ); - PDChoice choice = new PDListBox(form); + PDListBox choice = new PDListBox(form); // appearance construction is not implemented, so turn on NeedAppearances form.setNeedAppearances(true); @@ -115,6 +115,12 @@ public void testChoicePDModel() throws IOException assertEquals(exportValues,choice.getOptionsDisplayValues()); assertEquals(exportValues,choice.getOptionsExportValues()); + // Test bug 1 of PDFBOX-4252 when top index is not null + choice.setTopIndex(1); + choice.setValue(exportValues.get(2)); + assertEquals(exportValues.get(2), choice.getValue().get(0)); + choice.setTopIndex(null); // reset + // assert that the option values have been correctly set COSArray optItem = (COSArray) choice.getCOSObject().getItem(COSName.OPT); assertNotNull(choice.getCOSObject().getItem(COSName.OPT)); @@ -150,7 +156,7 @@ public void testChoicePDModel() throws IOException // ensure that the choice field does allow multiple selections choice.setMultiSelect(true); - // now this call must suceed + // now this call must succeed choice.setValue(exportValues); // assert that the option values have been correctly set diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestRadioButtons.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestRadioButtons.java index 48041ce030f..66e6c5b2cc2 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestRadioButtons.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/TestRadioButtons.java @@ -16,60 +16,41 @@ */ package org.apache.pdfbox.pdmodel.interactive.form; +import static 
org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + import java.io.IOException; +import java.net.URL; import java.util.ArrayList; import java.util.List; -import junit.framework.Test; -import junit.framework.TestCase; -import junit.framework.TestSuite; - import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceEntry; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; + +import org.junit.Test; /** * This will test the functionality of Radio Buttons in PDFBox. */ -public class TestRadioButtons extends TestCase +public class TestRadioButtons { - - /** - * Constructor. - * - * @param name The name of the test to run. - */ - public TestRadioButtons( String name ) - { - super( name ); - } - - /** - * This will get the suite of test that this class holds. - * - * @return All of the tests that this class holds. - */ - public static Test suite() - { - return new TestSuite( TestRadioButtons.class ); - } - - /** - * infamous main method. - * - * @param args The command line arguments. - */ - public static void main( String[] args ) - { - String[] arg = {TestRadioButtons.class.getName() }; - junit.textui.TestRunner.main( arg ); - } - /** * This will test the radio button PDModel. * * @throws IOException If there is an error creating the field. 
*/ + @Test public void testRadioButtonPDModel() throws IOException { PDDocument doc = null; @@ -92,16 +73,54 @@ public void testRadioButtonPDModel() throws IOException options.add("Value02"); radioButton.setExportValues(options); + // Test getSelectedExportValues() + List widgets = new ArrayList(); + for (int i = 0; i < options.size(); i++) + { + PDAnnotationWidget widget = new PDAnnotationWidget(); + COSDictionary apNDict = new COSDictionary(); + apNDict.setItem(COSName.Off, new PDAppearanceStream(doc)); + apNDict.setItem(options.get(i), new PDAppearanceStream(doc)); + + PDAppearanceDictionary appearance = new PDAppearanceDictionary(); + PDAppearanceEntry appearanceNEntry = new PDAppearanceEntry(apNDict); + appearance.setNormalAppearance(appearanceNEntry); + widget.setAppearance(appearance); + widget.setAppearanceState("Off"); + widgets.add(widget); + } + radioButton.setWidgets(widgets); + + radioButton.setValue("Value01"); + assertEquals("Value01", radioButton.getValue()); + assertEquals(1, radioButton.getSelectedExportValues().size()); + assertEquals("Value01", radioButton.getSelectedExportValues().get(0)); + assertEquals("Value01", widgets.get(0).getAppearanceState().getName()); + assertEquals("Off", widgets.get(1).getAppearanceState().getName()); + + radioButton.setValue("Value02"); + assertEquals("Value02", radioButton.getValue()); + assertEquals(1, radioButton.getSelectedExportValues().size()); + assertEquals("Value02", radioButton.getSelectedExportValues().get(0)); + assertEquals("Off", widgets.get(0).getAppearanceState().getName()); + assertEquals("Value02", widgets.get(1).getAppearanceState().getName()); + + radioButton.setValue("Off"); + assertEquals("Off", radioButton.getValue()); + assertEquals(0, radioButton.getSelectedExportValues().size()); + assertEquals("Off", widgets.get(0).getAppearanceState().getName()); + assertEquals("Off", widgets.get(1).getAppearanceState().getName()); + COSArray optItem = (COSArray) 
radioButton.getCOSObject().getItem(COSName.OPT); // assert that the values have been correctly set assertNotNull(radioButton.getCOSObject().getItem(COSName.OPT)); - assertEquals(optItem.size(),2); + assertEquals(2, optItem.size()); assertEquals(options.get(0), optItem.getString(0)); // assert that the values can be retrieved correctly List retrievedOptions = radioButton.getExportValues(); - assertEquals(retrievedOptions.size(),2); + assertEquals(2, retrievedOptions.size()); assertEquals(retrievedOptions, options); // assert that the Opt entry is removed @@ -112,10 +131,256 @@ public void testRadioButtonPDModel() throws IOException } finally { - if( doc != null ) { - doc.close(); + IOUtils.closeQuietly(doc); + } + } + } + + /** + * PDFBOX-3656 Radio button field with FLAG_RADIOS_IN_UNISON false + * + * @throws IOException + */ + @Test + public void testPDFBox3656NotInUnison() throws IOException + { + + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12848122/SF1199AEG%20%28Complete%29.pdf"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + + PDAcroForm acroForm = testPdf.getDocumentCatalog().getAcroForm(); + PDRadioButton field = (PDRadioButton) acroForm.getField("Checking/Savings"); + assertFalse("the radio buttons can be selected individually although having the same ON value", field.isRadiosInUnison()); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-3656 Set by value + * + * There are 6 radio buttons where 3 share the same common values but they are not set in unison + * Setting by the first export value shall only select the first radio button + * + * @throws IOException + */ + @Test + public void testPDFBox3656ByValidExportValue() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12848122/SF1199AEG%20%28Complete%29.pdf"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new 
URL(sourceUrl).openStream()); + PDAcroForm acroForm = testPdf.getDocumentCatalog().getAcroForm(); + PDRadioButton field = (PDRadioButton) acroForm.getField("Checking/Savings"); + // check defaults + assertFalse("the radio buttons can be selected individually although having the same ON value", field.isRadiosInUnison()); + assertEquals("initially no option shall be selected", "Off", field.getValue()); + // set the field to a valid export value + field.setValue("Checking"); + assertEquals("setting by the export value should also return that", "Checking", field.getValue()); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-3656 Set by invalid export value + * + * @throws IOException + */ + @Test + public void testPDFBox3656ByInvalidExportValue() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12848122/SF1199AEG%20%28Complete%29.pdf"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDAcroForm acroForm = testPdf.getDocumentCatalog().getAcroForm(); + PDRadioButton field = (PDRadioButton) acroForm.getField("Checking/Savings"); + // check defaults + assertFalse("the radio buttons can be selected individually although having the same ON value", field.isRadiosInUnison()); + assertEquals("initially no option shall be selected", "Off", field.getValue()); + + try { + field.setValue("Invalid"); + fail("Expected an IndexOutOfBoundsException to be thrown"); + } catch (Exception ex) { + // compare the messages + String expectedMessage = "value 'Invalid' is not a valid option for the field Checking/Savings, valid values are: [Checking, Savings] and Off"; + String actualMessage = ex.getMessage(); + assertTrue(actualMessage.contains(expectedMessage)); + } + + assertEquals("no option shall be selected", "Off", field.getValue()); + assertTrue("no export values are selected", field.getSelectedExportValues().isEmpty()); + } + finally + { + 
IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-3656 Set by a valid index + * + * There are 6 radio buttons where 3 share the same common values but they are not set in unison + * Setting by the index shall only select the corresponding radio button + * + * @throws IOException + */ + @Test + public void testPDFBox3656ByValidIndex() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12848122/SF1199AEG%20%28Complete%29.pdf"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDAcroForm acroForm = testPdf.getDocumentCatalog().getAcroForm(); + PDRadioButton field = (PDRadioButton) acroForm.getField("Checking/Savings"); + // check defaults + assertFalse("the radio buttons can be selected individually although having the same ON value", field.isRadiosInUnison()); + assertEquals("initially no option shall be selected", "Off", field.getValue()); + // set the field to a valid index + field.setValue(4); + assertEquals("setting by the index value should return the corresponding export", "Checking", field.getValue()); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-3656 Set by an invalid index + * + * There are 6 radio buttons where 3 share the same common values but they are not set in unison + * Setting by the index shall only select the corresponding radio button + * + * @throws IOException + */ + @Test + public void testPDFBox3656ByInvalidIndex() throws IOException + { + + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12848122/SF1199AEG%20%28Complete%29.pdf"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDAcroForm acroForm = testPdf.getDocumentCatalog().getAcroForm(); + PDRadioButton field = (PDRadioButton) acroForm.getField("Checking/Savings"); + // check defaults + assertFalse("the radio buttons can be selected individually although having the same ON 
value", field.isRadiosInUnison()); + assertEquals("initially no option shall be selected", "Off", field.getValue()); + + try { + field.setValue(6); + fail("Expected an IndexOutOfBoundsException to be thrown"); + } catch (Exception ex) { + // compare the messages + String expectedMessage = "index '6' is not a valid index for the field Checking/Savings, valid indices are from 0 to 5"; + String actualMessage = ex.getMessage(); + assertTrue(actualMessage.contains(expectedMessage)); } + + assertEquals("no option shall be selected", "Off", field.getValue()); + assertTrue("no export values are selected", field.getSelectedExportValues().isEmpty()); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-4617 Enable getting selected index + * + * @throws IOException + */ + @Test + public void testPDFBox4617IndexNoneSelected() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12848122/SF1199AEG%20%28Complete%29.pdf"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDAcroForm acroForm = testPdf.getDocumentCatalog().getAcroForm(); + PDRadioButton field = (PDRadioButton) acroForm.getField("Checking/Savings"); + assertEquals("if there is no value set the index shall be -1", -1, field.getSelectedIndex()); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-4617 Enable getting selected index for value being set by option + * + * @throws IOException + */ + @Test + public void testPDFBox4617IndexForSetByOption() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12848122/SF1199AEG%20%28Complete%29.pdf"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDAcroForm acroForm = testPdf.getDocumentCatalog().getAcroForm(); + PDRadioButton field = (PDRadioButton) acroForm.getField("Checking/Savings"); + field.setValue( "Checking"); + 
assertEquals("the index shall be equal with the first entry of Checking which is 0", 0, field.getSelectedIndex()); + } + finally + { + IOUtils.closeQuietly(testPdf); + } + } + + /** + * PDFBOX-4617 Enable getting selected index for value being set by index + * + * @throws IOException + */ + @Test + public void testPDFBox4617IndexForSetByIndex() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12848122/SF1199AEG%20%28Complete%29.pdf"; + + PDDocument testPdf = null; + try + { + testPdf = PDDocument.load(new URL(sourceUrl).openStream()); + PDAcroForm acroForm = testPdf.getDocumentCatalog().getAcroForm(); + PDRadioButton field = (PDRadioButton) acroForm.getField("Checking/Savings"); + field.setValue(4); + assertEquals("setting by the index value should return the corresponding export", "Checking", field.getValue()); + assertEquals("the index shall be equals with the set value of 4", 4, field.getSelectedIndex()); + } + finally + { + IOUtils.closeQuietly(testPdf); } } } \ No newline at end of file diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/package.html b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/package.html index a3d1b69d61a..def257f49fb 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/package.html +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/package.html b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/package.html index 96db5ecbd5e..74c451e0e0f 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/package.html +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/package.html @@ -15,8 +15,8 @@ ! limitations under the License. 
!--> - - + + diff --git a/pdfbox/src/test/java/org/apache/pdfbox/rendering/TestPDFToImage.java b/pdfbox/src/test/java/org/apache/pdfbox/rendering/TestPDFToImage.java index 393c707255d..df01a3cb64b 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/rendering/TestPDFToImage.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/rendering/TestPDFToImage.java @@ -151,13 +151,13 @@ private BufferedImage createEmptyDiffImage(int minWidth, int minHeight, int maxW /** * Get the difference between two images, identical colors are set to white, differences are - * xored, the highest bit of each color is reset to avoid colors that are too light + * xored, the highest bit of each color is reset to avoid colors that are too light. * * @param bim1 * @param bim2 - * @return If the images are different, the function returns a diff image If the images are - * identical, the function returns null If the size is different, a black border on the botton - * and the right is created + * @return If the images are different, the function returns a diff image. If the images are + * identical, the function returns null. If the size is different, a black border on the bottom + * at the right is created. 
* * @throws IOException */ @@ -179,10 +179,10 @@ private BufferedImage diffImages(BufferedImage bim1, BufferedImage bim2) throws int rgb1 = bim1.getRGB(x, y); int rgb2 = bim2.getRGB(x, y); if (rgb1 != rgb2 - // don't bother about differences of 1 color step - && (Math.abs((rgb1 & 0xFF) - (rgb2 & 0xFF)) > 1 - || Math.abs(((rgb1 >> 8) & 0xFF) - ((rgb2 >> 8) & 0xFF)) > 1 - || Math.abs(((rgb1 >> 16) & 0xFF) - ((rgb2 >> 16) & 0xFF)) > 1)) + // don't bother about small differences + && (Math.abs((rgb1 & 0xFF) - (rgb2 & 0xFF)) > 3 + || Math.abs(((rgb1 >> 8) & 0xFF) - ((rgb2 >> 8) & 0xFF)) > 3 + || Math.abs(((rgb1 >> 16) & 0xFF) - ((rgb2 >> 16) & 0xFF)) > 3)) { if (bim3 == null) { @@ -325,6 +325,7 @@ else if (!filesAreIdentical(outFile, inFile)) LOG.info("*** TEST OK *** for file: " + inFile.getName()); LOG.info("Deleting: " + outFile.getName()); outFile.delete(); + outFile.deleteOnExit(); } } else @@ -332,6 +333,7 @@ else if (!filesAreIdentical(outFile, inFile)) LOG.info("*** TEST OK *** for file: " + inFile.getName()); LOG.info("Deleting: " + outFile.getName()); outFile.delete(); + outFile.deleteOnExit(); } } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java b/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java index 68040f0bab0..969d95d7ca5 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java @@ -34,21 +34,28 @@ import java.io.OutputStreamWriter; import java.io.PrintStream; import java.io.Writer; +import java.net.URISyntaxException; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import junit.framework.Test; import junit.framework.TestCase; +import static junit.framework.TestCase.assertFalse; import junit.framework.TestSuite; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.pdmodel.PDDocument; 
import org.apache.pdfbox.pdmodel.TestPDPageTree; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDFontDescriptor; +import org.apache.pdfbox.pdmodel.font.PDType3Font; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline; import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem; +import org.junit.Assert; /** @@ -185,6 +192,11 @@ else if( expected != null && actual != null ) equals = false; log.warn("Actual line is longer at:" + actualIndex ); } + if (expectedArray.length != actualArray.length) + { + equals = false; + log.warn("Expected lines: " + expectedArray.length + ", actual lines: " + actualArray.length); + } } } else @@ -243,7 +255,6 @@ public void doTestFile(File inFile, File outDir, boolean bLogResult, boolean bSo } } - //System.out.println(" " + inFile + (bSort ? " (sorted)" : "")); PDDocument document = PDDocument.load(inFile); try { @@ -306,116 +317,117 @@ public void doTestFile(File inFile, File outDir, boolean bLogResult, boolean bSo return; } - boolean localFail = false; + compareResult(expectedFile, outFile, inFile, bSort, diffFile); + } + finally + { + document.close(); + } + } - LineNumberReader expectedReader = + private void compareResult(File expectedFile, File outFile, File inFile, boolean bSort, File diffFile) + throws IOException + { + boolean localFail = false; + + LineNumberReader expectedReader = new LineNumberReader(new InputStreamReader(new FileInputStream(expectedFile), ENCODING)); - LineNumberReader actualReader = + LineNumberReader actualReader = new LineNumberReader(new InputStreamReader(new FileInputStream(outFile), ENCODING)); - - while (true) + + while (true) + { + String expectedLine = expectedReader.readLine(); + while( expectedLine != null && expectedLine.trim().length() == 0 ) { - String expectedLine = expectedReader.readLine(); - 
while( expectedLine != null && expectedLine.trim().length() == 0 ) - { - expectedLine = expectedReader.readLine(); - } - String actualLine = actualReader.readLine(); - while( actualLine != null && actualLine.trim().length() == 0 ) + expectedLine = expectedReader.readLine(); + } + String actualLine = actualReader.readLine(); + while( actualLine != null && actualLine.trim().length() == 0 ) + { + actualLine = actualReader.readLine(); + } + if (!stringsEqual(expectedLine, actualLine)) + { + this.bFail = true; + localFail = true; + log.error("FAILURE: Line mismatch for file " + inFile.getName() + + " (sort = "+bSort+")" + + " at expected line: " + expectedReader.getLineNumber() + + " at actual line: " + actualReader.getLineNumber() + + "\nexpected line was: \"" + expectedLine + "\"" + + "\nactual line was: \"" + actualLine + "\"" + "\n"); + + //lets report all lines, even though this might produce some verbose logging + //break; + } + + if (expectedLine == null || actualLine == null) + { + break; + } + } + expectedReader.close(); + actualReader.close(); + if (!localFail) + { + outFile.delete(); + } + else + { + // https://code.google.com/p/java-diff-utils/wiki/SampleUsage + List original = fileToLines(expectedFile); + List revised = fileToLines(outFile); + + // Compute diff. Get the Patch object. Patch is the container for computed deltas. 
+ Patch patch = DiffUtils.diff(original, revised); + + PrintStream diffPS = new PrintStream(diffFile, ENCODING); + for (Object delta : patch.getDeltas()) + { + if (delta instanceof ChangeDelta) { - actualLine = actualReader.readLine(); + ChangeDelta cdelta = (ChangeDelta) delta; + diffPS.println("Org: " + cdelta.getOriginal()); + diffPS.println("New: " + cdelta.getRevised()); + diffPS.println(); } - if (!stringsEqual(expectedLine, actualLine)) + else if (delta instanceof DeleteDelta) { - this.bFail = true; - localFail = true; - log.error("FAILURE: Line mismatch for file " + inFile.getName() + - " (sort = "+bSort+")" + - " at expected line: " + expectedReader.getLineNumber() + - " at actual line: " + actualReader.getLineNumber() + - "\nexpected line was: \"" + expectedLine + "\"" + - "\nactual line was: \"" + actualLine + "\"" + "\n"); - - //lets report all lines, even though this might produce some verbose logging - //break; + DeleteDelta ddelta = (DeleteDelta) delta; + diffPS.println("Org: " + ddelta.getOriginal()); + diffPS.println("New: " + ddelta.getRevised()); + diffPS.println(); } - - if( expectedLine == null || actualLine==null) + else if (delta instanceof InsertDelta) { - break; + InsertDelta idelta = (InsertDelta) delta; + diffPS.println("Org: " + idelta.getOriginal()); + diffPS.println("New: " + idelta.getRevised()); + diffPS.println(); } - } - expectedReader.close(); - actualReader.close(); - if (!localFail) - { - outFile.delete(); - } - else - { - // https://code.google.com/p/java-diff-utils/wiki/SampleUsage - List original = fileToLines(expectedFile); - List revised = fileToLines(outFile); - - // Compute diff. Get the Patch object. Patch is the container for computed deltas. 
- Patch patch = DiffUtils.diff(original, revised); - - PrintStream diffPS = new PrintStream(diffFile, ENCODING); - for (Object delta : patch.getDeltas()) + else { - if (delta instanceof ChangeDelta) - { - ChangeDelta cdelta = (ChangeDelta) delta; - diffPS.println("Org: " + cdelta.getOriginal()); - diffPS.println("New: " + cdelta.getRevised()); - diffPS.println(); - } - else if (delta instanceof DeleteDelta) - { - DeleteDelta ddelta = (DeleteDelta) delta; - diffPS.println("Org: " + ddelta.getOriginal()); - diffPS.println("New: " + ddelta.getRevised()); - diffPS.println(); - } - else if (delta instanceof InsertDelta) - { - InsertDelta idelta = (InsertDelta) delta; - diffPS.println("Org: " + idelta.getOriginal()); - diffPS.println("New: " + idelta.getRevised()); - diffPS.println(); - } - else - { - diffPS.println(delta); - } + diffPS.println(delta); } - diffPS.close(); } - } - finally - { - document.close(); + diffPS.close(); } } // Helper method for get the file content - private static List fileToLines(File file) + private static List fileToLines(File file) throws IOException { List lines = new LinkedList(); - String line = ""; - try - { - BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), ENCODING)); - while ((line = in.readLine()) != null) - { - lines.add(line); - } - in.close(); - } - catch (IOException e) + String line; + + BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), ENCODING)); + while ((line = in.readLine()) != null) { - e.printStackTrace(); + lines.add(line); } + in.close(); + return lines; } @@ -441,10 +453,11 @@ private int findOutlineItemDestPageNum(PDDocument doc, PDOutlineItem oi) throws * must be empty. 
* * @throws IOException + * @throws URISyntaxException */ - public void testStripByOutlineItems() throws IOException + public void testStripByOutlineItems() throws IOException, URISyntaxException { - PDDocument doc = PDDocument.load(TestPDPageTree.class.getResourceAsStream("with_outline.pdf")); + PDDocument doc = PDDocument.load(new File(TestPDPageTree.class.getResource("with_outline.pdf").toURI())); PDDocumentOutline outline = doc.getDocumentCatalog().getDocumentOutline(); Iterable children = outline.children(); Iterator it = children.iterator(); @@ -485,7 +498,7 @@ public void testStripByOutlineItems() throws IOException stripper.setEndBookmark(oi3); String textoi23 = stripper.getText(doc); assertFalse(textoi23.isEmpty()); - assertFalse(textoi23.equals(textFull)); + Assert.assertNotEquals(textoi23, textFull); String expectedTextoi23 = "Second at level 1\n" @@ -505,18 +518,17 @@ public void testStripByOutlineItems() throws IOException stripper.setEndPage(4); String textp34 = stripper.getText(doc); assertFalse(textp34.isEmpty()); - assertFalse(textoi23.equals(textFull)); - assertTrue(textoi23.equals(textp34)); - - + Assert.assertNotEquals(textoi23, textFull); + Assert.assertEquals(textoi23, textp34); + // this should grab 0-based page 2, i.e. 
1-based page 3 // by the bookmark stripper.setStartBookmark(oi2); stripper.setEndBookmark(oi2); String textoi2 = stripper.getText(doc); assertFalse(textoi2.isEmpty()); - assertFalse(textoi2.equals(textoi23)); - assertFalse(textoi23.equals(textFull)); + Assert.assertNotEquals(textoi2, textoi23); + Assert.assertNotEquals(textoi23, textFull); String expectedTextoi2 = "Second at level 1\n" @@ -533,10 +545,10 @@ public void testStripByOutlineItems() throws IOException stripper.setEndPage(3); String textp3 = stripper.getText(doc); assertFalse(textp3.isEmpty()); - assertFalse(textp3.equals(textp34)); - assertFalse(textoi23.equals(textFull)); - assertTrue(textoi2.equals(textp3)); - + Assert.assertNotEquals(textp3, textp34); + Assert.assertNotEquals(textoi23, textFull); + Assert.assertEquals(textoi2, textp3); + // Test with orphan bookmark PDOutlineItem oiOrphan = new PDOutlineItem(); stripper.setStartBookmark(oiOrphan); @@ -604,6 +616,94 @@ public void testExtract() throws Exception } } + public void testTabula() throws IOException + { + File pdfFile = new File("src/test/resources/input", "eu-001.pdf"); + File outFile = new File("target/test-output", "eu-001.pdf-tabula.txt"); + File expectedOutFile = new File("src/test/resources/input", "eu-001.pdf-tabula.txt"); + File diffFile = new File("target/test-output", "eu-001.pdf-tabula-diff.txt"); + PDDocument tabulaDocument = PDDocument.load(pdfFile); + PDFTextStripper tabulaStripper = new PDFTabulaTextStripper(); + + OutputStream os = new FileOutputStream(outFile); + + os.write(0xEF); + os.write(0xBB); + os.write(0xBF); + + Writer writer = new BufferedWriter(new OutputStreamWriter(os, ENCODING)); + try + { + tabulaStripper.writeText(tabulaDocument, writer); + } + finally + { + writer.close(); + } + + os.close(); + + compareResult(expectedOutFile, outFile, pdfFile, false, diffFile); + + assertFalse(bFail); + } + + private class PDFTabulaTextStripper extends PDFTextStripper + { + PDFTabulaTextStripper() throws IOException + { + 
// empty + } + + @Override + protected float computeFontHeight(PDFont font) throws IOException + { + BoundingBox bbox = font.getBoundingBox(); + if (bbox.getLowerLeftY() < Short.MIN_VALUE) + { + // PDFBOX-2158 and PDFBOX-3130 + // files by Salmat eSolutions / ClibPDF Library + bbox.setLowerLeftY(-(bbox.getLowerLeftY() + 65536)); + } + // 1/2 the bbox is used as the height todo: why? + float glyphHeight = bbox.getHeight() / 2; + + // sometimes the bbox has very high values, but CapHeight is OK + PDFontDescriptor fontDescriptor = font.getFontDescriptor(); + if (fontDescriptor != null) + { + float capHeight = fontDescriptor.getCapHeight(); + if (Float.compare(capHeight, 0) != 0 + && (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = capHeight; + } + // PDFBOX-3464, PDFBOX-448: + // sometimes even CapHeight has very high value, but Ascent and Descent are ok + float ascent = fontDescriptor.getAscent(); + float descent = fontDescriptor.getDescent(); + if (ascent > 0 && descent < 0 + && ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = (ascent - descent) / 2; + } + } + + // transformPoint from glyph space -> text space + float height; + if (font instanceof PDType3Font) + { + height = font.getFontMatrix().transformPoint(0, glyphHeight).y; + } + else + { + height = glyphHeight / 1000; + } + + return height; + } + } + /** * Set the tests in the suite for this test class. 
* diff --git a/pdfbox/src/test/java/org/apache/pdfbox/util/MatrixTest.java b/pdfbox/src/test/java/org/apache/pdfbox/util/MatrixTest.java index 1b1120fd785..efefa87e0eb 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/util/MatrixTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/util/MatrixTest.java @@ -17,16 +17,231 @@ import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSFloat; +import org.apache.pdfbox.cos.COSName; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertSame; import org.junit.Test; -import static org.junit.Assert.*; /** * + * @author Neil McErlean * @author Tilman Hausherr */ public class MatrixTest { + @Test + public void testConstructionAndCopy() throws Exception + { + Matrix m1 = new Matrix(); + assertMatrixIsPristine(m1); + + Matrix m2 = m1.clone(); + assertNotSame(m1, m2); + assertMatrixIsPristine(m2); + } + + @Test + public void testGetScalingFactor() + { + // check scaling factor of an initial matrix + Matrix m1 = new Matrix(); + assertEquals(1, m1.getScalingFactorX(), 0); + assertEquals(1, m1.getScalingFactorY(), 0); + + // check scaling factor of an initial matrix + Matrix m2 = new Matrix(2, 4, 4, 2, 0, 0); + assertEquals((float) Math.sqrt(20), m2.getScalingFactorX(), 0); + assertEquals((float) Math.sqrt(20), m2.getScalingFactorY(), 0); + } + + @Test + public void testCreateMatrixUsingInvalidInput() + { + // anything but a COSArray is invalid and leads to an initial matrix + Matrix createMatrix = Matrix.createMatrix(COSName.A); + assertMatrixIsPristine(createMatrix); + + // a COSArray with fewer than 6 entries leads to an initial matrix + COSArray cosArray = new COSArray(); + cosArray.add(COSName.A); + createMatrix = Matrix.createMatrix(cosArray); + assertMatrixIsPristine(createMatrix); + + // a COSArray containing other kind of objects than COSNumber leads to an initial matrix + cosArray = new COSArray(); + for (int i = 0; i < 6; i++) + { 
+ cosArray.add(COSName.A); + } + createMatrix = Matrix.createMatrix(cosArray); + assertMatrixIsPristine(createMatrix); + } + + @Test + public void testMultiplication() + { + // These matrices will not change - we use it to drive the various multiplications. + final Matrix const1 = new Matrix(); + final Matrix const2 = new Matrix(); + + // Create matrix with values + // [ 0, 1, 2 + // 1, 2, 3 + // 2, 3, 4] + for (int x = 0; x < 3; x++) + { + for (int y = 0; y < 3; y++) + { + const1.setValue(x, y, x + y); + const2.setValue(x, y, 8 + x + y); + } + } + + float[] m1MultipliedByM1 = new float[] { 5, 8, 11, 8, 14, 20, 11, 20, 29 }; + float[] m1MultipliedByM2 = new float[] { 29, 32, 35, 56, 62, 68, 83, 92, 101 }; + float[] m2MultipliedByM1 = new float[] { 29, 56, 83, 32, 62, 92, 35, 68, 101 }; + + Matrix var1 = const1.clone(); + Matrix var2 = const2.clone(); + + // Multiply two matrices together producing a new result matrix. + Matrix result = var1.multiply(var2); + assertEquals(const1, var1); + assertEquals(const2, var2); + assertMatrixValuesEqualTo(m1MultipliedByM2, result); + + // Multiply two matrices together with the result being written to a third matrix + // (Any existing values there will be overwritten). 
+ result = var1.multiply(var2); + assertEquals(const1, var1); + assertEquals(const2, var2); + assertMatrixValuesEqualTo(m1MultipliedByM2, result); + + // Multiply two matrices together with the result being written into 'this' matrix + var1 = const1.clone(); + var2 = const2.clone(); + var1.concatenate(var2); + assertEquals(const2, var2); + assertMatrixValuesEqualTo(m2MultipliedByM1, var1); + + var1 = const1.clone(); + var2 = const2.clone(); + result = Matrix.concatenate(var1, var2); + assertEquals(const1, var1); + assertEquals(const2, var2); + assertMatrixValuesEqualTo(m2MultipliedByM1, result); + + // Multiply the same matrix with itself with the result being written into 'this' matrix + var1 = const1.clone(); + result = var1.multiply(var1); + assertEquals(const1, var1); + assertMatrixValuesEqualTo(m1MultipliedByM1, result); + } + + @Test + public void testOldMultiplication() throws Exception + { + // This matrix will not change - we use it to drive the various multiplications. + final Matrix testMatrix = new Matrix(); + + // Create matrix with values + // [ 0, 1, 2 + // 1, 2, 3 + // 2, 3, 4] + for (int x = 0; x < 3; x++) + { + for (int y = 0; y < 3; y++) + { + testMatrix.setValue(x, y, x + y); + } + } + + Matrix m1 = testMatrix.clone(); + Matrix m2 = testMatrix.clone(); + + // Multiply two matrices together producing a new result matrix. + Matrix product = m1.multiply(m2); + + assertNotSame(m1, product); + assertNotSame(m2, product); + + // Operand 1 should not have changed + assertMatrixValuesEqualTo(new float[] { 0, 1, 2, 1, 2, 3, 2, 3, 4 }, m1); + // Operand 2 should not have changed + assertMatrixValuesEqualTo(new float[] { 0, 1, 2, 1, 2, 3, 2, 3, 4 }, m2); + assertMatrixValuesEqualTo(new float[] { 5, 8, 11, 8, 14, 20, 11, 20, 29 }, product); + + // Multiply two matrices together with the result being written to a third matrix + // (Any existing values there will be overwritten). 
+ Matrix resultMatrix = new Matrix(); + + Matrix retVal = m1.multiply(m2, resultMatrix); + assertSame(retVal, resultMatrix); + // Operand 1 should not have changed + assertMatrixValuesEqualTo(new float[] { 0, 1, 2, 1, 2, 3, 2, 3, 4 }, m1); + // Operand 2 should not have changed + assertMatrixValuesEqualTo(new float[] { 0, 1, 2, 1, 2, 3, 2, 3, 4 }, m2); + assertMatrixValuesEqualTo(new float[] { 5, 8, 11, 8, 14, 20, 11, 20, 29 }, resultMatrix); + + // Multiply two matrices together with the result being written into the other matrix + retVal = m1.multiply(m2, m2); + assertSame(retVal, m2); + // Operand 1 should not have changed + assertMatrixValuesEqualTo(new float[] { 0, 1, 2, 1, 2, 3, 2, 3, 4 }, m1); + assertMatrixValuesEqualTo(new float[] { 5, 8, 11, 8, 14, 20, 11, 20, 29 }, retVal); + + // Multiply two matrices together with the result being written into 'this' matrix + m1 = testMatrix.clone(); + m2 = testMatrix.clone(); + + retVal = m1.multiply(m2, m1); + assertSame(retVal, m1); + // Operand 2 should not have changed + assertMatrixValuesEqualTo(new float[] { 0, 1, 2, 1, 2, 3, 2, 3, 4 }, m2); + assertMatrixValuesEqualTo(new float[] { 5, 8, 11, 8, 14, 20, 11, 20, 29 }, retVal); + + // Multiply the same matrix with itself with the result being written into 'this' matrix + m1 = testMatrix.clone(); + + retVal = m1.multiply(m1, m1); + assertSame(retVal, m1); + assertMatrixValuesEqualTo(new float[] { 5, 8, 11, 8, 14, 20, 11, 20, 29 }, retVal); + } + + @Test(expected = IllegalArgumentException.class) + public void testIllegalValueNaN1() + { + Matrix m = new Matrix(); + m.setValue(0, 0, Float.MAX_VALUE); + m.multiply(m, m); + } + + @Test(expected = IllegalArgumentException.class) + public void testIllegalValueNaN2() + { + Matrix m = new Matrix(); + m.setValue(0, 0, Float.NaN); + m.multiply(m, m); + } + + @Test(expected = IllegalArgumentException.class) + public void testIllegalValuePositiveInfinity() + { + Matrix m = new Matrix(); + m.setValue(0, 0, 
Float.POSITIVE_INFINITY); + m.multiply(m, m); + } + + @Test(expected = IllegalArgumentException.class) + public void testIllegalValueNegativeInfinity() + { + Matrix m = new Matrix(); + m.setValue(0, 0, Float.NEGATIVE_INFINITY); + m.multiply(m, m); + } + /** * Test of PDFBOX-2872 bug */ @@ -44,5 +259,109 @@ public void testPdfbox2872() } + @Test + public void testGetValues() + { + Matrix m = new Matrix(2, 4, 4, 2, 15, 30); + float[][] values = m.getValues(); + assertEquals(2, values[0][0], 0); + assertEquals(4, values[0][1], 0); + assertEquals(0, values[0][2], 0); + assertEquals(4, values[1][0], 0); + assertEquals(2, values[1][1], 0); + assertEquals(0, values[1][2], 0); + assertEquals(15, values[2][0], 0); + assertEquals(30, values[2][1], 0); + assertEquals(1, values[2][2], 0); + } + + @Test + public void testScaling() + { + Matrix m = new Matrix(2, 4, 4, 2, 15, 30); + m.scale(2, 3); + // first row, multiplication with 2 + assertEquals(4, m.getValue(0, 0), 0); + assertEquals(8, m.getValue(0, 1), 0); + assertEquals(0, m.getValue(0, 2), 0); + + // second row, multiplication with 3 + assertEquals(12, m.getValue(1, 0), 0); + assertEquals(6, m.getValue(1, 1), 0); + assertEquals(0, m.getValue(1, 2), 0); + + // third row, no changes at all + assertEquals(15, m.getValue(2, 0), 0); + assertEquals(30, m.getValue(2, 1), 0); + assertEquals(1, m.getValue(2, 2), 0); + } + + @Test + public void testTranslation() + { + Matrix m = new Matrix(2, 4, 4, 2, 15, 30); + m.translate(2, 3); + // first row, no changes at all + assertEquals(2, m.getValue(0, 0), 0); + assertEquals(4, m.getValue(0, 1), 0); + assertEquals(0, m.getValue(0, 2), 0); + + // second row, no changes at all + assertEquals(4, m.getValue(1, 0), 0); + assertEquals(2, m.getValue(1, 1), 0); + assertEquals(0, m.getValue(1, 2), 0); + + // third row, translated values + assertEquals(31, m.getValue(2, 0), 0); + assertEquals(44, m.getValue(2, 1), 0); + assertEquals(1, m.getValue(2, 2), 0); + } + + /** + * This method asserts that 
the matrix values for the given {@link Matrix} object are equal to the pristine, or + * original, values. + * + * @param m the Matrix to test. + */ + private void assertMatrixIsPristine(Matrix m) + { + assertMatrixValuesEqualTo(new float[] { 1, 0, 0, 0, 1, 0, 0, 0, 1 }, m); + } + + /** + * This method asserts that the matrix values for the given {@link Matrix} object have the specified values. + * + * @param values the expected values + * @param m the matrix to test + */ + private void assertMatrixValuesEqualTo(float[] values, Matrix m) + { + float delta = 0.00001f; + for (int i = 0; i < values.length; i++) + { + // Need to convert a (row, column) coordinate into a straight index. + int row = (int) Math.floor(i / 3); + int column = i % 3; + StringBuilder failureMsg = new StringBuilder(); + failureMsg.append("Incorrect value for matrix[").append(row).append(",").append(column) + .append("]"); + assertEquals(failureMsg.toString(), values[i], m.getValue(row, column), delta); + } + } + //Uncomment annotation to run the test + // @Test + public void testMultiplicationPerformance() { + long start = System.currentTimeMillis(); + Matrix c; + Matrix d; + for (int i=0; i<100000000; i++) { + c = new Matrix(15, 3, 235, 55, 422, 1); + d = new Matrix(45, 345, 23, 551, 66, 832); + c.multiply(d); + c.concatenate(d); + } + long stop = System.currentTimeMillis(); + System.out.println("Matrix multiplication took " + (stop - start) + "ms."); + } } diff --git a/pdfbox/src/test/java/org/apache/pdfbox/util/TestDateUtil.java b/pdfbox/src/test/java/org/apache/pdfbox/util/TestDateUtil.java index 7311aba7c35..04ba3652887 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/util/TestDateUtil.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/util/TestDateUtil.java @@ -129,9 +129,9 @@ private static void checkParse(int yr, int mon, int day, int hr, int min, int sec, int offsetHours, int offsetMinutes, String orig) throws Exception { - String pdfDate = 
String.format("D:%04d%02d%02d%02d%02d%02d%+03d'%02d'", + String pdfDate = String.format(Locale.US, "D:%04d%02d%02d%02d%02d%02d%+03d'%02d'", yr,mon,day,hr,min,sec,offsetHours,offsetMinutes); - String iso8601Date = String.format("%04d-%02d-%02d" + String iso8601Date = String.format(Locale.US, "%04d-%02d-%02d" + "T%02d:%02d:%02d%+03d:%02d", yr,mon,day,hr,min,sec,offsetHours,offsetMinutes); Calendar cal = DateConverter.toCalendar(orig); @@ -167,6 +167,10 @@ public void testDateConverter() throws Exception // PDFBOX-1219 checkParse(2001, 1,31,10,33, 0, +1, 0, "2001-01-31T10:33+01:00 "); + + // Same with milliseconds + checkParse(2001, 1,31,10,33, 0, +1, 0, "2001-01-31T10:33.123+01:00"); + // PDFBOX-465 checkParse(2002, 5,12, 9,47, 0, 0, 0, "9:47 5/12/2002"); // PDFBOX-465 @@ -215,6 +219,7 @@ public void testDateConverter() throws Exception checkParse(2000, 2,29, 0, 0, 0, 0, 0, "2000 Feb 29"); // valid date checkParse(2000, 2,29, 0, 0, 0,+11, 0, " 2000 Feb 29 GMT + 11:00"); // valid date + checkParse(2000, 2,29, 0, 0, 0,+11, 0, " 2000 Feb 29 UTC + 11:00"); // valid date checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "2100 Feb 29 GMT+11"); // invalid date checkParse(2012, 2,29, 0, 0, 0,+11, 0, "2012 Feb 29 GMT+11"); // valid date checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "2012 Feb 30 GMT+11"); // invalid date @@ -295,6 +300,8 @@ public void testDateConverter() throws Exception // ambiguous big-endian date checkParse(2073,12,25, 0, 8, 0, 0, 0, "2073 12 25:08"); + // PDFBOX-3315 GMT+12 + checkParse(2016, 4,11,16,01,15, 12, 0, "D:20160411160115+12'00'"); } private static void checkToString(int yr, int mon, int day, @@ -305,9 +312,9 @@ private static void checkToString(int yr, int mon, int day, GregorianCalendar cal = new GregorianCalendar(tz, Locale.ENGLISH); cal.set(yr, mon-1, day, hr, min, sec); // create expected strings - String pdfDate = String.format("D:%04d%02d%02d%02d%02d%02d%+03d'%02d'", + String pdfDate = String.format(Locale.US, "D:%04d%02d%02d%02d%02d%02d%+03d'%02d'", 
yr,mon,day,hr,min,sec,offsetHours, offsetMinutes); - String iso8601Date = String.format("%04d-%02d-%02d" + String iso8601Date = String.format(Locale.US, "%04d-%02d-%02d" + "T%02d:%02d:%02d%+03d:%02d", yr,mon,day,hr,min,sec,offsetHours, offsetMinutes); // compare outputs from toString and toISO8601 with expected values @@ -332,6 +339,8 @@ public void testToString() throws Exception assertNull(DateConverter.toCalendar((COSString) null)); assertNull(DateConverter.toCalendar((String) null)); + assertNull(DateConverter.toCalendar("D: ")); + assertNull(DateConverter.toCalendar("D:")); checkToString(2013, 8, 28, 3, 14, 15, tzPgh, -4, 0); checkToString(2014, 2, 28, 3, 14, 15, tzPgh, -5, 0); @@ -344,18 +353,10 @@ public void testToString() throws Exception checkToString(2015, 8, 28, 3, 14, 15, tzAdelaide, +9, 30); checkToString(2016, 2, 28, 3, 14, 15, tzAdelaide, +10, 30); // McMurdo has a daylightsavings rule, but it seems never to apply - checkToString(1981, 1, 1, 1, 14, 15, tzMcMurdo, +0, 0); - checkToString(1982, 2, 1, 1, 14, 15, tzMcMurdo, +0, 0); - checkToString(1983, 3, 1, 1, 14, 15, tzMcMurdo, +0, 0); - checkToString(1984, 4, 1, 1, 14, 15, tzMcMurdo, +0, 0); - checkToString(1985, 5, 1, 1, 14, 15, tzMcMurdo, +0, 0); - checkToString(1986, 6, 1, 1, 14, 15, tzMcMurdo, +0, 0); - checkToString(1987, 7, 1, 1, 14, 15, tzMcMurdo, +0, 0); - checkToString(1988, 8, 1, 1, 14, 15, tzMcMurdo, +0, 0); - checkToString(1989, 9, 1, 1, 14, 15, tzMcMurdo, +0, 0); - checkToString(1990, 10, 1, 1, 14, 15, tzMcMurdo, +0, 0); - checkToString(1991, 11, 1, 1, 14, 15, tzMcMurdo, +0, 0); - checkToString(1992, 12, 1, 1, 14, 15, tzMcMurdo, +0, 0); + for (int m = 1; m <= 12; ++m) + { + checkToString(1980 + m, m, 1, 1, 14, 15, tzMcMurdo, +0, 0); + } } private static void checkParseTZ(int expect, String src) @@ -370,14 +371,15 @@ private static void checkParseTZ(int expect, String src) */ public void testParseTZ() { + // 1st parameter is what to expect checkParseTZ(0*HRS+0*MINS, "+00:00"); 
checkParseTZ(0*HRS+0*MINS, "-0000"); checkParseTZ(1*HRS+0*MINS, "+1:00"); checkParseTZ(-(1*HRS+0*MINS), "-1:00"); checkParseTZ(-(1*HRS+30*MINS), "-0130"); checkParseTZ(11*HRS+59*MINS, "1159"); - checkParseTZ(-(11*HRS+30*MINS), "1230"); - checkParseTZ(11*HRS+30*MINS, "-12:30"); + checkParseTZ(12*HRS+30*MINS, "1230"); + checkParseTZ(-(12*HRS+30*MINS), "-12:30"); checkParseTZ(0*HRS+0*MINS, "Z"); checkParseTZ(-(8*HRS+0*MINS), "PST"); checkParseTZ(0*HRS+0*MINS, "EDT"); // EDT does not parse @@ -391,6 +393,11 @@ public void testParseTZ() checkParseTZ((5*HRS+0*MINS), "+0500"); checkParseTZ((11*HRS+0*MINS), "+11'00'"); checkParseTZ(0, "Z"); + // PDFBOX-3315, PDFBOX-2420 + checkParseTZ(12*HRS+0*MINS, "+12:00"); + checkParseTZ(-(12*HRS+0*MINS), "-12:00"); + checkParseTZ(14*HRS+0*MINS, "1400"); + checkParseTZ(-(14*HRS+0*MINS), "-1400"); } private static void checkFormatOffset(double off, String expect) @@ -405,16 +412,17 @@ private static void checkFormatOffset(double off, String expect) */ public void testFormatTZoffset() { - checkFormatOffset(-12.1, "+11:54"); - checkFormatOffset(12.1, "-11:54"); + // 2nd parameter is what to expect + checkFormatOffset(-12.1, "-12:06"); + checkFormatOffset(12.1, "+12:06"); checkFormatOffset(0, "+00:00"); checkFormatOffset(-1, "-01:00"); checkFormatOffset(.5, "+00:30"); checkFormatOffset(-0.5, "-00:30"); checkFormatOffset(.1, "+00:06"); checkFormatOffset(-0.1, "-00:06"); - checkFormatOffset(-12, "+00:00"); - checkFormatOffset(12, "+00:00"); + checkFormatOffset(-12, "-12:00"); + checkFormatOffset(12, "+12:00"); checkFormatOffset(-11.5, "-11:30"); checkFormatOffset(11.5, "+11:30"); checkFormatOffset(11.9, "+11:54"); diff --git a/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java b/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java new file mode 100644 index 00000000000..ce02d57a71f --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java @@ -0,0 +1,107 @@ +/* + * Copyright 2016 The Apache Software 
Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.util; + +import java.io.IOException; +import java.util.Locale; +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; +import static org.junit.Assert.assertArrayEquals; + +/** + * + * @author Michael Doswald + */ +public class TestHexUtil extends TestCase +{ + + /** + * Test conversion from short to char[] + */ + public void testGetCharsFromShortWithoutPassingInABuffer() + { + assertArrayEquals(new char[]{'0','0','0','0'}, Hex.getChars((short)0x0000)); + assertArrayEquals(new char[]{'0','0','0','F'}, Hex.getChars((short)0x000F)); + assertArrayEquals(new char[]{'A','B','C','D'}, Hex.getChars((short)0xABCD)); + assertArrayEquals(new char[]{'B','A','B','E'}, Hex.getChars((short)0xCAFEBABE)); + } + + /** + * Check conversion from String to a char[] which contains the UTF16-BE encoded + * bytes of the string as hex digits + * + */ + public void testGetCharsUTF16BE() + { + assertArrayEquals(new char[]{'0','0','6','1','0','0','6','2'}, Hex.getCharsUTF16BE("ab")); + assertArrayEquals(new char[]{'5','E','2','E','5','2','A','9'}, Hex.getCharsUTF16BE("帮助")); + } + + /** + * Test getBytes() and getString() and decodeHex() + */ + public void testMisc() throws IOException + { + byte[] byteSrcArray = new byte[256]; + for (int i = 0; i < 256; ++i) + { + byteSrcArray[i] = (byte) i; + + byte[] bytes = Hex.getBytes((byte) i); + 
assertEquals(2, bytes.length); + String s2 = String.format(Locale.US, "%02X", i); + assertArrayEquals(s2.getBytes(Charsets.US_ASCII), bytes); + s2 = Hex.getString((byte) i); + assertArrayEquals(s2.getBytes(Charsets.US_ASCII), bytes); + + assertArrayEquals(new byte[]{(byte) i}, Hex.decodeHex(s2)); + } + byte[] byteDstArray = Hex.getBytes(byteSrcArray); + assertEquals(byteDstArray.length, byteSrcArray.length * 2); + + String dstString = Hex.getString(byteSrcArray); + assertEquals(dstString.length(), byteSrcArray.length * 2); + + assertArrayEquals(dstString.getBytes(Charsets.US_ASCII), byteDstArray); + + assertArrayEquals(byteSrcArray, Hex.decodeHex(dstString)); + } + + /** + * Set the tests in the suite for this test class. + * + * @return the Suite. + */ + public static Test suite() + { + return new TestSuite(TestHexUtil.class); + } + + /** + * Command line execution. + * + * @param args Command line arguments. + */ + public static void main(String[] args) + { + String[] arg = + { + TestHexUtil.class.getName() + }; + junit.textui.TestRunner.main(arg); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/util/TestMatrix.java b/pdfbox/src/test/java/org/apache/pdfbox/util/TestMatrix.java deleted file mode 100644 index 11b18bc8539..00000000000 --- a/pdfbox/src/test/java/org/apache/pdfbox/util/TestMatrix.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.util; - -import java.io.IOException; - -import junit.framework.Test; -import junit.framework.TestCase; -import junit.framework.TestSuite; - -/** - * Test the {@link Matrix} class. - * @author Neil McErlean - * @since 1.4.0 - */ -public class TestMatrix extends TestCase -{ - /** - * Test class constructor. - * - * @param name The name of the test class. - * - * @throws IOException If there is an error creating the test. - */ - public TestMatrix( String name ) throws IOException - { - super( name ); - } - - public void testConstructionAndCopy() throws Exception - { - Matrix m1 = new Matrix(); - assertMatrixIsPristine(m1); - - Matrix m2 = m1.clone(); - assertNotSame(m1, m2); - assertMatrixIsPristine(m2); - } - - public void testMultiplication() throws Exception - { - // This matrix will not change - we use it to drive the various multiplications. - final Matrix testMatrix = new Matrix(); - - // Create matrix with values - // [ 0, 1, 2 - // 1, 2, 3 - // 2, 3, 4] - for (int x = 0; x < 3; x++) - { - for (int y = 0; y < 3; y++) - { - testMatrix.setValue(x, y, x + y); - } - } - - Matrix m1 = testMatrix.clone(); - Matrix m2 = testMatrix.clone(); - - // Multiply two matrices together producing a new result matrix. 
- Matrix product = m1.multiply(m2); - - assertNotSame(m1, product); - assertNotSame(m2, product); - - // Operand 1 should not have changed - assertMatrixValuesEqualTo(new float[] {0, 1, 2, - 1, 2, 3, - 2, 3, 4}, m1); - // Operand 2 should not have changed - assertMatrixValuesEqualTo(new float[] {0, 1, 2, - 1, 2, 3, - 2, 3, 4}, m2); - assertMatrixValuesEqualTo(new float[] {5, 8, 11, - 8, 14, 20, - 11, 20, 29}, product); - - // Multiply two matrices together with the result being written to a third matrix - // (Any existing values there will be overwritten). - Matrix resultMatrix = new Matrix(); - - Matrix retVal = m1.multiply(m2, resultMatrix); - assertSame(retVal, resultMatrix); - // Operand 1 should not have changed - assertMatrixValuesEqualTo(new float[] {0, 1, 2, - 1, 2, 3, - 2, 3, 4}, m1); - // Operand 2 should not have changed - assertMatrixValuesEqualTo(new float[] {0, 1, 2, - 1, 2, 3, - 2, 3, 4}, m2); - assertMatrixValuesEqualTo(new float[] {5, 8, 11, - 8, 14, 20, - 11, 20, 29}, resultMatrix); - - - - // Multiply two matrices together with the result being written into the other matrix - retVal = m1.multiply(m2, m2); - assertSame(retVal, m2); - // Operand 1 should not have changed - assertMatrixValuesEqualTo(new float[] {0, 1, 2, - 1, 2, 3, - 2, 3, 4}, m1); - assertMatrixValuesEqualTo(new float[] {5, 8, 11, - 8, 14, 20, - 11, 20, 29}, retVal); - - - - // Multiply two matrices together with the result being written into 'this' matrix - m1 = testMatrix.clone(); - m2 = testMatrix.clone(); - - retVal = m1.multiply(m2, m1); - assertSame(retVal, m1); - // Operand 2 should not have changed - assertMatrixValuesEqualTo(new float[] {0, 1, 2, - 1, 2, 3, - 2, 3, 4}, m2); - assertMatrixValuesEqualTo(new float[] {5, 8, 11, - 8, 14, 20, - 11, 20, 29}, retVal); - - - - // Multiply the same matrix with itself with the result being written into 'this' matrix - m1 = testMatrix.clone(); - - retVal = m1.multiply(m1, m1); - assertSame(retVal, m1); - assertMatrixValuesEqualTo(new 
float[] {5, 8, 11, - 8, 14, 20, - 11, 20, 29}, retVal); - } - - /** - * This method asserts that the matrix values for the given {@link Matrix} object are equal - * to the pristine, or original, values. - * @param m the Matrix to test. - */ - private void assertMatrixIsPristine(Matrix m) - { - assertMatrixValuesEqualTo(new float[] {1 ,0 ,0, - 0, 1, 0, - 0, 0, 1}, m); - } - - /** - * This method asserts that the matrix values for the given {@link Matrix} object have - * the specified values. - * @param values the expected values - * @param m the matrix to test - */ - private void assertMatrixValuesEqualTo(float[] values, Matrix m) - { - float delta = 0.00001f; - for (int i = 0; i < values.length; i++) - { - // Need to convert a (row, column) co-ordinate into a straight index. - int row = (int)Math.floor(i / 3); - int column = i % 3; - StringBuilder failureMsg = new StringBuilder(); - failureMsg.append("Incorrect value for matrix[") - .append(row).append(",").append(column).append("]"); - assertEquals(failureMsg.toString(), values[i], m.getValue(row, column), delta); - } - } - - /** - * Set the tests in the suite for this test class. - * - * @return the Suite. - */ - public static Test suite() - { - return new TestSuite( TestMatrix.class ); - } - - /** - * Command line execution. - * - * @param args Command line arguments. - */ - public static void main( String[] args ) - { - String[] arg = {TestMatrix.class.getName() }; - junit.textui.TestRunner.main( arg ); - } -} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/util/TestNumberFormatUtil.java b/pdfbox/src/test/java/org/apache/pdfbox/util/TestNumberFormatUtil.java new file mode 100644 index 00000000000..4522e0d6964 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/util/TestNumberFormatUtil.java @@ -0,0 +1,202 @@ +/* + * Copyright 2016 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.util; + +import java.math.BigDecimal; +import java.math.MathContext; +import java.math.RoundingMode; +import java.util.Arrays; +import java.util.regex.Pattern; +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; +import org.junit.Assert; + +/** + * + * @author Michael Doswald + */ +public class TestNumberFormatUtil extends TestCase +{ + + private final byte[] buffer = new byte[64]; + + public void testFormatOfIntegerValues() + { + assertEquals(2, NumberFormatUtil.formatFloatFast(51, 5, buffer)); + assertArrayEquals(new byte[]{'5', '1'}, Arrays.copyOfRange(buffer, 0, 2)); + + assertEquals(3, NumberFormatUtil.formatFloatFast(-51, 5, buffer)); + assertArrayEquals(new byte[]{'-', '5', '1'}, Arrays.copyOfRange(buffer, 0, 3)); + + assertEquals(1, NumberFormatUtil.formatFloatFast(0, 5, buffer)); + assertArrayEquals(new byte[]{'0'}, Arrays.copyOfRange(buffer, 0, 1)); + + assertEquals(19, NumberFormatUtil.formatFloatFast(Long.MAX_VALUE, 5, buffer)); + assertArrayEquals(new byte[]{'9', '2', '2', '3', '3', '7', '2', '0', '3', '6', '8', '5', + '4', '7', '7', '5', '8', '0', '7'}, + Arrays.copyOfRange(buffer, 0, 19)); + + //Note: Integer.MAX_VALUE would be 2147483647, but when converting to float, we have + // precision errors. 
NumberFormat.getIntegerInstance() does also print 2147483648 for + // such a float + assertEquals(10, NumberFormatUtil.formatFloatFast(Integer.MAX_VALUE, 5, buffer)); + assertArrayEquals(new byte[]{'2', '1', '4', '7', '4', '8', '3', '6', '4', '8'}, + Arrays.copyOfRange(buffer, 0, 10)); + + assertEquals(11, NumberFormatUtil.formatFloatFast(Integer.MIN_VALUE, 5, buffer)); + assertArrayEquals(new byte[]{'-', '2', '1', '4', '7', '4', '8', '3', '6', '4', '8'}, + Arrays.copyOfRange(buffer, 0, 11)); + } + + public void testFormatOfRealValues() + { + assertEquals(3, NumberFormatUtil.formatFloatFast(0.7f, 5, buffer)); + assertArrayEquals(new byte[]{'0', '.', '7'}, Arrays.copyOfRange(buffer, 0, 3)); + + assertEquals(4, NumberFormatUtil.formatFloatFast(-0.7f, 5, buffer)); + assertArrayEquals(new byte[]{'-', '0', '.', '7'}, Arrays.copyOfRange(buffer, 0, 4)); + + assertEquals(5, NumberFormatUtil.formatFloatFast(0.003f, 5, buffer)); + assertArrayEquals(new byte[]{'0', '.', '0', '0', '3'}, Arrays.copyOfRange(buffer, 0, 5)); + + assertEquals(6, NumberFormatUtil.formatFloatFast(-0.003f, 5, buffer)); + assertArrayEquals(new byte[]{'-', '0', '.', '0', '0', '3'}, + Arrays.copyOfRange(buffer, 0, 6)); + } + + public void testFormatOfRealValuesReturnsMinusOneIfItCannotBeFormatted() + { + assertEquals("NaN should not be formattable", -1, + NumberFormatUtil.formatFloatFast(Float.NaN, 5, buffer)); + assertEquals("+Infinity should not be formattable", -1, + NumberFormatUtil.formatFloatFast(Float.POSITIVE_INFINITY, 5, buffer)); + assertEquals("-Infinity should not be formattable", -1, + NumberFormatUtil.formatFloatFast(Float.NEGATIVE_INFINITY, 5, buffer)); + + assertEquals("Too big number should not be formattable", -1, + NumberFormatUtil.formatFloatFast(((float) Long.MAX_VALUE) + 1000000000000f, 5, buffer)); + assertEquals("Too big negative number should not be formattable", -1, + NumberFormatUtil.formatFloatFast(Long.MIN_VALUE, 5, buffer)); + } + + public void testRoundingUp() + { + 
assertEquals(1, NumberFormatUtil.formatFloatFast(0.999999f, 5, buffer)); + assertArrayEquals(new byte[]{'1'}, Arrays.copyOfRange(buffer, 0, 1)); + + assertEquals(4, NumberFormatUtil.formatFloatFast(0.125f, 2, buffer)); + assertArrayEquals(new byte[]{'0','.','1','3'}, Arrays.copyOfRange(buffer, 0, 4)); + + assertEquals(2, NumberFormatUtil.formatFloatFast(-0.999999f, 5, buffer)); + assertArrayEquals(new byte[]{'-','1'}, Arrays.copyOfRange(buffer, 0, 2)); + } + + public void testRoundingDown() + { + assertEquals(4, NumberFormatUtil.formatFloatFast(0.994f, 2, buffer)); + assertArrayEquals(new byte[]{'0','.','9','9'}, Arrays.copyOfRange(buffer, 0, 4)); + } + + /** + * Formats all floats in a defined range, parses them back with the BigDecimal constructor and + * compares them to the expected result. The test only tests a small range for performance + * reasons. It works for ranges up to at least A0 size: + * + *
<ul> + * <li>PDF uses 72 dpi resolution</li> + * <li>A0 size is 841mm x 1189mm, this equals to about 2472 x 3495 in dot resolution</li> + * </ul>
    + */ + public void testFormattingInRange() + { + //Define a range to test + BigDecimal minVal = new BigDecimal("-10"); + BigDecimal maxVal = new BigDecimal("10"); + BigDecimal maxDelta = BigDecimal.ZERO; + + Pattern pattern = Pattern.compile("^\\-?\\d+(\\.\\d+)?$"); + + byte[] formatBuffer = new byte[32]; + + for (int maxFractionDigits = 0; maxFractionDigits <= 5; maxFractionDigits++) + { + BigDecimal increment = new BigDecimal(10).pow(-maxFractionDigits, MathContext.DECIMAL128); + + for (BigDecimal value = minVal; value.compareTo(maxVal) < 0; value = value.add(increment)) + { + //format with the formatFloatFast method and parse back + int byteCount = NumberFormatUtil.formatFloatFast(value.floatValue(), maxFractionDigits, formatBuffer); + Assert.assertNotEquals(-1, byteCount); + String newStringResult = new String(formatBuffer, 0, byteCount, Charsets.US_ASCII); + BigDecimal formattedDecimal = new BigDecimal(newStringResult); + + //create new BigDecimal with float representation. This is needed because the float + //may not represent the 'value' BigDecimal precisely, in which case the formatFloatFast + //would get a different result. + BigDecimal expectedDecimal = new BigDecimal(value.floatValue()); + expectedDecimal = expectedDecimal.setScale(maxFractionDigits, RoundingMode.HALF_UP); + + BigDecimal diff = formattedDecimal.subtract(expectedDecimal).abs(); + + assertTrue(pattern.matcher(newStringResult).matches()); + + //Fail if diff is greater than maxDelta. + if (diff.compareTo(maxDelta) > 0) + { + fail("Expected: " + expectedDecimal + ", actual: " + newStringResult + ", diff: " + diff); + } + } + } + } + + private void assertArrayEquals(byte[] expected, byte[] actual) + { + assertEquals("Length of byte array not equal", expected.length, actual.length); + for (int idx = 0; idx < expected.length; idx++) + { + if (expected[idx] != actual[idx]) + { + fail(String.format("Byte at index %d not equal. 
Expected '%02X' but got '%02X'", + idx, expected[idx], actual[idx])); + } + } + } + + /** + * Set the tests in the suite for this test class. + * + * @return the Suite. + */ + public static Test suite() + { + return new TestSuite(TestNumberFormatUtil.class); + } + + /** + * Command line execution. + * + * @param args Command line arguments. + */ + public static void main(String[] args) + { + String[] arg = + { + TestNumberFormatUtil.class.getName() + }; + junit.textui.TestRunner.main(arg); + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/util/package.html b/pdfbox/src/test/java/org/apache/pdfbox/util/package.html index 046deef9d42..78d799962f4 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/util/package.html +++ b/pdfbox/src/test/java/org/apache/pdfbox/util/package.html @@ -15,8 +15,8 @@ ! limitations under the License. !--> - - + + diff --git a/pdfbox/src/test/resources/input/PDFBOX-2984-rotations.pdf b/pdfbox/src/test/resources/input/PDFBOX-2984-rotations.pdf new file mode 100644 index 00000000000..5ccc1e807a0 Binary files /dev/null and b/pdfbox/src/test/resources/input/PDFBOX-2984-rotations.pdf differ diff --git a/pdfbox/src/test/resources/input/PDFBOX-2984-rotations.pdf-sorted.txt b/pdfbox/src/test/resources/input/PDFBOX-2984-rotations.pdf-sorted.txt new file mode 100644 index 00000000000..c477aafc78a --- /dev/null +++ b/pdfbox/src/test/resources/input/PDFBOX-2984-rotations.pdf-sorted.txt @@ -0,0 +1,12 @@ +Apache PDFBox ® +90° text matrix +Apache PDFBox ® +90° ctm +Apache PDFBox ® +180° text matrix +Apache PDFBox ® +180° ctm +Apache PDFBox ® +270° text matrix +Apache PDFBox ® +270° ctm diff --git a/pdfbox/src/test/resources/input/PDFBOX-2984-rotations.pdf.txt b/pdfbox/src/test/resources/input/PDFBOX-2984-rotations.pdf.txt new file mode 100644 index 00000000000..c477aafc78a --- /dev/null +++ b/pdfbox/src/test/resources/input/PDFBOX-2984-rotations.pdf.txt @@ -0,0 +1,12 @@ +Apache PDFBox ® +90° text matrix +Apache PDFBox ® +90° ctm +Apache PDFBox ® 
+180° text matrix +Apache PDFBox ® +180° ctm +Apache PDFBox ® +270° text matrix +Apache PDFBox ® +270° ctm diff --git a/pdfbox/src/test/resources/input/PDFBOX-3498-Y5TLCWTIAE3FYDVJTV2TXRZGXLEDUNSW.pdf b/pdfbox/src/test/resources/input/PDFBOX-3498-Y5TLCWTIAE3FYDVJTV2TXRZGXLEDUNSW.pdf new file mode 100644 index 00000000000..cd40ceca17e Binary files /dev/null and b/pdfbox/src/test/resources/input/PDFBOX-3498-Y5TLCWTIAE3FYDVJTV2TXRZGXLEDUNSW.pdf differ diff --git a/pdfbox/src/test/resources/input/PDFBOX-3498-Y5TLCWTIAE3FYDVJTV2TXRZGXLEDUNSW.pdf-sorted.txt b/pdfbox/src/test/resources/input/PDFBOX-3498-Y5TLCWTIAE3FYDVJTV2TXRZGXLEDUNSW.pdf-sorted.txt new file mode 100644 index 00000000000..3210f1f67af --- /dev/null +++ b/pdfbox/src/test/resources/input/PDFBOX-3498-Y5TLCWTIAE3FYDVJTV2TXRZGXLEDUNSW.pdf-sorted.txt @@ -0,0 +1 @@ +IN THE COURT OF CHANCERY OF THE STATE OF DELAWARE  diff --git a/pdfbox/src/test/resources/input/PDFBOX-3498-Y5TLCWTIAE3FYDVJTV2TXRZGXLEDUNSW.pdf.txt b/pdfbox/src/test/resources/input/PDFBOX-3498-Y5TLCWTIAE3FYDVJTV2TXRZGXLEDUNSW.pdf.txt new file mode 100644 index 00000000000..3210f1f67af --- /dev/null +++ b/pdfbox/src/test/resources/input/PDFBOX-3498-Y5TLCWTIAE3FYDVJTV2TXRZGXLEDUNSW.pdf.txt @@ -0,0 +1 @@ +IN THE COURT OF CHANCERY OF THE STATE OF DELAWARE  diff --git a/pdfbox/src/test/resources/input/PDFBOX-3833-reduced.pdf b/pdfbox/src/test/resources/input/PDFBOX-3833-reduced.pdf new file mode 100644 index 00000000000..785dc9a99ef Binary files /dev/null and b/pdfbox/src/test/resources/input/PDFBOX-3833-reduced.pdf differ diff --git a/pdfbox/src/test/resources/input/PDFBOX-3833-reduced.pdf-sorted.txt b/pdfbox/src/test/resources/input/PDFBOX-3833-reduced.pdf-sorted.txt new file mode 100644 index 00000000000..bf0bbe4a70b --- /dev/null +++ b/pdfbox/src/test/resources/input/PDFBOX-3833-reduced.pdf-sorted.txt @@ -0,0 +1 @@ +ターン diff --git a/pdfbox/src/test/resources/input/PDFBOX-3833-reduced.pdf.txt 
b/pdfbox/src/test/resources/input/PDFBOX-3833-reduced.pdf.txt new file mode 100644 index 00000000000..bf0bbe4a70b --- /dev/null +++ b/pdfbox/src/test/resources/input/PDFBOX-3833-reduced.pdf.txt @@ -0,0 +1 @@ +ターン diff --git a/pdfbox/src/test/resources/input/PDFBOX-4322-Empty-ToUnicode-reduced.pdf b/pdfbox/src/test/resources/input/PDFBOX-4322-Empty-ToUnicode-reduced.pdf new file mode 100644 index 00000000000..644bd752f45 Binary files /dev/null and b/pdfbox/src/test/resources/input/PDFBOX-4322-Empty-ToUnicode-reduced.pdf differ diff --git a/pdfbox/src/test/resources/input/PDFBOX-4322-Empty-ToUnicode-reduced.pdf-sorted.txt b/pdfbox/src/test/resources/input/PDFBOX-4322-Empty-ToUnicode-reduced.pdf-sorted.txt new file mode 100644 index 00000000000..1ec7b784ec6 --- /dev/null +++ b/pdfbox/src/test/resources/input/PDFBOX-4322-Empty-ToUnicode-reduced.pdf-sorted.txt @@ -0,0 +1 @@ +Justin diff --git a/pdfbox/src/test/resources/input/PDFBOX-4322-Empty-ToUnicode-reduced.pdf.txt b/pdfbox/src/test/resources/input/PDFBOX-4322-Empty-ToUnicode-reduced.pdf.txt new file mode 100644 index 00000000000..1ec7b784ec6 --- /dev/null +++ b/pdfbox/src/test/resources/input/PDFBOX-4322-Empty-ToUnicode-reduced.pdf.txt @@ -0,0 +1 @@ +Justin diff --git a/pdfbox/src/test/resources/input/PDFBOX-5002.pdf b/pdfbox/src/test/resources/input/PDFBOX-5002.pdf new file mode 100644 index 00000000000..71e240bbb72 Binary files /dev/null and b/pdfbox/src/test/resources/input/PDFBOX-5002.pdf differ diff --git a/pdfbox/src/test/resources/input/PDFBOX-5002.pdf-sorted.txt b/pdfbox/src/test/resources/input/PDFBOX-5002.pdf-sorted.txt new file mode 100644 index 00000000000..47f7bbc0a06 --- /dev/null +++ b/pdfbox/src/test/resources/input/PDFBOX-5002.pdf-sorted.txt @@ -0,0 +1,3 @@ +Title in a big font some text with +a smaller font +on multiple lines diff --git a/pdfbox/src/test/resources/input/PDFBOX-5002.pdf.txt b/pdfbox/src/test/resources/input/PDFBOX-5002.pdf.txt new file mode 100644 index 00000000000..47f7bbc0a06 --- 
/dev/null +++ b/pdfbox/src/test/resources/input/PDFBOX-5002.pdf.txt @@ -0,0 +1,3 @@ +Title in a big font some text with +a smaller font +on multiple lines diff --git a/pdfbox/src/test/resources/input/cweb.pdf-sorted.txt b/pdfbox/src/test/resources/input/cweb.pdf-sorted.txt index 046b9acfc39..7f20da9c28c 100644 --- a/pdfbox/src/test/resources/input/cweb.pdf-sorted.txt +++ b/pdfbox/src/test/resources/input/cweb.pdf-sorted.txt @@ -801,7 +801,7 @@ change limit = change buffer ; /∗ this value is used if the change file ends long as they don’t begin with @y, @z, or @i (which would probably mean that the change file is fouled up). 〈Skip over comment lines in the change file; return if end of file 13 〉 ≡ while (1) { -change line++; +change line ++; if (¬input ln (change file )) return; if (limit < buffer + 2) continue; if (buffer [0] 6= ’@’) continue; @@ -816,7 +816,7 @@ This code is used in section 12. 14. Here we are looking at lines following the @x. 〈Skip to the next nonblank line; return if end of file 14 〉 ≡ do { -change line++; +change line ++; if (¬input ln (change file )) { err print ("! Change file ended after @x"); return; diff --git a/pdfbox/src/test/resources/input/cweb.pdf.txt b/pdfbox/src/test/resources/input/cweb.pdf.txt index 734e2bb74ba..edceca8231b 100644 --- a/pdfbox/src/test/resources/input/cweb.pdf.txt +++ b/pdfbox/src/test/resources/input/cweb.pdf.txt @@ -801,7 +801,7 @@ change limit = change buffer ; /∗ this value is used if the change file ends long as they don’t begin with @y, @z, or @i (which would probably mean that the change file is fouled up). 〈Skip over comment lines in the change file; return if end of file 13 〉 ≡ while (1) { -change line++; +change line ++; if (¬input ln (change file )) return; if (limit < buffer + 2) continue; if (buffer [0] 6= ’@’) continue; @@ -816,7 +816,7 @@ This code is used in section 12. 14. Here we are looking at lines following the @x. 
〈Skip to the next nonblank line; return if end of file 14 〉 ≡ do { -change line++; +change line ++; if (¬input ln (change file )) { err print ("! Change file ended after @x"); return; diff --git a/pdfbox/src/test/resources/input/eu-001.pdf b/pdfbox/src/test/resources/input/eu-001.pdf new file mode 100644 index 00000000000..20680bda43d Binary files /dev/null and b/pdfbox/src/test/resources/input/eu-001.pdf differ diff --git a/pdfbox/src/test/resources/input/eu-001.pdf-sorted.txt b/pdfbox/src/test/resources/input/eu-001.pdf-sorted.txt new file mode 100644 index 00000000000..52ba6be350f --- /dev/null +++ b/pdfbox/src/test/resources/input/eu-001.pdf-sorted.txt @@ -0,0 +1,163 @@ +E-PRTR pollutants and their thresholds + +A facility has to report data under E-PRTR if it fulfils the following criteria: +• the facility falls under at least one of the 65 E-PRTR economic activities. The +activities are also reported using a statistical classification of economic activities +(NACE rev 2) +• the facility has a capacity exceeding at least one of the E-PRTR capacity +thresholds +• the facility releases pollutants or transfers waste off-site which exceed specific +thresholds set out in Article 5 of the E-PRTR Regulation. These thresholds for +releases of pollutants are specified for each media - air, water and land - in Annex +II of the E-PRTR Regulation. + +In the following tables you will find the 91 E-PRTR pollutants and their thresholds broken +down by the 7 groups used in all the searches of the E-PRTR website. 
+ + +Greenhouse gases + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +Carbon dioxide (CO2) 100 million - - +Hydro-fluorocarbons (HFCs) 100 - - +Methane (CH4) 100 000 - - +Nitrous oxide (N2O) 10 000 - - +Perfluorocarbons (PFCs) 100 - - +Sulphur hexafluoride (SF6) 50 - - + +Other gases + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +Ammonia (NH3) 10 000 - - +Carbon monoxide (CO) 500 000 - - +Chlorine and inorganic compounds +(as HCl) 10 000 - - +Chlorofluorocarbons (CFCs) 1 - - +Flourine and inorganic compounds +(as HF) 5 000 - - +Halons 1 - - +Hydrochlorofluorocarbons (HCFCs) 1 - - +Hydrogen Cyanide (HCN) 200 - - +Nitrogen oxides (NOx/NO2) 100 000 - - +Non-methane volatile organic +compounds (NMVOC) 100 000 - - +Sulphur oxides (SOx/SO2) 150 000 - - + +Heavy metals + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +Arsenic and compounds (as As) 20 5 5 +Cadmium and compounds (as Cd) 10 5 5 +Chromium and compounds (as Cr) 100 50 50 +Copper and compounds (as Cu) 100 50 50 +Lead and compounds (as Pb) 200 20 20 +Mercury and compounds (as Hg) 10 1 1 +Nickel and compounds (as Ni) 50 20 20 +Zinc and compounds (as Zn) 200 100 100 + +Pesticides + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +1,2,3,4,5,6- hexachlorocyclohexane +(HCH) 10 1 1 +Alachlor - 1 1 +Aldrin 1 1 1 +Atrazine - 1 1 +Chlordane 1 1 1 +Chlordecone 1 1 1 +Chlorfenvinphos - 1 1 +Chlorpyrifos - 1 1 +DDT 1 1 1 +Diuron - 1 1 +Endosulphan - 1 1 +Endrin 1 1 1 +Heptachlor 1 1 1 +Isodrin - 1 - +Isoproturon - 1 1 +Lindane 1 1 1 +Mirex 1 1 1 +Simazine - 1 1 +Toxaphene 1 1 1 +Tributylin and compounds - 1 1 +Trifluralin - 1 1 +Triphenyltin and compounds - 1 1 + +Chlorinated organic substances + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +1,1,1-trichloroethane 100 - - +1,1,2,2-tetrachloroethane 50 - - +1,2-dichloroethane (EDC) 1 000 10 10 +Brominated diphenylethers (PBDE) 
- 1 1 +Chloro-alkanes, C10-C13 - 1 1 +Dichloromethane (DCM) 1 000 10 10 +Dieldrin 1 1 1 +Halogenated Organic Compounds (AOX) - 1 000 1 000 +Hexabromobifenyl 0,1 0,1 0,1 +Hexachlorobenzene (HCB) 10 1 1 +Hexachlorobutadiene (HCBD) - 1 1 +PCDD+PCFD (Dioxins+furans) (as Teq) 0,0001 0,0001 0,0001 +Pentachlorobenzene 1 1 1 +Pentachlorophenol (PCP) 10 1 1 +Polychlorinated biphenyls (PCB) 0,1 0,1 0,1 +Tetrachloroethylene (PER) 2 000 10 - +Tetrachloromethane (TCM) 100 1 - +Trichlorobenzenes (TCBs) (all isomers) 10 1 - +Trichloroethylene 2 000 10 - +Trichloromethane 500 10 - +Vynil chloride 1 000 10 10 + + +Other organic substances + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +Anthracene 50 1 1 +Benzene 1 000 200 (as 200 (as +BTEX) BTEX) +Benzo(g,h,i)perylene - 1 - +Di-(2-ethyl hexyl) phthalate (DEHP) 10 1 1 +Ethyl benzene - 200 (as 200 (as +BTEX) BTEX) +Ethylene oxide 1 000 10 10 +Fluoranthene - 1 - +Naphthalene 100 10 10 +Nonylphenol and Nonylphenol ethoxylates +(NP/NPEs) - 1 1 +Octylphenols and octylphenol ethoxylates - 1 - +Organotin compounds (as total Sn) - 50 50 +Phenols (as total C) - 20 20 +Polycyclic Aromatic hydrocarbons (PAHs) 50 5 5 +Toluene - 200 (as 200 (as +BTEX) BTEX) +Total Organic Carbon (TOC) (as total C or +COD/3) - 50 000 - +Xylenes - 200 (as 200 (as +BTEX) BTEX) + + +Inorganic substances + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +Asbestos 1 1 1 +Chlorides (as total Cl) - 2 million 2 million +Cyanides (as total CN) - 50 50 +Fluorides (as total F) - 2 000 2 000 +Particulate matter (PM10) 50 000 - - +Total Nitrogen - 50 000 50 000 +Total Phosphorus - 5 000 5 000 + + diff --git a/pdfbox/src/test/resources/input/eu-001.pdf-tabula.txt b/pdfbox/src/test/resources/input/eu-001.pdf-tabula.txt new file mode 100644 index 00000000000..ffdbba11cc6 --- /dev/null +++ b/pdfbox/src/test/resources/input/eu-001.pdf-tabula.txt @@ -0,0 +1,209 @@ +E-PRTR pollutants and their thresholds + +A facility has to 
report data under E-PRTR if it fulfils the following criteria: +• the facility falls under at least one of the 65 E-PRTR economic activities. The +activities are also reported using a statistical classification of economic activities +(NACE rev 2) +• the facility has a capacity exceeding at least one of the E-PRTR capacity +thresholds +• the facility releases pollutants or transfers waste off-site which exceed specific +thresholds set out in Article 5 of the E-PRTR Regulation. These thresholds for +releases of pollutants are specified for each media - air, water and land - in Annex +II of the E-PRTR Regulation. + +In the following tables you will find the 91 E-PRTR pollutants and their thresholds broken +down by the 7 groups used in all the searches of the E-PRTR website. + + +Greenhouse gases + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Carbon dioxide (CO2) 100 million - - +Hydro-fluorocarbons (HFCs) 100 - - +Methane (CH4) 100 000 - - +Nitrous oxide (N2O) 10 000 - - +Perfluorocarbons (PFCs) 100 - - +Sulphur hexafluoride (SF6) 50 - - + +Other gases + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Ammonia (NH3) 10 000 - - +Carbon monoxide (CO) 500 000 - - +Chlorine and inorganic compounds +(as HCl) +10 000 - - +Chlorofluorocarbons (CFCs) 1 - - +Flourine and inorganic compounds +(as HF) +5 000 - - +Halons 1 - - +Hydrochlorofluorocarbons (HCFCs) 1 - - +Hydrogen Cyanide (HCN) 200 - - +Nitrogen oxides (NOx/NO2) 100 000 - - +Non-methane volatile organic +compounds (NMVOC) +100 000 - - +Sulphur oxides (SOx/SO2) 150 000 - - + +Heavy metals + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Arsenic and compounds (as As) 20 5 5 +Cadmium and compounds (as Cd) 10 5 5 +Chromium and compounds (as Cr) 100 50 50 +Copper and compounds (as Cu) 100 50 50 +Lead and compounds (as Pb) 200 20 20 +Mercury and compounds (as Hg) 10 1 1 +Nickel and compounds (as Ni) 50 20 20 +Zinc and 
compounds (as Zn) 200 100 100 + +Pesticides + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +1,2,3,4,5,6- hexachlorocyclohexane +(HCH) +10 1 1 +Alachlor - 1 1 +Aldrin 1 1 1 +Atrazine - 1 1 +Chlordane 1 1 1 +Chlordecone 1 1 1 +Chlorfenvinphos - 1 1 +Chlorpyrifos - 1 1 +DDT 1 1 1 +Diuron - 1 1 +Endosulphan - 1 1 +Endrin 1 1 1 +Heptachlor 1 1 1 +Isodrin - 1 - +Isoproturon - 1 1 +Lindane 1 1 1 +Mirex 1 1 1 +Simazine - 1 1 +Toxaphene 1 1 1 +Tributylin and compounds - 1 1 +Trifluralin - 1 1 +Triphenyltin and compounds - 1 1 + +Chlorinated organic substances + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +1,1,1-trichloroethane 100 - - +1,1,2,2-tetrachloroethane 50 - - +1,2-dichloroethane (EDC) 1 000 10 10 +Brominated diphenylethers (PBDE) - 1 1 +Chloro-alkanes, C10-C13 - 1 1 +Dichloromethane (DCM) 1 000 10 10 +Dieldrin 1 1 1 +Halogenated Organic Compounds (AOX) - 1 000 1 000 +Hexabromobifenyl 0,1 0,1 0,1 +Hexachlorobenzene (HCB) 10 1 1 +Hexachlorobutadiene (HCBD) - 1 1 +PCDD+PCFD (Dioxins+furans) (as Teq) 0,0001 0,0001 0,0001 +Pentachlorobenzene 1 1 1 +Pentachlorophenol (PCP) 10 1 1 +Polychlorinated biphenyls (PCB) 0,1 0,1 0,1 +Tetrachloroethylene (PER) 2 000 10 - +Tetrachloromethane (TCM) 100 1 - +Trichlorobenzenes (TCBs) (all isomers) 10 1 - +Trichloroethylene 2 000 10 - +Trichloromethane 500 10 - +Vynil chloride 1 000 10 10 + + +Other organic substances + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Anthracene 50 1 1 +Benzene 1 000 +200 (as +BTEX) +200 (as +BTEX) +Benzo(g,h,i)perylene - 1 - +Di-(2-ethyl hexyl) phthalate (DEHP) 10 1 1 +Ethyl benzene - +200 (as +BTEX) +200 (as +BTEX) +Ethylene oxide 1 000 10 10 +Fluoranthene - 1 - +Naphthalene 100 10 10 +Nonylphenol and Nonylphenol ethoxylates +(NP/NPEs) +- 1 1 +Octylphenols and octylphenol ethoxylates - 1 - +Organotin compounds (as total Sn) - 50 50 +Phenols (as total C) - 20 20 +Polycyclic Aromatic hydrocarbons 
(PAHs) 50 5 5 +Toluene - +200 (as +BTEX) +200 (as +BTEX) +Total Organic Carbon (TOC) (as total C or +COD/3) +- 50 000 - +Xylenes - +200 (as +BTEX) +200 (as +BTEX) + + +Inorganic substances + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Asbestos 1 1 1 +Chlorides (as total Cl) - 2 million 2 million +Cyanides (as total CN) - 50 50 +Fluorides (as total F) - 2 000 2 000 +Particulate matter (PM10) 50 000 - - +Total Nitrogen - 50 000 50 000 +Total Phosphorus - 5 000 5 000 + + diff --git a/pdfbox/src/test/resources/input/eu-001.pdf.txt b/pdfbox/src/test/resources/input/eu-001.pdf.txt new file mode 100644 index 00000000000..d0d8dd796ff --- /dev/null +++ b/pdfbox/src/test/resources/input/eu-001.pdf.txt @@ -0,0 +1,199 @@ +E-PRTR pollutants and their thresholds + +A facility has to report data under E-PRTR if it fulfils the following criteria: +• the facility falls under at least one of the 65 E-PRTR economic activities. The +activities are also reported using a statistical classification of economic activities +(NACE rev 2) +• the facility has a capacity exceeding at least one of the E-PRTR capacity +thresholds +• the facility releases pollutants or transfers waste off-site which exceed specific +thresholds set out in Article 5 of the E-PRTR Regulation. These thresholds for +releases of pollutants are specified for each media - air, water and land - in Annex +II of the E-PRTR Regulation. + +In the following tables you will find the 91 E-PRTR pollutants and their thresholds broken +down by the 7 groups used in all the searches of the E-PRTR website. 
+ + +Greenhouse gases + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Carbon dioxide (CO2) 100 million - - +Hydro-fluorocarbons (HFCs) 100 - - +Methane (CH4) 100 000 - - +Nitrous oxide (N2O) 10 000 - - +Perfluorocarbons (PFCs) 100 - - +Sulphur hexafluoride (SF6) 50 - - + +Other gases + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Ammonia (NH3) 10 000 - - +Carbon monoxide (CO) 500 000 - - +Chlorine and inorganic compounds +(as HCl) 10 000 - - +Chlorofluorocarbons (CFCs) 1 - - +Flourine and inorganic compounds +(as HF) 5 000 - - +Halons 1 - - +Hydrochlorofluorocarbons (HCFCs) 1 - - +Hydrogen Cyanide (HCN) 200 - - +Nitrogen oxides (NOx/NO2) 100 000 - - +Non-methane volatile organic +compounds (NMVOC) 100 000 - - +Sulphur oxides (SOx/SO2) 150 000 - - + +Heavy metals + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Arsenic and compounds (as As) 20 5 5 +Cadmium and compounds (as Cd) 10 5 5 +Chromium and compounds (as Cr) 100 50 50 +Copper and compounds (as Cu) 100 50 50 +Lead and compounds (as Pb) 200 20 20 +Mercury and compounds (as Hg) 10 1 1 +Nickel and compounds (as Ni) 50 20 20 +Zinc and compounds (as Zn) 200 100 100 + +Pesticides + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +1,2,3,4,5,6- hexachlorocyclohexane +(HCH) 10 1 1 +Alachlor - 1 1 +Aldrin 1 1 1 +Atrazine - 1 1 +Chlordane 1 1 1 +Chlordecone 1 1 1 +Chlorfenvinphos - 1 1 +Chlorpyrifos - 1 1 +DDT 1 1 1 +Diuron - 1 1 +Endosulphan - 1 1 +Endrin 1 1 1 +Heptachlor 1 1 1 +Isodrin - 1 - +Isoproturon - 1 1 +Lindane 1 1 1 +Mirex 1 1 1 +Simazine - 1 1 +Toxaphene 1 1 1 +Tributylin and compounds - 1 1 +Trifluralin - 1 1 +Triphenyltin and compounds - 1 1 + +Chlorinated organic substances + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +1,1,1-trichloroethane 100 - - +1,1,2,2-tetrachloroethane 50 - - +1,2-dichloroethane (EDC) 1 000 10 10 +Brominated 
diphenylethers (PBDE) - 1 1 +Chloro-alkanes, C10-C13 - 1 1 +Dichloromethane (DCM) 1 000 10 10 +Dieldrin 1 1 1 +Halogenated Organic Compounds (AOX) - 1 000 1 000 +Hexabromobifenyl 0,1 0,1 0,1 +Hexachlorobenzene (HCB) 10 1 1 +Hexachlorobutadiene (HCBD) - 1 1 +PCDD+PCFD (Dioxins+furans) (as Teq) 0,0001 0,0001 0,0001 +Pentachlorobenzene 1 1 1 +Pentachlorophenol (PCP) 10 1 1 +Polychlorinated biphenyls (PCB) 0,1 0,1 0,1 +Tetrachloroethylene (PER) 2 000 10 - +Tetrachloromethane (TCM) 100 1 - +Trichlorobenzenes (TCBs) (all isomers) 10 1 - +Trichloroethylene 2 000 10 - +Trichloromethane 500 10 - +Vynil chloride 1 000 10 10 + + +Other organic substances + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Anthracene 50 1 1 +Benzene 1 000 200 (as +BTEX) +200 (as +BTEX) +Benzo(g,h,i)perylene - 1 - +Di-(2-ethyl hexyl) phthalate (DEHP) 10 1 1 +Ethyl benzene - 200 (as +BTEX) +200 (as +BTEX) +Ethylene oxide 1 000 10 10 +Fluoranthene - 1 - +Naphthalene 100 10 10 +Nonylphenol and Nonylphenol ethoxylates +(NP/NPEs) - 1 1 +Octylphenols and octylphenol ethoxylates - 1 - +Organotin compounds (as total Sn) - 50 50 +Phenols (as total C) - 20 20 +Polycyclic Aromatic hydrocarbons (PAHs) 50 5 5 +Toluene - 200 (as +BTEX) +200 (as +BTEX) +Total Organic Carbon (TOC) (as total C or +COD/3) - 50 000 - +Xylenes - 200 (as +BTEX) +200 (as +BTEX) + + +Inorganic substances + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Asbestos 1 1 1 +Chlorides (as total Cl) - 2 million 2 million +Cyanides (as total CN) - 50 50 +Fluorides (as total F) - 2 000 2 000 +Particulate matter (PM10) 50 000 - - +Total Nitrogen - 50 000 50 000 +Total Phosphorus - 5 000 5 000 + + diff --git a/pdfbox/src/test/resources/input/merge/PDFA3A.pdf b/pdfbox/src/test/resources/input/merge/PDFA3A.pdf new file mode 100644 index 00000000000..96848f73583 Binary files /dev/null and b/pdfbox/src/test/resources/input/merge/PDFA3A.pdf differ diff --git 
a/pdfbox/src/test/resources/input/merge/PDFBOX-4417-001031.pdf b/pdfbox/src/test/resources/input/merge/PDFBOX-4417-001031.pdf new file mode 100644 index 00000000000..eea0d5e03e5 Binary files /dev/null and b/pdfbox/src/test/resources/input/merge/PDFBOX-4417-001031.pdf differ diff --git a/pdfbox/src/test/resources/input/merge/PDFBOX-4417-054080.pdf b/pdfbox/src/test/resources/input/merge/PDFBOX-4417-054080.pdf new file mode 100644 index 00000000000..3857c263336 Binary files /dev/null and b/pdfbox/src/test/resources/input/merge/PDFBOX-4417-054080.pdf differ diff --git a/pdfbox/src/test/resources/input/rendering/PDFBOX-4372-2DAYCLVOFG3FTVO4RMAJJL3VTPNYDFRO-p4_reduced.pdf b/pdfbox/src/test/resources/input/rendering/PDFBOX-4372-2DAYCLVOFG3FTVO4RMAJJL3VTPNYDFRO-p4_reduced.pdf new file mode 100644 index 00000000000..94b2b902fd1 Binary files /dev/null and b/pdfbox/src/test/resources/input/rendering/PDFBOX-4372-2DAYCLVOFG3FTVO4RMAJJL3VTPNYDFRO-p4_reduced.pdf differ diff --git a/pdfbox/src/test/resources/input/rendering/PDFBOX-4372-2DAYCLVOFG3FTVO4RMAJJL3VTPNYDFRO-p4_reduced.pdf-1.png b/pdfbox/src/test/resources/input/rendering/PDFBOX-4372-2DAYCLVOFG3FTVO4RMAJJL3VTPNYDFRO-p4_reduced.pdf-1.png new file mode 100644 index 00000000000..3ee44cdcc23 Binary files /dev/null and b/pdfbox/src/test/resources/input/rendering/PDFBOX-4372-2DAYCLVOFG3FTVO4RMAJJL3VTPNYDFRO-p4_reduced.pdf-1.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/encryption/AESkeylength128.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/encryption/AESkeylength128.pdf new file mode 100644 index 00000000000..973ae45e09b Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/encryption/AESkeylength128.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/encryption/AESkeylength256.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/encryption/AESkeylength256.pdf new file mode 100644 index 00000000000..cb11aae4523 Binary files /dev/null and 
b/pdfbox/src/test/resources/org/apache/pdfbox/encryption/AESkeylength256.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/encryption/PDFBOX-4421-keystore.pfx b/pdfbox/src/test/resources/org/apache/pdfbox/encryption/PDFBOX-4421-keystore.pfx new file mode 100644 index 00000000000..e926a346dd6 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/encryption/PDFBOX-4421-keystore.pfx differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/filter/PDFBOX-1777.bin b/pdfbox/src/test/resources/org/apache/pdfbox/filter/PDFBOX-1977.bin similarity index 100% rename from pdfbox/src/test/resources/org/apache/pdfbox/filter/PDFBOX-1777.bin rename to pdfbox/src/test/resources/org/apache/pdfbox/filter/PDFBOX-1977.bin diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-DifferentExportValues.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-DifferentExportValues.pdf new file mode 100644 index 00000000000..a3491b3eabc Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-DifferentExportValues.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-DifferentFieldType.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-DifferentFieldType.pdf new file mode 100644 index 00000000000..84c7f665514 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-DifferentFieldType.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-DifferentOptions.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-DifferentOptions.pdf new file mode 100644 index 00000000000..1bc2d2cdf17 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-DifferentOptions.pdf differ diff --git 
a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-SameNameNode.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-SameNameNode.pdf new file mode 100644 index 00000000000..e8b0994cf19 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-SameNameNode.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-TextFieldsOnly.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-TextFieldsOnly.pdf new file mode 100644 index 00000000000..e53f4227bc7 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge-TextFieldsOnly.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge.pdf new file mode 100644 index 00000000000..31e0dc57baa Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcroFormForMerge.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentExportValues-WasMaster.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentExportValues-WasMaster.pdf new file mode 100644 index 00000000000..2246846bf4d Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentExportValues-WasMaster.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentExportValues.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentExportValues.pdf new file mode 100644 index 00000000000..1e97c19b773 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentExportValues.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentFieldType-WasMaster.pdf 
b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentFieldType-WasMaster.pdf new file mode 100644 index 00000000000..c3a49ff87bb Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentFieldType-WasMaster.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentFieldType.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentFieldType.pdf new file mode 100644 index 00000000000..56ac2ab7255 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentFieldType.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentOptions-WasMaster.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentOptions-WasMaster.pdf new file mode 100644 index 00000000000..606736b4401 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentOptions-WasMaster.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentOptions.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentOptions.pdf new file mode 100644 index 00000000000..9171fa4807a Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-DifferentOptions.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-SameMerged.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-SameMerged.pdf new file mode 100644 index 00000000000..504fb218831 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-SameMerged.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-SameNameNode.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-SameNameNode.pdf new file mode 100644 index 00000000000..d2a7863543f 
Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-SameNameNode.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-TextFieldsOnly-SameMerged.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-TextFieldsOnly-SameMerged.pdf new file mode 100644 index 00000000000..e77efc69011 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/AcrobatMerge-TextFieldsOnly-SameMerged.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/PDFBoxLegacyMerge-SameMerged.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/PDFBoxLegacyMerge-SameMerged.pdf new file mode 100644 index 00000000000..546c1c99998 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/multipdf/PDFBoxLegacyMerge-SameMerged.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/SimpleForm2Fields.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/SimpleForm2Fields.pdf new file mode 100644 index 00000000000..800eee0aeb1 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/SimpleForm2Fields.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/embedded_zip.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/embedded_zip.pdf new file mode 100644 index 00000000000..2c6ea3a83c4 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/embedded_zip.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/nocatalog.fdf b/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/nocatalog.fdf new file mode 100644 index 00000000000..d1b5ea03aaa --- /dev/null +++ b/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/nocatalog.fdf @@ -0,0 +1,8 @@ +%FDF-1.4 +% +1 0 obj +<><>]>>>> +endobj +trailer +<> +%%EOF diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/withcatalog.fdf 
b/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/withcatalog.fdf new file mode 100644 index 00000000000..470765b7d17 --- /dev/null +++ b/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/withcatalog.fdf @@ -0,0 +1,8 @@ +%FDF-1.4 +% +1 0 obj +<><>]>>/Type/Catalog>> +endobj +trailer +<> +%%EOF diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/fdf/xfdf-test-document-annotations.xml b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/fdf/xfdf-test-document-annotations.xml index eb52d73ad76..51879b1ba84 100644 --- a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/fdf/xfdf-test-document-annotations.xml +++ b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/fdf/xfdf-test-document-annotations.xml @@ -65,5 +65,16 @@ + + + + +

    P&1 P&2 P&3

    + +
    + /Helvetica 12 Tf 0.842 0.424 0.000 rg +
    \ No newline at end of file diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/929316.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/929316.png new file mode 100644 index 00000000000..7a40577c08f Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/929316.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields-bigendian.tif b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields-bigendian.tif new file mode 100644 index 00000000000..3c23c59d4bb Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields-bigendian.tif differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields.tif b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields.tif new file mode 100644 index 00000000000..37f45af1fe1 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields.tif differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/gif-1bit-transparent.gif b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/gif-1bit-transparent.gif new file mode 100644 index 00000000000..756034fafaf Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/gif-1bit-transparent.gif differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/jpegcmyk.jpg b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/jpegcmyk.jpg new file mode 100644 index 00000000000..21af630b9dc Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/jpegcmyk.jpg differ diff --git 
a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_alpha_gray.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_alpha_gray.png new file mode 100644 index 00000000000..3d53e1d2776 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_alpha_gray.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_alpha_rgb.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_alpha_rgb.png new file mode 100644 index 00000000000..f40a279d515 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_alpha_rgb.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_gray.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_gray.png new file mode 100644 index 00000000000..2db7070439e Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_gray.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_gray_with_gama.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_gray_with_gama.png new file mode 100644 index 00000000000..c4084c3ad82 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_gray_with_gama.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed.png new file mode 100644 index 00000000000..e7e8add2a8e Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed_1bit_alpha.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed_1bit_alpha.png new file mode 100644 
index 00000000000..de3e4f691dc Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed_1bit_alpha.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed_2bit_alpha.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed_2bit_alpha.png new file mode 100644 index 00000000000..fd3c52c1c19 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed_2bit_alpha.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed_4bit_alpha.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed_4bit_alpha.png new file mode 100644 index 00000000000..7a1e619ae85 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed_4bit_alpha.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed_8bit_alpha.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed_8bit_alpha.png new file mode 100644 index 00000000000..9a476bd4407 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_indexed_8bit_alpha.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_rgb_gamma.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_rgb_gamma.png new file mode 100644 index 00000000000..0b4dab8f046 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_rgb_gamma.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_rgb_romm_16bit.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_rgb_romm_16bit.png new file mode 100644 index 00000000000..826b12ab629 Binary files /dev/null and 
b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/png_rgb_romm_16bit.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/AcroFormsRotation.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/AcroFormsRotation.pdf new file mode 100644 index 00000000000..0a41bc7cc1e Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/AcroFormsRotation.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/AcroFormsRotation.pdf-1.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/AcroFormsRotation.pdf-1.png new file mode 100644 index 00000000000..f243d96b005 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/AcroFormsRotation.pdf-1.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/AcroFormsRotation.pdf-2.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/AcroFormsRotation.pdf-2.png new file mode 100644 index 00000000000..a7dace4189d Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/AcroFormsRotation.pdf-2.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/AlignmentTests-flattened-noRef.pdf-1.png b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/AlignmentTests-flattened-noRef.pdf-1.png new file mode 100644 index 00000000000..f5cc79cb960 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/AlignmentTests-flattened-noRef.pdf-1.png differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacters.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacters.pdf new file mode 100644 index 00000000000..09ae7ccf26f Binary files /dev/null and 
b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacters.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/DifferentDALevels.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/DifferentDALevels.pdf new file mode 100644 index 00000000000..00d0ce0c36c Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/DifferentDALevels.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/PDFBOX-3835-input-acrobat-wrap.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/PDFBOX-3835-input-acrobat-wrap.pdf new file mode 100644 index 00000000000..d9aa1a91baa Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/PDFBOX-3835-input-acrobat-wrap.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/PDFBOX3812-acrobat-multiline-auto.pdf b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/PDFBOX3812-acrobat-multiline-auto.pdf new file mode 100644 index 00000000000..785a43934d0 Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/interactive/form/PDFBOX3812-acrobat-multiline-auto.pdf differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB Color Space Profile.icm b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB Color Space Profile.icm deleted file mode 100644 index 7f9d18d097d..00000000000 Binary files a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB Color Space Profile.icm and /dev/null differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB Color Space Profile.icm.LICENSE.txt b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB Color Space Profile.icm.LICENSE.txt deleted file mode 100644 index 9b817e33924..00000000000 --- a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB Color Space 
Profile.icm.LICENSE.txt +++ /dev/null @@ -1,14 +0,0 @@ -Obtained from: http://www.srgb.com/usingsrgb.html - -The file "sRGB Color Space Profile.icm" is: -Copyright (c) 1998 Hewlett-Packard Company - -To anyone who acknowledges that the file "sRGB Color Space Profile.icm" -is provided "AS IS" WITH NO EXPRESS OR IMPLIED WARRANTY: -permission to use, copy and distribute this file for any purpose is hereby -granted without fee, provided that the file is not changed including the HP -copyright notice tag, and that the name of Hewlett-Packard Company not be -used in advertising or publicity pertaining to distribution of the software -without specific, written prior permission. Hewlett-Packard Company makes -no representations about the suitability of this software for any purpose. - diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB.icc b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB.icc new file mode 100644 index 00000000000..6f3efbf02db Binary files /dev/null and b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB.icc differ diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB.icc.COPYING b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB.icc.COPYING new file mode 100644 index 00000000000..2c71be612da --- /dev/null +++ b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB.icc.COPYING @@ -0,0 +1,45 @@ +The profiles in the base directory are provided according to different licenses. + + +Group A +sRGB, LCMSLAB.ICM, LCMSXYZ.ICM, the compatibleWithAdobeRGB.icc and the +Gray.icc, CineonLog_M*.icc, CineLogCurve.icc profiles are all zlib licensed. +Even though it is highly recommended to rename them before editing. + + +Group B +The eciRGB*.icc profiles come with their license in license.rtf. + + +Group C +PhotoGamutRGB_avg6c.icc is licensed to be distributed freely. Modifications +are not allowed. + + + +Additionally all profiles come with the following disclaimer. 
The provided +ICC Profiles in the package are called DATA in the folling statement. + + + NO WARRANTY + + BECAUSE THE DATA IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE DATA, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE DATA "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE DATA IS WITH YOU. SHOULD THE +DATA PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE DATA AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE DATA (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE DATA TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB.icc.LICENSE-ZLIB b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB.icc.LICENSE-ZLIB new file mode 100644 index 00000000000..3b357a1d687 --- /dev/null +++ b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB.icc.LICENSE-ZLIB @@ -0,0 +1,23 @@ +The zlib/libpng License + +Copyright (c) 2008 Kai-Uwe Behrmann + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. 
+ +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. + diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB.icc.README b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB.icc.README new file mode 100644 index 00000000000..7c29edcdacb --- /dev/null +++ b/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/sRGB.icc.README @@ -0,0 +1,12 @@ +Included are profiles created by Marti Maria (littleCMS) : + + # CIE*Lab + # CIE*XYZ + # sRGB + +Various contributors: + + # LStar-RGB from ColorSolutions + # Photogamut-RGB from the Photogamut workgroup + # Cineon and Gray from Kai-Uwe Behrmann + # compatibleWithAdobeRGB1998 from Graeme Gill diff --git a/pdfbox/src/test/resources/org/apache/pdfbox/ttf/LiberationSans-Regular.ttf b/pdfbox/src/test/resources/org/apache/pdfbox/ttf/LiberationSans-Regular.ttf deleted file mode 100644 index 626dd9364f1..00000000000 Binary files a/pdfbox/src/test/resources/org/apache/pdfbox/ttf/LiberationSans-Regular.ttf and /dev/null differ diff --git a/pom.xml b/pom.xml index 6c875c6dc3a..b49dd4af878 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ org.apache.pdfbox pdfbox-parent - 2.0.0-SNAPSHOT + 2.0.25-SNAPSHOT parent/pom.xml @@ -117,19 +117,13 @@ - + - - - - - - - + From: ${username}@apache.org To: dev@pdfbox.apache.org @@ -143,7 +137,7 @@ The release candidate is a zip archive of the sources in: 
http://svn.apache.org/repos/asf/pdfbox/tags/${project.version}/ -The SHA1 checksum of the archive is ${checksum}. +The SHA-512 checksum of the archive is ${checksum}. Please vote on releasing this package as Apache PDFBox ${project.version}. The vote is open for the next 72 hours and passes if a majority of at @@ -158,9 +152,9 @@ The release candidate has been prepared in: ${basedir}/target/${project.version} -Please deploy it to people.apache.org like this: +Please commit it to - scp -r ${basedir}/target/${project.version} people.apache.org:public_html/pdfbox/ + https://dist.apache.org/repos/dist/dev/pdfbox/${project.version}/ A release vote template has been generated for you: diff --git a/preflight-app/pom.xml b/preflight-app/pom.xml index cafd44b899f..f387ea2c222 100644 --- a/preflight-app/pom.xml +++ b/preflight-app/pom.xml @@ -23,7 +23,7 @@ org.apache.pdfbox pdfbox-parent - 2.0.0-SNAPSHOT + 2.0.25-SNAPSHOT ../parent/pom.xml @@ -61,6 +61,11 @@ bcprov-jdk15on provided + + org.apache.pdfbox + jbig2-imageio + provided + @@ -73,9 +78,9 @@ org.apache.pdfbox.* true - *;scope=provided;inline=org/apache/**|org/bouncycastle/**|com/ibm/icu/** + *;scope=provided;inline=org/apache/**|org/bouncycastle/**|com/ibm/icu/**|META-INF/services/** ${project.url} - !junit.framework,!junit.textui,javax.*;resolution:=optional,org.apache.avalon.framework.logger;resolution:=optional,org.apache.log;resolution:=optional,org.apache.log4j;resolution:=optional,* + !junit.framework,!junit.textui,javax.*;resolution:=optional,org.apache.avalon.framework.logger;resolution:=optional,org.apache.log;resolution:=optional,* org.apache.pdfbox.preflight.Validator_A1b diff --git a/preflight/pom.xml b/preflight/pom.xml index 6bc2870b235..7e36f1547a5 100644 --- a/preflight/pom.xml +++ b/preflight/pom.xml @@ -26,7 +26,7 @@ org.apache.pdfbox pdfbox-parent - 2.0.0-SNAPSHOT + 2.0.25-SNAPSHOT ../parent/pom.xml @@ -35,6 +35,26 @@ true + + + + [11,) + + + + javax.xml.bind + jaxb-api + provided + + + 
javax.activation + activation + provided + + + + + @@ -42,6 +62,7 @@ maven-surefire-plugin true + ${addmod} @@ -55,7 +76,7 @@ **/integration/** - -Xmx768m + ${addmod} @@ -66,6 +87,7 @@ ${skipITs} + ${addmod} **/integration/* @@ -106,7 +128,6 @@ com.googlecode.maven-download-plugin download-maven-plugin - 1.2.1 get-isartor @@ -115,10 +136,10 @@ wget - http://www.pdfa.org/wp-content/uploads/2011/08/isartor-pdfa-2008-08-13.zip + https://www.pdfa.org/wp-content/until2016_uploads/2011/08/isartor-pdfa-2008-08-13.zip true ${project.build.directory}/pdfs - 9f129c834bc6f9f8dabad4491c4c10ec + 66bf4ad470b36079c1e0ceca4438053f32649f964fb1de5cd88babce36c5afc0ba6fa7880bc1c9aac791df872cdfc8dc9851bfd3c75ae96786edd8fac61193ae @@ -129,44 +150,15 @@ ${skip-bavaria} - http://www.pdflib.com/fileadmin/pdflib/Bavaria/2009-04-03-Bavaria-pdfa.zip + https://web.archive.org/web/20160305185745if_/http://www.pdflib.com/fileadmin/pdflib/Bavaria/2009-04-03-Bavaria-pdfa.zip true ${project.build.directory}/pdfs - d8fccb2fea540ab49bef237f3579546b + a6efe70574dcde3628271fc1d7aa32cc00095334aa9415e5ebfb96cc20e0f79edd040c0290d5a76b4ced4c6a4343ba4af9567bf12eb7cfe3ec70f1a43202c231 - - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - com.googlecode.maven-download-plugin - download-maven-plugin - [1.2.1,) - - wget - - - - - - - - - - - - @@ -189,11 +181,6 @@ junit junit - - log4j - log4j - test - @@ -207,15 +194,11 @@ bcprov-jdk15on true - - com.levigo.jbig2 - levigo-jbig2-imageio + org.apache.pdfbox + jbig2-imageio test - diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConfiguration.java b/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConfiguration.java index 05da6c745a1..d36c6803641 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConfiguration.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConfiguration.java @@ -21,6 +21,7 @@ package org.apache.pdfbox.preflight; +import 
java.lang.reflect.InvocationTargetException; import java.util.Collection; import java.util.LinkedHashMap; import java.util.Map; @@ -113,6 +114,11 @@ public class PreflightConfiguration */ private ColorSpaceHelperFactory colorSpaceHelperFact; + /** + * Define the maximum number of errors. + */ + private int maxErrors = 10000; + public static PreflightConfiguration createPdfA1BConfiguration() { PreflightConfiguration configuration = new PreflightConfiguration(); @@ -182,7 +188,7 @@ else if (errorOnMissingProcess) try { - return clazz.newInstance(); + return clazz.getDeclaredConstructor().newInstance(); } catch (InstantiationException e) { @@ -192,6 +198,22 @@ else if (errorOnMissingProcess) { throw new ValidationException(processName + " can't be created", e); } + catch (NoSuchMethodException e) + { + throw new ValidationException(processName + " can't be created", e); + } + catch (SecurityException e) + { + throw new ValidationException(processName + " can't be created", e); + } + catch (IllegalArgumentException e) + { + throw new ValidationException(processName + " can't be created", e); + } + catch (InvocationTargetException e) + { + throw new ValidationException(processName + " can't be created", e); + } } public void replaceProcess(String processName, Class process) @@ -283,4 +305,23 @@ public void setColorSpaceHelperFact(ColorSpaceHelperFactory colorSpaceHelperFact this.colorSpaceHelperFact = colorSpaceHelperFact; } + /** + * Get the maximum number of errors after which to abort when possible. + * + * @return + */ + public int getMaxErrors() + { + return maxErrors; + } + + /** + * Set the maximum number of errors after which to abort when possible. 
+ * + * @param maxErrors + */ + public void setMaxErrors(int maxErrors) + { + this.maxErrors = maxErrors; + } } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java b/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java index d12dde32fe0..1ebf3f6d2d9 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java @@ -66,8 +66,8 @@ public interface PreflightConstants String OUTPUT_INTENT_DICTIONARY_KEY_OUTPUT_CONDITION_IDENTIFIER = "OutputConditionIdentifier"; String OUTPUT_INTENT_DICTIONARY_VALUE_OUTPUT_CONDITION_IDENTIFIER_CUSTOM = "Custom"; - String TRANPARENCY_DICTIONARY_KEY_EXTGSTATE = "ExtGState"; - String TRANPARENCY_DICTIONARY_KEY_EXTGSTATE_ENTRY_REGEX = "(GS|gs)([0-9])+"; + String TRANSPARENCY_DICTIONARY_KEY_EXTGSTATE = "ExtGState"; + String TRANSPARENCY_DICTIONARY_KEY_EXTGSTATE_ENTRY_REGEX = "(GS|gs)([0-9])+"; String TRANSPARENCY_DICTIONARY_KEY_BLEND_MODE = "BM"; String TRANSPARENCY_DICTIONARY_KEY_UPPER_CA = "CA"; @@ -321,7 +321,7 @@ public interface PreflightConstants */ String ERROR_SYNTAX_HEXA_STRING_TOO_LONG = "1.0.5"; /** - * The number is out of Range ( ex : greatter than 2^31-1) + * The number is out of Range ( ex : greater than 2^31-1) */ String ERROR_SYNTAX_NUMERIC_RANGE = "1.0.6"; /** @@ -388,7 +388,7 @@ public interface PreflightConstants */ String ERROR_SYNTAX_STREAM_LENGTH_INVALID = "1.2.5"; /** - * F or/and FFilter or/and FDecodeParams are present in a stream dictionary + * F or/and FFilter or/and FDecodeParms are present in a stream dictionary */ String ERROR_SYNTAX_STREAM_FX_KEYS = "1.2.6"; /** @@ -873,7 +873,7 @@ public interface PreflightConstants */ String ERROR_METADATA_FORMAT_XPACKET = "7.1.5"; /** - * Metadata mismatch between PDF Dictionnary and xmp + * Metadata mismatch between PDF Dictionary and xmp */ String ERROR_METADATA_MISMATCH = "7.2"; @@ -943,7 +943,7 @@ public 
interface PreflightConstants String ERROR_METADATA_CATEGORY_PROPERTY_INVALID = "7.5.1"; /** - * the infor dictionary is corrupt or value can't be read + * the info dictionary is corrupt or value can't be read */ String ERROR_METADATA_DICT_INFO_CORRUPT = "7.12"; /** diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java b/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java index a730cec5c1c..7ca4bc64264 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java @@ -23,13 +23,16 @@ import java.io.Closeable; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import javax.activation.DataSource; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.pdfparser.XrefTrailerResolver; +import org.apache.pdfbox.pdmodel.common.COSObjectable; import org.apache.pdfbox.preflight.ValidationResult.ValidationError; import org.apache.pdfbox.preflight.font.container.FontContainer; import org.apache.pdfbox.preflight.graphic.ICCProfileWrapper; @@ -41,7 +44,7 @@ public class PreflightContext implements Closeable /** * Contains the list of font name embedded in the PDF document. */ - private final Map fontContainers = new HashMap(); + private final Map> fontContainers = new HashMap>(); /** * The PDFbox object representation of the PDF source. @@ -51,7 +54,7 @@ public class PreflightContext implements Closeable /** * The datasource to load the document from. Needed by StreamValidationProcess. */ - private DataSource source = null; + private DataSource dataSource = null; /** * Contains all Xref/trailer objects and resolves them into single object using startxref reference. 
@@ -77,21 +80,25 @@ public class PreflightContext implements Closeable private PreflightPath validationPath = new PreflightPath(); + private final Set processedSet = new HashSet(); + private Integer currentPageNumber = null; + private long fileLen; + /** * Create the DocumentHandler using the DataSource which represent the PDF file to check. * - * @param source + * @param dataSource */ - public PreflightContext(DataSource source) + public PreflightContext(DataSource dataSource) { - this.source = source; + this.dataSource = dataSource; } - public PreflightContext(DataSource source, PreflightConfiguration configuration) + public PreflightContext(DataSource dataSource, PreflightConfiguration configuration) { - this.source = source; + this.dataSource = dataSource; this.config = configuration; } @@ -146,12 +153,12 @@ public void setDocument(PreflightDocument document) */ public DataSource getSource() { - return source; + return dataSource; } public boolean isComplete() { - return (document != null) && (source != null); + return (document != null) && (dataSource != null); } /** @@ -160,7 +167,7 @@ public boolean isComplete() * @param cBase the COSBase for the font container. * @param fc the font container. */ - public void addFontContainer(COSBase cBase, FontContainer fc) + public void addFontContainer(COSBase cBase, FontContainer fc) { this.fontContainers.put(cBase, fc); } @@ -172,7 +179,7 @@ public void addFontContainer(COSBase cBase, FontContainer fc) * @param cBase the COSBase for the font container * @return the font container. 
*/ - public FontContainer getFontContainer(COSBase cBase) + public FontContainer getFontContainer(COSBase cBase) { return this.fontContainers.get(cBase); } @@ -277,4 +284,34 @@ public Integer getCurrentPageNumber() return currentPageNumber; } + public void setFileLen(long fileLen) + { + this.fileLen = fileLen; + } + + public long getFileLen() + { + return fileLen; + } + + /** + * Add the argument to the set of processed elements, + * + * @param cos + */ + public void addToProcessedSet(COSObjectable cos) + { + processedSet.add(cos); + } + + /** + * Tell if the argument is in the set of processed elements. + * + * @param cos + * @return true if in the set, false if not. + */ + public boolean isInProcessedSet(COSObjectable cos) + { + return processedSet.contains(cos); + } } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightDocument.java b/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightDocument.java index 237fac50f28..8a2ccd7d4cf 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightDocument.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightDocument.java @@ -156,6 +156,9 @@ public void setContext(PreflightContext context) */ public void validate() throws ValidationException { + // force early class loading to check if people forgot to use --add-modules javax.xml.bind + // on java 9 & 10, or to add jaxb-api on java 11 and later + javax.xml.bind.DatatypeConverter.parseInt("0"); context.setConfig(config); Collection processes = config.getProcessNames(); for (String name : processes) @@ -164,6 +167,11 @@ public void validate() throws ValidationException } } + /** + * Returns the format which is used to validate the pdf document. 
+ * + * @return the format used for validation + */ public Format getSpecification() { return specification; diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightPath.java b/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightPath.java index dfffb924d4d..8df83330bd0 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightPath.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightPath.java @@ -24,9 +24,13 @@ import java.util.Stack; /** - * Contains a stack of objects to follow the validation path. Ex : - if the ValidationProcess computes a Type1Font - * object, this object should contains a path like PDPage|PDResources|PDFont. - if the ValidationProcess computes a - * XObject object, this object could contains a path like PDPage|PDResources|PDFontType3|PDResource|PDXObject. + * Contains a stack of objects to follow the validation path. Examples: + *
      + *
    • If the ValidationProcess computes a Type1Font, this object could contain a path like + * PDPage|PDResources|PDFont. + *
    • If the ValidationProcess computes an XObject, this object could contain a path like + * PDPage|PDResources|PDFontType3|PDResource|PDXObject. + *
    */ public class PreflightPath { diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/ValidationResult.java b/preflight/src/main/java/org/apache/pdfbox/preflight/ValidationResult.java index b95860aa2d7..231f645c1d2 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/ValidationResult.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/ValidationResult.java @@ -348,7 +348,7 @@ else if (errorCode.startsWith(PreflightConstants.ERROR_PDF_PROCESSING_MISSING)) } else { - // default Unkown error + // default Unknown error this.details = "Unknown error"; } t = new Exception(); @@ -369,7 +369,7 @@ public ValidationError(String errorCode, String details, Throwable cause) { StringBuilder sb = new StringBuilder(this.details.length() + details.length() + 2); sb.append(this.details).append(", ").append(details); - this.details = sb.toString(); + this.details = sb.length() > 400 ? sb.substring(0, 400) : sb.toString(); } this.cause = cause; t = new Exception(); diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java b/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java index 7cc43e237b7..fa8ffeae0c6 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java @@ -51,7 +51,7 @@ /** * This class is a simple main class used to check the validity of a pdf file. 
* - * Usage : java net.awl.edoc.pdfa.Validator <file path> + * Usage : java org.apache.pdfbox.preflight.Validator_A1b <file path> * * @author gbailleul * @@ -95,12 +95,14 @@ public static void main(String[] args) } else { + @SuppressWarnings({"squid:S4435"}) // self-created XML Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); XmlResultParser xrp = new XmlResultParser(); if (isGroup) { + @SuppressWarnings({"squid:S2755"}) // self-created XML Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); Element root = document.createElement("preflights"); document.appendChild(root); @@ -141,6 +143,7 @@ public static void main(String[] args) Element result = xrp.validate(new FileDataSource(args[posFile])); Document document = result.getOwnerDocument(); document.appendChild(result); + @SuppressWarnings({"squid:S4435"}) // self-created XML Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); @@ -150,7 +153,7 @@ public static void main(String[] args) } - private static void usage() throws IOException + private static void usage() { String version = Version.getVersion(); @@ -188,7 +191,7 @@ private static int runSimple(File file) throws IOException } else { - System.out.println("The file " + file.getName() + " is not valid, error(s) :"); + System.out.println("The file " + file.getName() + " is not a valid PDF/A-1b file, error(s) :"); for (ValidationError error : result.getErrorsList()) { System.out.print(error.getErrorCode() + " : " + error.getDetails()); @@ -215,16 +218,23 @@ private static List listFiles(String path) throws IOException if (f.isFile()) { FileReader fr = new FileReader(f); - BufferedReader buf = new 
BufferedReader(fr); - while (buf.ready()) + BufferedReader bufferedReader = null; + try { - File fn = new File(buf.readLine()); - if (fn.exists()) + bufferedReader = new BufferedReader(fr); + while (bufferedReader.ready()) { - files.add(fn); - } // else warn ? + File fn = new File(bufferedReader.readLine()); + if (fn.exists()) + { + files.add(fn); + } // else warn ? + } + } + finally + { + IOUtils.closeQuietly(bufferedReader); } - IOUtils.closeQuietly(buf); } else { diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/action/AbstractActionManager.java b/preflight/src/main/java/org/apache/pdfbox/preflight/action/AbstractActionManager.java index 4d344757fff..82fc9931617 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/action/AbstractActionManager.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/action/AbstractActionManager.java @@ -77,7 +77,7 @@ public boolean isAdditionalAction() } /** - * @return the actionDictionnary + * @return the actionDictionary */ public COSDictionary getActionDictionnary() { @@ -127,21 +127,21 @@ public boolean valid() throws ValidationException * Validate an Action dictionary. * * Return false if the dictionary is invalid (ex : missing key). If the ActionManager represents an - * AdditionalAction, this method returns false and updates the error list when the additonalActionAuth parameter is + * AdditionalAction, this method returns false and updates the error list when the additionalActionAuth parameter is * set to false. * * This method call the innerValid method to process specific checks according to the action type. * * If innerValid successes, all actions contained in the Next entry of the Action dictionary are validated. * - * @param additonalActionAuth + * @param additionalActionAuth * boolean to know if an additional action is authorized. * @return the validation state of the Action dictionary. 
* @throws ValidationException */ - public boolean valid(boolean additonalActionAuth) throws ValidationException + public boolean valid(boolean additionalActionAuth) throws ValidationException { - if (isAdditionalAction() && !additonalActionAuth) + if (isAdditionalAction() && !additionalActionAuth) { context.addValidationError(new ValidationError(ERROR_ACTION_FORBIDDEN_ADDITIONAL_ACTION, "Additional Action are forbidden")); diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/action/ActionManagerFactory.java b/preflight/src/main/java/org/apache/pdfbox/preflight/action/ActionManagerFactory.java index d640ecf3d1c..b8ea1c8944c 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/action/ActionManagerFactory.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/action/ActionManagerFactory.java @@ -134,9 +134,9 @@ private void callCreateAction(COSBase aDict, PreflightContext ctx, List constructor = clazz.getConstructor(PreflightContext.class, - COSDictionary.class); + Constructor constructor = + clazz.getDeclaredConstructor(PreflightContext.class, COSDictionary.class); result = constructor.newInstance(ctx, annotDic); result.setFactory(this); } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/annotation/TextAnnotationValidator.java b/preflight/src/main/java/org/apache/pdfbox/preflight/annotation/TextAnnotationValidator.java index 388c165c8ea..30e7f0d0314 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/annotation/TextAnnotationValidator.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/annotation/TextAnnotationValidator.java @@ -48,8 +48,9 @@ public TextAnnotationValidator(PreflightContext ctx, COSDictionary annotDictiona /* * (non-Javadoc) * - * @see net.awl.edoc.pdfa.validation.annotation.AnnotationValidator#checkFlags( java.util.List) + * @see org.apache.pdfbox.preflight.annotation.AnnotationValidator#checkFlags( java.util.List) */ + @Override protected boolean checkFlags() { // call common 
flags settings diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java b/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java index a1f00feb87c..e8c8460f626 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java @@ -26,6 +26,7 @@ import java.io.InputStream; import java.util.List; import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSFloat; @@ -143,7 +144,7 @@ protected void processOperator(Operator operator, List operands) throws /* * Process Specific Validation. The Generic Processing is useless for PDF/A validation */ - if ("BI".equals(operator.getName())) + if (OperatorName.BEGIN_INLINE_IMAGE.equals(operator.getName())) { validateInlineImageFilter(operator); validateInlineImageColorSpace(operator); @@ -154,6 +155,7 @@ protected void processOperator(Operator operator, List operands) throws validateRenderingIntent(operator, operands); checkSetColorSpaceOperators(operator, operands); validateNumberOfGraphicStates(operator); + validateDefaultColorSpace(operator); } @Override @@ -164,10 +166,9 @@ protected void unsupportedOperator(Operator operator, List arguments) } /** - * Process Text Validation. According to the operator one of the both method will be called. - * (validStringDefinition(PDFOperator operator, List arguments) / validStringArray(PDFOperator operator, List - * arguments)) - * + * Process Text Validation. Depending on the operator parameter, this will either call + * validateStringDefinition or validateStringArray. 
+ * * @param operator * @param arguments * @throws IOException @@ -175,12 +176,13 @@ protected void unsupportedOperator(Operator operator, List arguments) protected void checkShowTextOperators(Operator operator, List arguments) throws IOException { String op = operator.getName(); - if ("Tj".equals(op) || "'".equals(op) || "\"".equals(op)) + if (OperatorName.SHOW_TEXT.equals(op) || OperatorName.SHOW_TEXT_LINE.equals(op) + || OperatorName.SHOW_TEXT_LINE_AND_SPACE.equals(op)) { validateStringDefinition(operator, arguments); } - if ("TJ".equals(op)) + if (OperatorName.SHOW_TEXT_ADJUSTED.equals(op)) { validateStringArray(operator, arguments); } @@ -201,7 +203,7 @@ private void validateStringDefinition(Operator operator, List arguments) thro /* * For a Text operator, the arguments list should contain only one COSString object */ - if ("\"".equals(operator.getName())) + if (OperatorName.SHOW_TEXT_LINE_AND_SPACE.equals(operator.getName())) { if (arguments.size() != 3) { @@ -300,7 +302,7 @@ public void validateText(byte[] string) throws IOException return; } - FontContainer fontContainer = context.getFontContainer(font.getCOSObject()); + FontContainer fontContainer = context.getFontContainer(font.getCOSObject()); if (renderingMode == RenderingMode.NEITHER && (fontContainer == null || !fontContainer.isEmbeddedFont())) { // font not embedded and rendering mode is 3. 
Valid case and nothing to check diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightStreamEngine.java b/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightStreamEngine.java index 6b3041ec5db..a7b119722aa 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightStreamEngine.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightStreamEngine.java @@ -21,6 +21,7 @@ package org.apache.pdfbox.preflight.content; +import java.awt.color.ColorSpace; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_GRAPHIC_INVALID_COLOR_SPACE_CMYK; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_GRAPHIC_INVALID_COLOR_SPACE_MISSING; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_GRAPHIC_INVALID_COLOR_SPACE_RGB; @@ -28,7 +29,6 @@ import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_GRAPHIC_UNEXPECTED_VALUE_FOR_KEY; import static org.apache.pdfbox.preflight.PreflightConstants.MAX_GRAPHIC_STATES; -import java.awt.color.ICC_ColorSpace; import java.io.IOException; import java.util.List; @@ -56,6 +56,7 @@ import org.apache.pdfbox.preflight.utils.RenderingIntents; import org.apache.pdfbox.contentstream.operator.DrawObject; import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.contentstream.PDFStreamEngine; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN; import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN; @@ -91,6 +92,8 @@ import org.apache.pdfbox.contentstream.operator.text.SetTextRise; import org.apache.pdfbox.contentstream.operator.text.SetWordSpacing; import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; +import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; /** * This class inherits from org.apache.pdfbox.util.PDFStreamEngine to allow 
the validation of specific rules in @@ -162,58 +165,58 @@ public PreflightStreamEngine(PreflightContext context, PDPage page) * Do not use the PDFBox Operator, because of the PageDrawer class cast Or because the Operator doesn't exist */ - addOperator(new StubOperator("l")); - addOperator(new StubOperator("re")); - addOperator(new StubOperator("c")); - addOperator(new StubOperator("y")); - addOperator(new StubOperator("v")); - addOperator(new StubOperator("n")); - addOperator(new StubOperator("BI")); - addOperator(new StubOperator("ID")); - addOperator(new StubOperator("EI")); - addOperator(new StubOperator("m")); - addOperator(new StubOperator("W*")); - addOperator(new StubOperator("W")); - addOperator(new StubOperator("h")); - - addOperator(new StubOperator("Tj")); - addOperator(new StubOperator("TJ")); - addOperator(new StubOperator("'")); - addOperator(new StubOperator("\"")); - - addOperator(new StubOperator("b")); - addOperator(new StubOperator("B")); - addOperator(new StubOperator("b*")); - addOperator(new StubOperator("B*")); - - addOperator(new StubOperator("BDC")); - addOperator(new StubOperator("BMC")); - addOperator(new StubOperator("DP")); - addOperator(new StubOperator("EMC")); - addOperator(new StubOperator("BX")); - addOperator(new StubOperator("EX")); - - addOperator(new StubOperator("d0")); - addOperator(new StubOperator("d1")); - - addOperator(new StubOperator("f")); - addOperator(new StubOperator("F")); - addOperator(new StubOperator("f*")); - - addOperator(new StubOperator("M")); - addOperator(new StubOperator("MP")); - - addOperator(new StubOperator("i")); - - addOperator(new StubOperator("ri")); - addOperator(new StubOperator("s")); - addOperator(new StubOperator("S")); - addOperator(new StubOperator("sh")); + addOperator(new StubOperator(OperatorName.LINE_TO)); + addOperator(new StubOperator(OperatorName.APPEND_RECT)); + addOperator(new StubOperator(OperatorName.CURVE_TO)); + addOperator(new 
StubOperator(OperatorName.CURVE_TO_REPLICATE_FINAL_POINT)); + addOperator(new StubOperator(OperatorName.CURVE_TO_REPLICATE_INITIAL_POINT)); + addOperator(new StubOperator(OperatorName.ENDPATH)); + addOperator(new StubOperator(OperatorName.BEGIN_INLINE_IMAGE)); + addOperator(new StubOperator(OperatorName.BEGIN_INLINE_IMAGE_DATA)); + addOperator(new StubOperator(OperatorName.END_INLINE_IMAGE)); + addOperator(new StubOperator(OperatorName.MOVE_TO)); + addOperator(new StubOperator(OperatorName.CLIP_EVEN_ODD)); + addOperator(new StubOperator(OperatorName.CLIP_NON_ZERO)); + addOperator(new StubOperator(OperatorName.CLOSE_PATH)); + + addOperator(new StubOperator(OperatorName.SHOW_TEXT)); + addOperator(new StubOperator(OperatorName.SHOW_TEXT_ADJUSTED)); + addOperator(new StubOperator(OperatorName.SHOW_TEXT_LINE)); + addOperator(new StubOperator(OperatorName.SHOW_TEXT_LINE_AND_SPACE)); + + addOperator(new StubOperator(OperatorName.CLOSE_FILL_NON_ZERO_AND_STROKE)); + addOperator(new StubOperator(OperatorName.FILL_NON_ZERO_AND_STROKE)); + addOperator(new StubOperator(OperatorName.CLOSE_FILL_EVEN_ODD_AND_STROKE)); + addOperator(new StubOperator(OperatorName.FILL_EVEN_ODD_AND_STROKE)); + + addOperator(new StubOperator(OperatorName.BEGIN_MARKED_CONTENT_SEQ)); + addOperator(new StubOperator(OperatorName.BEGIN_MARKED_CONTENT)); + addOperator(new StubOperator(OperatorName.MARKED_CONTENT_POINT_WITH_PROPS)); + addOperator(new StubOperator(OperatorName.END_MARKED_CONTENT)); + addOperator(new StubOperator(OperatorName.BEGIN_COMPATIBILITY_SECTION)); + addOperator(new StubOperator(OperatorName.END_COMPATIBILITY_SECTION)); + + addOperator(new StubOperator(OperatorName.TYPE3_D0)); + addOperator(new StubOperator(OperatorName.TYPE3_D1)); + + addOperator(new StubOperator(OperatorName.FILL_NON_ZERO)); + addOperator(new StubOperator(OperatorName.LEGACY_FILL_NON_ZERO)); + addOperator(new StubOperator(OperatorName.FILL_EVEN_ODD)); + + addOperator(new 
StubOperator(OperatorName.SET_LINE_MITERLIMIT)); + addOperator(new StubOperator(OperatorName.MARKED_CONTENT_POINT)); + + addOperator(new StubOperator(OperatorName.SET_FLATNESS)); + + addOperator(new StubOperator(OperatorName.SET_RENDERINGINTENT)); + addOperator(new StubOperator(OperatorName.CLOSE_AND_STROKE)); + addOperator(new StubOperator(OperatorName.STROKE_PATH)); + addOperator(new StubOperator(OperatorName.SHADING_FILL)); } /** * Check operands of the "ri" operator. Operands must exist in the RenderingIntent list. - * (net.awl.edoc.pdfa.validation.utils.RenderingIntents) + * (org.apache.pdfbox.preflight.utils.RenderingIntents) * * @param operator * the "ri" operator @@ -222,19 +225,15 @@ public PreflightStreamEngine(PreflightContext context, PDPage page) * @throws ContentStreamException * ERROR_GRAPHIC_UNEXPECTED_VALUE_FOR_KEY if the operand is invalid */ - protected void validateRenderingIntent(Operator operator, List arguments) throws ContentStreamException + protected void validateRenderingIntent(Operator operator, List arguments) throws ContentStreamException { - if ("ri".equals(operator.getName())) + if (OperatorName.SET_RENDERINGINTENT.equals(operator.getName())) { String riArgument0 = ""; if (arguments.get(0) instanceof COSName) { riArgument0 = ((COSName) arguments.get(0)).getName(); } - else if (arguments.get(0) instanceof String) - { - riArgument0 = (String) arguments.get(0); - } if (!RenderingIntents.contains(riArgument0)) { @@ -252,7 +251,7 @@ else if (arguments.get(0) instanceof String) */ protected void validateNumberOfGraphicStates(Operator operator) throws ContentStreamException { - if ("q".equals(operator.getName())) + if (OperatorName.SAVE.equals(operator.getName())) { int numberOfGraphicStates = this.getGraphicsStackSize(); if (numberOfGraphicStates > MAX_GRAPHIC_STATES) @@ -384,23 +383,31 @@ protected void checkColorOperators(String operation) throws ContentStreamExcepti { PDColorSpace cs = getColorSpace(operation); - if 
(("rg".equals(operation) || "RG".equals(operation)) + if ((OperatorName.NON_STROKING_RGB.equals(operation) + || OperatorName.STROKING_COLOR_RGB.equals(operation)) && !validColorSpace(cs, ColorSpaceType.RGB)) { registerError("The operator \"" + operation + "\" can't be used with CMYK Profile", ERROR_GRAPHIC_INVALID_COLOR_SPACE_RGB); return; } - if (("k".equals(operation) || "K".equals(operation)) + if ((OperatorName.NON_STROKING_CMYK.equals(operation) + || OperatorName.STROKING_COLOR_CMYK.equals(operation)) && !validColorSpace(cs, ColorSpaceType.CMYK)) { registerError("The operator \"" + operation + "\" can't be used with RGB Profile", ERROR_GRAPHIC_INVALID_COLOR_SPACE_CMYK); return; } - if (("g".equals(operation) || "G".equals(operation) - || "f".equals(operation) || "F".equals(operation) || "f*".equals(operation) - || "B".equals(operation) || "B*".equals(operation) || "b".equals(operation) || "b*".equals(operation)) + if ((OperatorName.NON_STROKING_GRAY.equals(operation) + || OperatorName.STROKING_COLOR_GRAY.equals(operation) + || OperatorName.FILL_NON_ZERO.equals(operation) + || OperatorName.LEGACY_FILL_NON_ZERO.equals(operation) + || OperatorName.FILL_EVEN_ODD.equals(operation) + || OperatorName.FILL_NON_ZERO_AND_STROKE.equals(operation) + || OperatorName.FILL_EVEN_ODD_AND_STROKE.equals(operation) + || OperatorName.CLOSE_FILL_NON_ZERO_AND_STROKE.equals(operation) + || OperatorName.CLOSE_FILL_EVEN_ODD_AND_STROKE.equals(operation)) && !validColorSpace(cs, ColorSpaceType.ALL)) { registerError("The operator \"" + operation + "\" can't be used without Color Profile", @@ -408,6 +415,64 @@ protected void checkColorOperators(String operation) throws ContentStreamExcepti } } + /** + * In some cases, the colorspace isn't checked because defaults (/DeviceGray) is used. Thus we + * need to check all text output, stroke and fill for /DeviceGray. + * + * @param operator an operator. 
+ * @throws ContentStreamException + */ + void validateDefaultColorSpace(Operator operator) throws ContentStreamException + { + boolean v = false; + String op = operator.getName(); + if (OperatorName.SHOW_TEXT.equals(op) || OperatorName.SHOW_TEXT_ADJUSTED.equals(op) + || OperatorName.SHOW_TEXT_LINE.equals(op) + || OperatorName.SHOW_TEXT_LINE_AND_SPACE.equals(op)) + { + RenderingMode rm = getGraphicsState().getTextState().getRenderingMode(); + if (rm.isFill() && + getGraphicsState().getNonStrokingColor().getColorSpace() instanceof PDDeviceGray) + { + v = true; + } + if (rm.isStroke() && + getGraphicsState().getStrokingColor().getColorSpace() instanceof PDDeviceGray) + { + v = true; + } + } + // fills + if ((OperatorName.FILL_NON_ZERO.equals(op) || OperatorName.LEGACY_FILL_NON_ZERO.equals(op) + || OperatorName.FILL_EVEN_ODD.equals(op) + || OperatorName.FILL_NON_ZERO_AND_STROKE.equals(op) + || OperatorName.FILL_EVEN_ODD_AND_STROKE.equals(op) + || OperatorName.CLOSE_FILL_NON_ZERO_AND_STROKE.equals(op) + || OperatorName.CLOSE_FILL_EVEN_ODD_AND_STROKE.equals(op)) + && + getGraphicsState().getNonStrokingColor().getColorSpace() instanceof PDDeviceGray) + { + v = true; + } + // strokes + if ((OperatorName.FILL_NON_ZERO_AND_STROKE.equals(op) + || OperatorName.FILL_EVEN_ODD_AND_STROKE.equals(op) + || OperatorName.CLOSE_FILL_NON_ZERO_AND_STROKE.equals(op) + || OperatorName.CLOSE_FILL_EVEN_ODD_AND_STROKE.equals(op) + || OperatorName.CLOSE_AND_STROKE.equals(op) || OperatorName.STROKE_PATH.equals(op)) + && + getGraphicsState().getStrokingColor().getColorSpace() instanceof PDDeviceGray) + { + v = true; + } + if (v && !validColorSpaceDestOutputProfile(PreflightStreamEngine.ColorSpaceType.ALL)) + { + registerError("/DeviceGray default for operator \"" + op + + "\" can't be used without Color Profile", + ERROR_GRAPHIC_INVALID_COLOR_SPACE_MISSING); + } + } + private boolean validColorSpace(PDColorSpace colorSpace, ColorSpaceType expectedIccType) throws ContentStreamException { @@ 
-471,8 +536,8 @@ private boolean isDeviceIndependent(PDColorSpace cs, ColorSpaceType expectedIccT int type = ((PDICCBased)cs).getColorSpaceType(); switch (expectedIccType) { - case RGB: return type == ICC_ColorSpace.TYPE_RGB; - case CMYK: return type == ICC_ColorSpace.TYPE_CMYK; + case RGB: return type == ColorSpace.TYPE_RGB; + case CMYK: return type == ColorSpace.TYPE_CMYK; default: return true; } } @@ -518,19 +583,16 @@ private PDColorSpace getColorSpace(String operation) * @param arguments * @throws IOException */ - protected void checkSetColorSpaceOperators(Operator operator, List arguments) throws IOException + protected void checkSetColorSpaceOperators(Operator operator, List arguments) throws IOException { - if (!("CS".equals(operator.getName()) || "cs".equals(operator.getName()))) + if (!OperatorName.STROKING_COLORSPACE.equals(operator.getName()) + && !OperatorName.NON_STROKING_COLORSPACE.equals(operator.getName())) { return; } String colorSpaceName; - if (arguments.get(0) instanceof String) - { - colorSpaceName = (String) arguments.get(0); - } - else if (arguments.get(0) instanceof COSString) + if (arguments.get(0) instanceof COSString) { colorSpaceName = (arguments.get(0)).toString(); } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/content/StubOperator.java b/preflight/src/main/java/org/apache/pdfbox/preflight/content/StubOperator.java index 8242d9e27f4..60b3d08765f 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/content/StubOperator.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/content/StubOperator.java @@ -35,6 +35,7 @@ import static org.apache.pdfbox.preflight.PreflightConstants.MAX_STRING_LENGTH; import java.io.IOException; +import java.util.Arrays; import java.util.List; import org.apache.pdfbox.cos.COSArray; @@ -45,6 +46,7 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.contentstream.operator.Operator; +import 
org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.contentstream.operator.OperatorProcessor; /** @@ -55,6 +57,39 @@ public class StubOperator extends OperatorProcessor { private final String name; + private static final List CHECK_NO_OPERANDS = Arrays.asList( // + OperatorName.STROKE_PATH, OperatorName.FILL_NON_ZERO, OperatorName.LEGACY_FILL_NON_ZERO, + OperatorName.FILL_EVEN_ODD, OperatorName.FILL_NON_ZERO_AND_STROKE, + OperatorName.FILL_EVEN_ODD_AND_STROKE, OperatorName.CLOSE_FILL_NON_ZERO_AND_STROKE, + OperatorName.CLOSE_FILL_EVEN_ODD_AND_STROKE, OperatorName.CLOSE_AND_STROKE, + OperatorName.END_MARKED_CONTENT, OperatorName.CLOSE_PATH, OperatorName.CLIP_NON_ZERO, + OperatorName.CLIP_EVEN_ODD, OperatorName.ENDPATH); + + private static final List CHECK_STRING_OPERANDS = Arrays.asList( // + OperatorName.BEGIN_MARKED_CONTENT, OperatorName.SET_GRAPHICS_STATE_PARAMS, + OperatorName.SET_RENDERINGINTENT, OperatorName.SHADING_FILL, OperatorName.SHOW_TEXT, + OperatorName.SHOW_TEXT_LINE, OperatorName.MARKED_CONTENT_POINT); + + private static final List CHECK_TAG_AND_PROPERTY_OPERANDS = Arrays.asList( // + OperatorName.BEGIN_MARKED_CONTENT_SEQ, OperatorName.MARKED_CONTENT_POINT_WITH_PROPS); + + private static final List CHECK_NUMBER_OPERANDS_6 = Arrays.asList( // + OperatorName.CURVE_TO, OperatorName.TYPE3_D1); + + private static final List CHECK_NUMBER_OPERANDS_4 = Arrays.asList( // + OperatorName.CURVE_TO_REPLICATE_FINAL_POINT, + OperatorName.CURVE_TO_REPLICATE_INITIAL_POINT, OperatorName.APPEND_RECT); + + private static final List CHECK_NUMBER_OPERANDS_2 = Arrays.asList( // + OperatorName.MOVE_TO, OperatorName.LINE_TO, OperatorName.TYPE3_D0); + + private static final List CHECK_NUMBER_OPERANDS = Arrays.asList( // + OperatorName.NON_STROKING_GRAY, OperatorName.STROKING_COLOR_GRAY, + OperatorName.SET_FLATNESS, OperatorName.SET_LINE_MITERLIMIT); + + private static final List CHECK_ARRAY_OPERANDS = Arrays.asList( // + 
OperatorName.SHOW_TEXT_ADJUSTED); + public StubOperator(String name) { this.name = name; @@ -69,168 +104,46 @@ public StubOperator(String name) @Override public void process(Operator operator, List arguments) throws IOException { - String op = operator.getName(); - if ("S".equals(op)) - { - checkNoOperands(arguments); - } - else if ("B".equals(op)) - { - checkNoOperands(arguments); - } - else if ("f".equals(op)) - { - checkNoOperands(arguments); - } - else if ("F".equals(op)) - { - checkNoOperands(arguments); - } - else if ("f*".equals(op)) - { - checkNoOperands(arguments); - } - else if ("b".equals(op)) - { - checkNoOperands(arguments); - } - else if ("B*".equals(op)) + String opName = operator.getName(); + if (CHECK_NO_OPERANDS.contains(opName)) { checkNoOperands(arguments); } - else if ("b*".equals(op)) - { - checkNoOperands(arguments); - } - else if ("s".equals(op)) - { - checkNoOperands(arguments); - } - else if ("EMC".equals(op)) - { - checkNoOperands(arguments); - } - else if ("BMC".equals(op)) + else if (CHECK_STRING_OPERANDS.contains(opName)) { checkStringOperands(arguments, 1); } - else if ("BDC".equals(op)) - { - checkTagAndPropertyOperands(arguments); - } - else if ("DP".equals(op)) + else if (CHECK_TAG_AND_PROPERTY_OPERANDS.contains(opName)) { checkTagAndPropertyOperands(arguments); } - else if ("c".equals(op)) + else if (CHECK_NUMBER_OPERANDS_6.contains(opName)) { checkNumberOperands(arguments, 6); } - else if ("v".equals(op)) + else if (CHECK_NUMBER_OPERANDS_4.contains(opName)) { checkNumberOperands(arguments, 4); } - else if ("y".equals(op)) - { - checkNumberOperands(arguments, 4); - } - else if ("d0".equals(op)) + else if (CHECK_NUMBER_OPERANDS_2.contains(opName)) { checkNumberOperands(arguments, 2); } - else if ("d1".equals(op)) - { - checkNumberOperands(arguments, 6); - } - else if ("g".equals(op)) - { - checkNumberOperands(arguments, 1); - } - else if ("G".equals(op)) + else if (CHECK_NUMBER_OPERANDS.contains(opName)) { 
checkNumberOperands(arguments, 1); } - else if ("gs".equals(op)) - { - checkStringOperands(arguments, 1); - } - else if ("h".equals(op)) - { - checkNoOperands(arguments); - } - else if ("i".equals(op)) - { - checkNumberOperands(arguments, 1); - } - else if ("l".equals(op)) - { - checkNumberOperands(arguments, 2); - } - else if ("m".equals(op)) - { - checkNumberOperands(arguments, 2); - } - else if ("M".equals(op)) - { - checkNumberOperands(arguments, 1); - } - else if ("MP".equals(op)) - { - checkStringOperands(arguments, 1); - } - else if ("n".equals(op)) - { - checkNoOperands(arguments); - } - else if ("re".equals(op)) - { - checkNumberOperands(arguments, 4); - } - else if ("ri".equals(op)) - { - checkStringOperands(arguments, 1); - } - else if ("s".equals(op)) - { - checkNoOperands(arguments); - } - else if ("S".equals(op)) - { - checkNoOperands(arguments); - } - else if ("sh".equals(op)) - { - checkStringOperands(arguments, 1); - } - else if ("'".equals(op)) - { - checkStringOperands(arguments, 1); - } - else if ("Tj".equals(op)) - { - checkStringOperands(arguments, 1); - } - else if ("TJ".equals(op)) + else if (CHECK_ARRAY_OPERANDS.contains(opName)) { checkArrayOperands(arguments, 1); } - else if ("W".equals(op)) - { - checkNoOperands(arguments); - } - else if ("W*".equals(op)) - { - checkNoOperands(arguments); - } - else if ("\"".equals(op)) + else if (OperatorName.SHOW_TEXT_LINE_AND_SPACE.equals(opName)) { checkNumberOperands(arguments.subList(0, 2), 2); checkStringOperands(arguments.subList(2, arguments.size()), 1); } - // else // ---- Some operators are processed by PDFBox Objects. - // ---- Other operators are authorized but it isn't used. - + // ---- Other operators are authorized but not used. 
} /** @@ -328,9 +241,8 @@ private void checkNumberOperands(List arguments, int length) throws Con throw createInvalidArgumentsError(); } - for (int i = 0; i < length; ++i) + for (COSBase arg : arguments) { - COSBase arg = arguments.get(i); if (!(arg instanceof COSFloat) && !(arg instanceof COSInteger)) { throw createInvalidArgumentsError(); @@ -343,7 +255,8 @@ private void checkNumberOperands(List arguments, int length) throws Con } if (arg instanceof COSFloat - && (((COSFloat) arg).doubleValue() > MAX_POSITIVE_FLOAT || ((COSFloat) arg).doubleValue() < MAX_NEGATIVE_FLOAT)) + && (((COSFloat) arg).floatValue() > MAX_POSITIVE_FLOAT + || ((COSFloat) arg).floatValue() < MAX_NEGATIVE_FLOAT)) { throw createLimitError(ERROR_SYNTAX_NUMERIC_RANGE, "Invalid float range in a Number operand"); } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/font/TrueTypeFontValidator.java b/preflight/src/main/java/org/apache/pdfbox/preflight/font/TrueTypeFontValidator.java index 9ad9066f108..24fd51a21ec 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/font/TrueTypeFontValidator.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/font/TrueTypeFontValidator.java @@ -23,7 +23,6 @@ import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_FONTS_ENCODING; -import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.font.encoding.Encoding; import org.apache.pdfbox.pdmodel.font.encoding.MacRomanEncoding; @@ -73,7 +72,7 @@ protected void checkEncoding() * For symbolic font, no encoding entry is allowed and only one encoding entry is expected into the FontFile * CMap (Check latter when the FontFile stream will be checked) */ - if (fd.isSymbolic() && ((COSDictionary) fontDictionary).getItem(COSName.ENCODING) != null) + if (fd.isSymbolic() && fontDictionary.getItem(COSName.ENCODING) != null) { this.fontContainer.push(new ValidationError(ERROR_FONTS_ENCODING, fd.getFontName() + ": The Encoding 
should be missing for the Symbolic TTF")); diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/font/Type0FontValidator.java b/preflight/src/main/java/org/apache/pdfbox/preflight/font/Type0FontValidator.java index 8cf58bea6cb..ff1ea0c1fab 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/font/Type0FontValidator.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/font/Type0FontValidator.java @@ -48,6 +48,7 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.font.PDCIDFont; import org.apache.pdfbox.pdmodel.font.PDCIDFontType0; import org.apache.pdfbox.pdmodel.font.PDCIDFontType2; import org.apache.pdfbox.pdmodel.font.PDFont; @@ -133,7 +134,7 @@ protected void processDescendantFont() throws ValidationException return; } - FontValidator cidFontValidator = createDescendantValidator(cidFont); + FontValidator> cidFontValidator = createDescendantValidator(cidFont); if (cidFontValidator != null) { this.fontContainer.setDelegateFontContainer(cidFontValidator.getFontContainer()); @@ -141,10 +142,10 @@ protected void processDescendantFont() throws ValidationException } } - protected FontValidator createDescendantValidator(COSDictionary cidFont) + protected FontValidator> createDescendantValidator(COSDictionary cidFont) { String subtype = cidFont.getNameAsString(COSName.SUBTYPE); - FontValidator cidFontValidator = null; + FontValidator> cidFontValidator = null; if (FONT_DICTIONARY_VALUE_TYPE0.equals(subtype)) { cidFontValidator = createCIDType0FontValidator(cidFont); @@ -164,7 +165,7 @@ else if (FONT_DICTIONARY_VALUE_TYPE2.equals(subtype)) /** * Create the validation object for CIDType0 Font */ - protected FontValidator createCIDType0FontValidator(COSDictionary fDict) + protected FontValidator> createCIDType0FontValidator(COSDictionary fDict) { try { @@ -184,7 +185,7 @@ protected FontValidator createCIDType0FontValidator(COS * @param fDict a 
CIDType2 font dictionary. * @return a CIDType2 tont font validator. */ - protected FontValidator createCIDType2FontValidator(COSDictionary fDict) + protected FontValidator> createCIDType2FontValidator(COSDictionary fDict) { try { @@ -259,9 +260,9 @@ private void processCMapAsStream(COSStream aCMap) InputStream cmapStream = null; try { - // extract information from the CMap stream + // extract information from the CMap stream using strict mode cmapStream = aCMap.createInputStream(); - CMap fontboxCMap = new CMapParser().parse(cmapStream); + CMap fontboxCMap = new CMapParser(true).parse(cmapStream); int wmValue = fontboxCMap.getWMode(); String cmnValue = fontboxCMap.getName(); diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/font/Type1FontValidator.java b/preflight/src/main/java/org/apache/pdfbox/preflight/font/Type1FontValidator.java index a89e444a91c..6d19ebf2c4e 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/font/Type1FontValidator.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/font/Type1FontValidator.java @@ -43,7 +43,7 @@ public class Type1FontValidator extends SimpleFontValidator { public Type1FontValidator(PreflightContext context, PDSimpleFont font) { - super(context, font, font.getCOSObject(), new Type1Container((PDSimpleFont)font)); + super(context, font, font.getCOSObject(), new Type1Container(font)); } @Override diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/font/Type3FontValidator.java b/preflight/src/main/java/org/apache/pdfbox/preflight/font/Type3FontValidator.java index 4a7b74895df..16be1945ea2 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/font/Type3FontValidator.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/font/Type3FontValidator.java @@ -283,13 +283,13 @@ private void checkEncodingAsDictionary(COSDictionary encodingDictionary) * * For each character, the Glyph width must be the same as the Width value declared in the Widths array. 
*/ - private void checkCharProcsAndMetrics() throws ValidationException + private void checkCharProcsAndMetrics() { List widths = getWidths(font); if (widths == null || widths.isEmpty()) { this.fontContainer.push(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID, - font.getName() + ": The Witdhs array is unreachable")); + font.getName() + ": The Widths array is unreachable")); return; } @@ -313,7 +313,7 @@ private void checkCharProcsAndMetrics() throws ValidationException if (widths.size() != expectedLength) { this.fontContainer.push(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID, - font.getName() + ": The length of Witdhs array is invalid. Expected : \"" + expectedLength + "\" Current : \"" + font.getName() + ": The length of Widths array is invalid. Expected : \"" + expectedLength + "\" Current : \"" + widths.size() + "\"")); return; } @@ -380,7 +380,7 @@ public List getWidths(PDFont font) return widths; } - private PDType3CharProc getCharProc(int code) throws ValidationException + private PDType3CharProc getCharProc(int code) { PDType3CharProc charProc = font.getCharProc(code); if (charProc == null) @@ -444,7 +444,7 @@ private void checkResources() throws ValidationException try { PDFont aFont = PDFontFactory.createFont(xObjFont); - FontContainer aContainer = this.context.getFontContainer(aFont.getCOSObject()); + FontContainer aContainer = this.context.getFontContainer(aFont.getCOSObject()); // another font is used in the Type3, check if the font is valid. 
if (!aContainer.isValid()) { diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/Type0Container.java b/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/Type0Container.java index 76afdc2b779..5b1ac7244ee 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/Type0Container.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/font/container/Type0Container.java @@ -27,16 +27,16 @@ import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.preflight.ValidationResult.ValidationError; -public class Type0Container extends FontContainer +public class Type0Container extends FontContainer { - private FontContainer delegateFontContainer; + private FontContainer delegateFontContainer; public Type0Container(PDFont font) { super(font); } - public void setDelegateFontContainer(FontContainer delegateFontContainer) + public void setDelegateFontContainer(FontContainer delegateFontContainer) { this.delegateFontContainer = delegateFontContainer; } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/font/descriptor/FontDescriptorHelper.java b/preflight/src/main/java/org/apache/pdfbox/preflight/font/descriptor/FontDescriptorHelper.java index 56fb2b4fdfd..36c15e21dc0 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/font/descriptor/FontDescriptorHelper.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/font/descriptor/FontDescriptorHelper.java @@ -37,7 +37,6 @@ import static org.apache.pdfbox.preflight.PreflightConstants.FONT_DICTIONARY_KEY_ITALICANGLE; import static org.apache.pdfbox.preflight.PreflightConstants.FONT_DICTIONARY_KEY_STEMV; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -229,7 +228,7 @@ protected void checkFontFileMetaData(PDFontDescriptor fontDescriptor, PDStream f if (metadata.getFilters() != null && !metadata.getFilters().isEmpty()) { this.fContainer.push(new 
ValidationError(ERROR_SYNTAX_STREAM_INVALID_FILTER, - this.font.getName() + ": Filter specified in font file metadata dictionnary")); + this.font.getName() + ": Filter specified in font file metadata dictionary")); return; } @@ -276,14 +275,11 @@ else if (e.getErrorType() == ErrorType.XpacketBadEnd) protected final byte[] getMetaDataStreamAsBytes(PDMetadata metadata) { byte[] result = null; - ByteArrayOutputStream bos = null; InputStream metaDataContent = null; try { - bos = new ByteArrayOutputStream(); metaDataContent = metadata.createInputStream(); - IOUtils.copy(metaDataContent, bos); - result = bos.toByteArray(); + result = IOUtils.toByteArray(metaDataContent); } catch (IOException e) { @@ -293,7 +289,6 @@ protected final byte[] getMetaDataStreamAsBytes(PDMetadata metadata) finally { IOUtils.closeQuietly(metaDataContent); - IOUtils.closeQuietly(bos); } return result; } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/font/util/FontMetaDataValidation.java b/preflight/src/main/java/org/apache/pdfbox/preflight/font/util/FontMetaDataValidation.java index 03ceea95001..00ee23e466b 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/font/util/FontMetaDataValidation.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/font/util/FontMetaDataValidation.java @@ -131,7 +131,7 @@ public boolean analyseFontName(XMPMetadata metadata, PDFontDescriptor fontDesc, } /** - * If XMP MetaData are present, they must have followings information : + * If XMP MetaData is present, they must have the following information : *
      *
    • dc:rights *
    • Marked (with the value true) diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/font/util/PreflightType3Stream.java b/preflight/src/main/java/org/apache/pdfbox/preflight/font/util/PreflightType3Stream.java index 6d9490a5d2b..db0215c3ef3 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/font/util/PreflightType3Stream.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/font/util/PreflightType3Stream.java @@ -33,6 +33,8 @@ import org.apache.pdfbox.preflight.PreflightContext; import org.apache.pdfbox.preflight.content.PreflightStreamEngine; import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.contentstream.operator.OperatorName; +import org.apache.pdfbox.cos.COSBase; /** * This class is used to parse a glyph of a Type3 font program. If the glyph is parsed without error, the width of the @@ -85,12 +87,12 @@ public Image createImage() throws IOException * If there is an error processing the operation. */ @Override - protected void processOperator(Operator operator, List operands) throws IOException + protected void processOperator(Operator operator, List operands) throws IOException { super.processOperator(operator, operands); String operation = operator.getName(); - if (operation.equals("BI")) + if (operation.equals(OperatorName.BEGIN_INLINE_IMAGE)) { image = new PDInlineImage(operator.getImageParameters(), operator.getImageData(), @@ -100,13 +102,11 @@ protected void processOperator(Operator operator, List operands) throws IOExcept validateInlineImageColorSpace(operator); } - if (operation.equals("d0")) + if (operation.equals(OperatorName.TYPE3_D0)) { - checkType3FirstOperator(operands); - } - else if (operation.equals("d1")) + else if (operation.equals(OperatorName.TYPE3_D1)) { COSNumber llx = (COSNumber) operands.get(2); COSNumber lly = (COSNumber) operands.get(3); @@ -136,19 +136,15 @@ else if (operation.equals("d1")) * @param arguments * @throws IOException */ - private void 
checkType3FirstOperator(List arguments) throws IOException + private void checkType3FirstOperator(List arguments) throws IOException { if (!firstOperator) { throw new IOException("Type3 CharProc : First operator must be d0 or d1"); } - Object obj = arguments.get(0); - if (obj instanceof Number) - { - width = ((Number) obj).intValue(); - } - else if (obj instanceof COSNumber) + COSBase obj = arguments.get(0); + if (obj instanceof COSNumber) { width = ((COSNumber) obj).floatValue(); } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/graphic/ColorSpaceHelperFactory.java b/preflight/src/main/java/org/apache/pdfbox/preflight/graphic/ColorSpaceHelperFactory.java index 7db7bdbe031..bc98a9e3059 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/graphic/ColorSpaceHelperFactory.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/graphic/ColorSpaceHelperFactory.java @@ -31,11 +31,11 @@ public class ColorSpaceHelperFactory { /** - * Return an instance of ColorSpaceHelper according to the ColorSpaceRestiction value. + * Return an instance of ColorSpaceHelper according to the ColorSpaceRestriction value. *
        - *
      • ColorSpaceRestiction.NO_PATTERN : returns NoPatternColorSpaceHelper - *
      • ColorSpaceRestiction.ONLY_DEVICE : returns DeviceColorSpaceHelper - *
      • default : returns StandardColorSpaceHelper + *
      • ColorSpaceRestriction.NO_PATTERN : returns NoPatternColorSpaceHelper + *
      • ColorSpaceRestriction.ONLY_DEVICE : returns DeviceColorSpaceHelper + *
      • ColorSpaceRestriction.NO_RESTRICTION (default) : returns StandardColorSpaceHelper *
      * * @param context @@ -64,6 +64,6 @@ public ColorSpaceHelper getColorSpaceHelper(PreflightContext context, PDColorSpa */ public enum ColorSpaceRestriction { - NO_RESTRICTION, NO_PATTERN, ONLY_DEVICE; + NO_RESTRICTION, NO_PATTERN, ONLY_DEVICE } } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/graphic/ICCProfileWrapper.java b/preflight/src/main/java/org/apache/pdfbox/preflight/graphic/ICCProfileWrapper.java index 51fe5c92127..003f4dc26e5 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/graphic/ICCProfileWrapper.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/graphic/ICCProfileWrapper.java @@ -21,9 +21,11 @@ package org.apache.pdfbox.preflight.graphic; +import java.awt.color.ColorSpace; import java.awt.color.ICC_ColorSpace; import java.awt.color.ICC_Profile; import java.io.IOException; +import java.io.InputStream; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; @@ -88,7 +90,7 @@ public ICC_Profile getProfile() */ public boolean isRGBColorSpace() { - return ICC_ColorSpace.TYPE_RGB == colorSpace.getType(); + return ColorSpace.TYPE_RGB == colorSpace.getType(); } /** @@ -98,7 +100,7 @@ public boolean isRGBColorSpace() */ public boolean isCMYKColorSpace() { - return ICC_ColorSpace.TYPE_CMYK == colorSpace.getType(); + return ColorSpace.TYPE_CMYK == colorSpace.getType(); } /** @@ -108,7 +110,7 @@ public boolean isCMYKColorSpace() */ public boolean isGrayColorSpace() { - return ICC_ColorSpace.TYPE_GRAY == colorSpace.getType(); + return ColorSpace.TYPE_GRAY == colorSpace.getType(); } /** @@ -117,17 +119,19 @@ public boolean isGrayColorSpace() * * @param context * @return an instance of ICCProfileWrapper or null if there are no DestOutputProfile - * @throws ValidationException - * if an IOException occurs during the DestOutputProfile parsing */ - private static ICCProfileWrapper searchFirstICCProfile(PreflightContext context) throws ValidationException 
+ private static ICCProfileWrapper searchFirstICCProfile(PreflightContext context) { PreflightDocument document = context.getDocument(); PDDocumentCatalog catalog = document.getDocumentCatalog(); COSBase cBase = catalog.getCOSObject().getItem(COSName.getPDFName(DOCUMENT_DICTIONARY_KEY_OUTPUT_INTENTS)); COSArray outputIntents = COSUtils.getAsArray(cBase, document.getDocument()); - for (int i = 0; outputIntents != null && i < outputIntents.size(); ++i) + if (outputIntents == null) + { + return null; + } + for (int i = 0; i < outputIntents.size(); ++i) { COSDictionary outputIntentDict = COSUtils.getAsDictionary(outputIntents.get(i), document.getDocument()); COSBase destOutputProfile = outputIntentDict.getItem(OUTPUT_INTENT_DICTIONARY_KEY_DEST_OUTPUT_PROFILE); @@ -138,8 +142,15 @@ private static ICCProfileWrapper searchFirstICCProfile(PreflightContext context) COSStream stream = COSUtils.getAsStream(destOutputProfile, document.getDocument()); if (stream != null) { - ICC_Profile iccp = ICC_Profile.getInstance(stream.createInputStream()); - return new ICCProfileWrapper(iccp); + InputStream is = stream.createInputStream(); + try + { + return new ICCProfileWrapper(ICC_Profile.getInstance(is)); + } + finally + { + is.close(); + } } } catch (IllegalArgumentException e) @@ -147,6 +158,11 @@ private static ICCProfileWrapper searchFirstICCProfile(PreflightContext context) context.addValidationError(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_INVALID, "DestOutputProfile isn't a valid ICCProfile. Caused by : " + e.getMessage(), e)); } + catch (ArrayIndexOutOfBoundsException e) + { + context.addValidationError(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_INVALID, + "DestOutputProfile isn't a valid ICCProfile. 
Caused by : " + e.getMessage(), e)); + } catch (IOException e) { context.addValidationError(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_INVALID, diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/graphic/StandardColorSpaceHelper.java b/preflight/src/main/java/org/apache/pdfbox/preflight/graphic/StandardColorSpaceHelper.java index ab7c3bb2a04..f0395d046d4 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/graphic/StandardColorSpaceHelper.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/graphic/StandardColorSpaceHelper.java @@ -23,18 +23,24 @@ import java.awt.color.ICC_Profile; import java.io.IOException; +import java.io.InputStream; import java.util.Map; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceN; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceNAttributes; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceNProcess; import org.apache.pdfbox.pdmodel.graphics.color.PDICCBased; import org.apache.pdfbox.pdmodel.graphics.color.PDIndexed; import org.apache.pdfbox.pdmodel.graphics.color.PDSeparation; +import org.apache.pdfbox.preflight.PreflightConfiguration; import org.apache.pdfbox.preflight.PreflightContext; import org.apache.pdfbox.preflight.PreflightPath; import org.apache.pdfbox.preflight.ValidationResult.ValidationError; @@ -50,6 +56,8 @@ import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_GRAPHIC_INVALID_COLOR_SPACE_TOO_MANY_COMPONENTS_DEVICEN; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_GRAPHIC_INVALID_PATTERN_COLOR_SPACE_FORBIDDEN; import static 
org.apache.pdfbox.preflight.PreflightConstants.ERROR_GRAPHIC_INVALID_UNKNOWN_COLOR_SPACE; +import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_TOO_RECENT; +import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY; import static org.apache.pdfbox.preflight.PreflightConstants.MAX_DEVICE_N_LIMIT; /** @@ -80,7 +88,7 @@ protected StandardColorSpaceHelper(PreflightContext _context, PDColorSpace _cs) /* * (non-Javadoc) * - * @see net.awl.edoc.pdfa.validation.graphics.color.ColorSpaceHelper#validate(java .util.List) + * @see org.apache.pdfbox.preflight.graphic.color.ColorSpaceHelper#validate(java .util.List) */ @Override public final void validate() throws ValidationException @@ -213,7 +221,7 @@ protected void processGrayColorSpace(PDColorSpace colorSpace) } /** - * Method called by the processAllColorSpace if the ColorSpace to check is a Clibrated Color (CalGary, CalRGB, Lab). + * Method called by the processAllColorSpace if the ColorSpace to check is a calibrated color (CalGray, CalRGB, Lab). * @param colorSpace * */ @@ -235,7 +243,11 @@ protected void processICCBasedColorSpace(PDColorSpace colorSpace) PDICCBased iccBased = (PDICCBased) colorSpace; try { - ICC_Profile.getInstance(iccBased.getPDStream().createInputStream()); + InputStream is = iccBased.getPDStream().createInputStream(); + // check that ICC profile loads (PDICCBased also does this, but catches the exception) + // PDFBOX-2819: load ICC profile as a stream, not as a byte array because of java error + ICC_Profile iccp = ICC_Profile.getInstance(is); + is.close(); PDColorSpace altpdcs = iccBased.getAlternateColorSpace(); if (altpdcs != null) { @@ -255,14 +267,31 @@ protected void processICCBasedColorSpace(PDColorSpace colorSpace) * * We don't check the alternate ColorSpaces */ + // PDFBOX-4611, PDFBOX-4607: Yes we do because Adobe Reader chokes on it + // and because VeraPDF and PDF-Tools do it. 
+ if (!validateICCProfileNEntry(iccBased.getPDStream().getCOSObject(), iccp)) + { + return; + } + if (!validateICCProfileVersion(iccp)) + { + return; + } + validateICCProfileAlternateEntry(iccBased); } - } + } catch (IllegalArgumentException e) { // this is not a ICC_Profile context.addValidationError(new ValidationError(ERROR_GRAPHIC_INVALID_COLOR_SPACE_ICCBASED, "ICCBased color space is invalid: " + e.getMessage(), e)); } + catch (ArrayIndexOutOfBoundsException e) + { + // this is not a ICC_Profile + context.addValidationError(new ValidationError(ERROR_GRAPHIC_INVALID_COLOR_SPACE_ICCBASED, + "ICCBased color space is invalid: " + e.getMessage(), e)); + } catch (IOException e) { context.addValidationError(new ValidationError(ERROR_GRAPHIC_INVALID_COLOR_SPACE, @@ -302,17 +331,19 @@ protected void processDeviceNColorSpace(PDColorSpace colorSpace) if (attr != null) { final Map colorants = attr.getColorants(); - if (colorants != null) + numberOfColorants = colorants.size(); + for (PDSeparation col : colorants.values()) { - numberOfColorants = colorants.size(); - for (PDSeparation col : colorants.values()) + if (col != null) { - if (col != null) - { - processAllColorSpace(col); - } + processAllColorSpace(col); } } + PDDeviceNProcess process = attr.getProcess(); + if (process != null) + { + processAllColorSpace(process.getColorSpace()); + } } int numberOfComponents = deviceN.getNumberOfComponents(); if (numberOfColorants > MAX_DEVICE_N_LIMIT || numberOfComponents > MAX_DEVICE_N_LIMIT) @@ -451,4 +482,84 @@ else if (colorSpace.getName().equals(ColorSpaces.DeviceGray.getLabel()) && return result; } + + private boolean validateICCProfileVersion(ICC_Profile iccp) + { + PreflightConfiguration config = context.getConfig(); + + // check the ICC Profile version (6.2.2) + if (iccp.getMajorVersion() == 2) + { + if (iccp.getMinorVersion() > 0x40) + { + // in PDF 1.4, max version is 02h.40h (meaning V 3.5) + // see the ICCProfile specification (ICC.1:1998-09)page 13 - §6.1.3 : + // 
The current profile version number is "2.4.0" (encoded as 02400000h") + ValidationError error = new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_TOO_RECENT, + "Invalid version of the ICCProfile"); + error.setWarning(config.isLazyValidation()); + context.addValidationError(error); + return false; + } + // else OK + } + else if (iccp.getMajorVersion() > 2) + { + // in PDF 1.4, max version is 02h.40h (meaning V 3.5) + // see the ICCProfile specification (ICC.1:1998-09)page 13 - §6.1.3 : + // The current profile version number is "2.4.0" (encoded as 02400000h" + ValidationError error = new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_TOO_RECENT, + "Invalid version of the ICCProfile"); + error.setWarning(config.isLazyValidation()); + context.addValidationError(error); + return false; + } + // else seems less than 2, so correct + return true; + } + + private boolean validateICCProfileNEntry(COSStream stream, ICC_Profile iccp) + { + COSDictionary streamDict = (COSDictionary) stream.getCOSObject(); + if (!streamDict.containsKey(COSName.N)) + { + context.addValidationError(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, + "/N entry of ICC profile is mandatory")); + return false; + } + COSBase nValue = streamDict.getItem(COSName.N); + if (!(nValue instanceof COSNumber)) + { + context.addValidationError(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, + "/N entry of ICC profile must be a number, but is " + nValue)); + return false; + } + int nNumberValue = ((COSNumber) nValue).intValue(); + if (nNumberValue != 1 && nNumberValue != 3 && nNumberValue != 4) + { + context.addValidationError(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, + "/N entry of ICC profile must be 1, 3 or 4, but is " + nNumberValue)); + return false; + } + if (iccp.getNumComponents() != nNumberValue) + { + context.addValidationError(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, + "/N entry of ICC profile is " + nNumberValue 
+ " but the ICC profile has " + iccp.getNumComponents() + " components")); + return false; + } + return true; + } + + private void validateICCProfileAlternateEntry(PDICCBased iccBased) throws IOException + { + PDColorSpace altCS = iccBased.getAlternateColorSpace(); + if (altCS != null && altCS.getNumberOfComponents() != iccBased.getNumberOfComponents()) + { + // https://github.com/veraPDF/veraPDF-library/issues/773 + context.addValidationError(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, + "/N entry of ICC profile is different (" + iccBased.getNumberOfComponents() + + ") than alternate entry colorspace component count (" + + altCS.getNumberOfComponents() + ")")); + } + } } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/PDFAIdentificationValidation.java b/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/PDFAIdentificationValidation.java index 07d7e32cff2..be57383b031 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/PDFAIdentificationValidation.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/PDFAIdentificationValidation.java @@ -82,7 +82,7 @@ public List validatePDFAIdentifer(XMPMetadata metadata) throws } } checkConformanceLevel(ve, id.getConformance()); - checkPartNumber(ve, id.getPart()); + checkPartNumber(ve, id.getPart() == null ? 
-1 : id.getPart()); return ve; } @@ -107,7 +107,8 @@ protected void checkConformanceLevel(List ve, String value) { if (value == null || !(value.equals("A") || value.equals("B"))) { - ve.add(new ValidationError(ERROR_METADATA_INVALID_PDFA_CONFORMANCE)); + ve.add(new ValidationError(ERROR_METADATA_INVALID_PDFA_CONFORMANCE, + "conformance level must be A or B")); } } @@ -115,7 +116,7 @@ protected void checkPartNumber(List ve, int value) { if (value != 1) { - ve.add(new ValidationError(ERROR_METADATA_INVALID_PDFA_VERSION_ID)); + ve.add(new ValidationError(ERROR_METADATA_INVALID_PDFA_VERSION_ID, "part must be 1")); } } } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/RDFAboutAttributeConcordanceValidation.java b/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/RDFAboutAttributeConcordanceValidation.java index c274cc39a98..278c0986e8a 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/RDFAboutAttributeConcordanceValidation.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/RDFAboutAttributeConcordanceValidation.java @@ -28,16 +28,17 @@ import org.apache.xmpbox.schema.XMPSchema; /** - * Class which all elements within an rdf:RDF have the same value for their rdf:about attributes - * + * Class which checks that all elements within an rdf:RDF have the same value for their rdf:about + * attributes. + * * @author Germain Costenobel - * + * */ public class RDFAboutAttributeConcordanceValidation { /** - * + * * @param metadata the XMP metadata. 
* @throws DifferentRDFAboutException * @throws ValidationException @@ -48,11 +49,11 @@ public void validateRDFAboutAttributes(XMPMetadata metadata) throws ValidationEx List schemas = metadata.getAllSchemas(); if (schemas.isEmpty()) { - throw new ValidationException("Schemas not found in the given metadata representation"); + throw new ValidationException("No schema found in the given metadata representation"); } - + String about = schemas.get(0).getAboutValue(); - + // rdf:description must have an rdf:about attribute for (XMPSchema xmpSchema : schemas) { @@ -62,7 +63,7 @@ public void validateRDFAboutAttributes(XMPMetadata metadata) throws ValidationEx { throw new DifferentRDFAboutException(); } - + if ("".equals(about)) { about = schemaAboutValue; diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/SynchronizedMetaDataValidation.java b/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/SynchronizedMetaDataValidation.java index ca9aa3c3f13..d2f17e49104 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/SynchronizedMetaDataValidation.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/SynchronizedMetaDataValidation.java @@ -35,7 +35,6 @@ import org.apache.pdfbox.preflight.PreflightConstants; import org.apache.pdfbox.preflight.ValidationResult.ValidationError; import org.apache.pdfbox.preflight.exception.ValidationException; -import org.apache.xmpbox.DateConverter; import org.apache.xmpbox.XMPMetadata; import org.apache.xmpbox.schema.AdobePDFSchema; import org.apache.xmpbox.schema.DublinCoreSchema; @@ -352,7 +351,7 @@ protected void analyzeCreationDateProperty(PDDocumentInformation dico, XMPBasicS } else { - if (!DateConverter.toISO8601(xmpCreationDate).equals(DateConverter.toISO8601(creationDate))) + if (xmpCreationDate.compareTo(creationDate) != 0) { ve.add(unsynchronizedMetaDataError("CreationDate")); } @@ -395,7 +394,7 @@ protected void analyzeModifyDateProperty(PDDocumentInformation dico, 
XMPBasicSch } else { - if (!DateConverter.toISO8601(xmpModifyDate).equals(DateConverter.toISO8601(modifyDate))) + if (xmpModifyDate.compareTo(modifyDate) != 0) { ve.add(unsynchronizedMetaDataError("ModificationDate")); } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/UniquePropertiesValidation.java b/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/UniquePropertiesValidation.java new file mode 100644 index 00000000000..b01961bde1d --- /dev/null +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/metadata/UniquePropertiesValidation.java @@ -0,0 +1,102 @@ +/** *************************************************************************** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + *************************************************************************** */ +package org.apache.pdfbox.preflight.metadata; + +import java.util.ArrayList; +import java.util.List; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.preflight.PreflightConstants; +import org.apache.pdfbox.preflight.ValidationResult; +import org.apache.pdfbox.preflight.ValidationResult.ValidationError; +import org.apache.pdfbox.preflight.exception.ValidationException; +import org.apache.xmpbox.XMPMetadata; +import org.apache.xmpbox.schema.AdobePDFSchema; +import org.apache.xmpbox.schema.DublinCoreSchema; +import org.apache.xmpbox.schema.XMPBasicSchema; +import org.apache.xmpbox.schema.XMPSchema; +import org.apache.xmpbox.type.AbstractField; + +/** + * Class which checks that certain metadata properties are unique, see PDFBOX-4860. + * + * @author Tilman Hausherr + * + */ +public class UniquePropertiesValidation +{ + + /** + * Checks that certain metadata properties are unique. + * + * @param document the PDF Document + * @param metadata the XMP MetaData + * @return List of validation errors + * @throws ValidationException + */ + public List validatePropertiesUniqueness(PDDocument document, XMPMetadata metadata) + throws ValidationException + { + if (document == null) + { + throw new ValidationException("Document provided is null"); + } + + List ve = new ArrayList(); + + analyzePropertyUniqueness(metadata.getDublinCoreSchema(), DublinCoreSchema.CREATOR, ve); + analyzePropertyUniqueness(metadata.getDublinCoreSchema(), DublinCoreSchema.TITLE, ve); + analyzePropertyUniqueness(metadata.getDublinCoreSchema(), DublinCoreSchema.DESCRIPTION, ve); + + analyzePropertyUniqueness(metadata.getAdobePDFSchema(), AdobePDFSchema.PRODUCER, ve); + analyzePropertyUniqueness(metadata.getAdobePDFSchema(), AdobePDFSchema.KEYWORDS, ve); + + analyzePropertyUniqueness(metadata.getXMPBasicSchema(), XMPBasicSchema.CREATORTOOL, ve); + 
analyzePropertyUniqueness(metadata.getXMPBasicSchema(), XMPBasicSchema.CREATEDATE, ve); + analyzePropertyUniqueness(metadata.getXMPBasicSchema(), XMPBasicSchema.MODIFYDATE, ve); + + // should any other properties be checked for uniqueness? Let us know. + + return ve; + } + + private static void analyzePropertyUniqueness(XMPSchema schema, String propertyName, + List ve) + { + if (schema == null) + { + return; + } + int count = 0; + for (AbstractField field : schema.getAllProperties()) + { + if (propertyName.equals(field.getPropertyName())) + { + ++count; + } + } + if (count > 1) + { + ve.add(new ValidationError(PreflightConstants.ERROR_METADATA_PROPERTY_FORMAT, + "property '" + schema.getPrefix() + ":" + propertyName + + "' occurs multiple times")); + } + } +} diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java b/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java index 97ed2680012..bc9ee4114d6 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java @@ -28,7 +28,6 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.activation.DataSource; @@ -47,6 +46,7 @@ import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; +import org.apache.pdfbox.io.ScratchFile; import org.apache.pdfbox.pdfparser.PDFObjectStreamParser; import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType; @@ -111,6 +111,21 @@ public PreflightParser(File file) throws IOException this.dataSource = new FileDataSource(file); } + /** + * Constructor. + * + * @param file + * @param scratch + * @throws IOException if there is a reading error. 
+ */ + public PreflightParser(File file, ScratchFile scratch) throws IOException + { + // TODO move file handling outside of the parser + super(new RandomAccessBufferedFileInputStream(file), scratch); + this.setLenient(false); + this.dataSource = new FileDataSource(file); + } + /** * Constructor. * @@ -123,6 +138,19 @@ public PreflightParser(String filename) throws IOException this(new File(filename)); } + /** + * Constructor. + * + * @param filename + * @param scratch + * @throws IOException if there is a reading error. + */ + public PreflightParser(String filename, ScratchFile scratch) throws IOException + { + // TODO move file handling outside of the parser + this(new File(filename), scratch); + } + /** * Constructor. This one is slower than the file and the filename constructors, because * a temporary file will be created. @@ -138,6 +166,22 @@ public PreflightParser(DataSource dataSource) throws IOException this.dataSource = dataSource; } + /** + * Constructor. This one is slower than the file and the filename constructors, because + * a temporary file will be created. + * + * @param dataSource the datasource + * @param scratch + * @throws IOException if there is a reading error. 
+ */ + public PreflightParser(DataSource dataSource, ScratchFile scratch) throws IOException + { + // TODO move file handling outside of the parser + super(new RandomAccessBufferedFileInputStream(dataSource.getInputStream()), scratch); + this.setLenient(false); + this.dataSource = dataSource; + } + /** * Create an instance of ValidationResult with a ValidationError(UNKNOWN_ERROR) * @@ -237,6 +281,7 @@ protected void createContext() ctx.setDocument(preflightDocument); preflightDocument.setContext(ctx); ctx.setXrefTrailerResolver(xrefTrailerResolver); + ctx.setFileLen(this.fileLen); } @Override @@ -636,7 +681,7 @@ protected COSBase parseDirObject() throws IOException COSNumber number = (COSNumber) result; if (number instanceof COSFloat) { - Double real = number.doubleValue(); + Float real = number.floatValue(); if (real > MAX_POSITIVE_FLOAT || real < MAX_NEGATIVE_FLOAT) { addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, @@ -775,16 +820,13 @@ else if (offsetOrObjstmObNr > 0) endObjectKey = readString(); // we have case with a second 'endstream' before endobj - if (!endObjectKey.startsWith("endobj")) + if (!endObjectKey.startsWith("endobj") && endObjectKey.startsWith("endstream")) { - if (endObjectKey.startsWith("endstream")) + endObjectKey = endObjectKey.substring(9).trim(); + if (endObjectKey.length() == 0) { - endObjectKey = endObjectKey.substring(9).trim(); - if (endObjectKey.length() == 0) - { - // no other characters in extra endstream line - endObjectKey = readString(); // read next line - } + // no other characters in extra endstream line + endObjectKey = readString(); // read next line } } } @@ -830,14 +872,12 @@ else if (securityHandler != null) PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document); parser.parse(); - // get set of object numbers referenced for this object stream - final Set refObjNrs = xrefTrailerResolver.getContainedObjectNumbers(objstmObjNr); - // register all objects which are 
referenced to be contained in object stream for (COSObject next : parser.getObjects()) { COSObjectKey stmObjKey = new COSObjectKey(next); - if (refObjNrs.contains(stmObjKey.getNumber())) + Long offset = xrefTrailerResolver.getXrefTable().get(stmObjKey); + if (offset != null && offset == -objstmObjNr) { COSObject stmObj = document.getObjectFromPool(stmObjKey); stmObj.setObject(next.getObject()); @@ -870,7 +910,7 @@ protected int lastIndexOf(final char[] pattern, final byte[] buf, final int endO { position = source.getPosition(); } - catch(IOException excpetion) + catch (IOException ex) { position = Long.MIN_VALUE; } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/parser/XmlResultParser.java b/preflight/src/main/java/org/apache/pdfbox/preflight/parser/XmlResultParser.java index 74bac72b6ca..86dff792845 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/parser/XmlResultParser.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/parser/XmlResultParser.java @@ -43,12 +43,13 @@ public class XmlResultParser { - public Element validate(DataSource source) throws IOException + public Element validate(DataSource dataSource) throws IOException { try { + @SuppressWarnings({"squid:S2755"}) // self-created XML Document rdocument = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); - return validate(rdocument,source); + return validate(rdocument,dataSource); } catch (ParserConfigurationException e) { @@ -57,14 +58,14 @@ public Element validate(DataSource source) throws IOException } - public Element validate(Document rdocument, DataSource source) throws IOException + public Element validate(Document rdocument, DataSource dataSource) throws IOException { String pdfType = null; ValidationResult result; long before = System.currentTimeMillis(); try { - PreflightParser parser = new PreflightParser(source); + PreflightParser parser = new PreflightParser(dataSource); try { parser.parse(); @@ -82,13 +83,13 @@ public Element 
validate(Document rdocument, DataSource source) throws IOExceptio catch(Exception e) { long after = System.currentTimeMillis(); - return generateFailureResponse(rdocument, source.getName(), after-before, pdfType, e); + return generateFailureResponse(rdocument, dataSource.getName(), after-before, pdfType, e); } long after = System.currentTimeMillis(); if (result.isValid()) { - Element preflight = generateResponseSkeleton(rdocument, source.getName(), after-before); + Element preflight = generateResponseSkeleton(rdocument, dataSource.getName(), after-before); // valid ? Element valid = rdocument.createElement("isValid"); valid.setAttribute("type", pdfType); @@ -98,7 +99,7 @@ public Element validate(Document rdocument, DataSource source) throws IOExceptio } else { - Element preflight = generateResponseSkeleton(rdocument, source.getName(), after-before); + Element preflight = generateResponseSkeleton(rdocument, dataSource.getName(), after-before); // valid ? createResponseWithError(rdocument, pdfType, result, preflight); return preflight; diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/process/AcroFormValidationProcess.java b/preflight/src/main/java/org/apache/pdfbox/preflight/process/AcroFormValidationProcess.java index c0a59777dc0..e4ff5b63367 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/process/AcroFormValidationProcess.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/process/AcroFormValidationProcess.java @@ -39,7 +39,6 @@ import static org.apache.pdfbox.preflight.PreflightConfiguration.ANNOTATIONS_PROCESS; -import static org.apache.pdfbox.preflight.PreflightConstants.ACROFORM_DICTIONARY_KEY_NEED_APPEARANCES; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_ACTION_FORBIDDEN_ADDITIONAL_ACTIONS_FIELD; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_ACTION_FORBIDDEN_WIDGET_ACTION_FIELD; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_BODY; @@ -55,7 +54,7 @@ 
public void validate(PreflightContext ctx) throws ValidationException PDDocumentCatalog catalog = ctx.getDocument().getDocumentCatalog(); if (catalog != null) { - PDAcroForm acroForm = catalog.getAcroForm(); + PDAcroForm acroForm = catalog.getAcroForm(null); if (acroForm != null) { checkNeedAppearences(ctx, acroForm); @@ -85,7 +84,7 @@ public void validate(PreflightContext ctx) throws ValidationException */ protected void checkNeedAppearences(PreflightContext ctx, PDAcroForm acroForm) { - if (acroForm.getCOSObject().getBoolean(ACROFORM_DICTIONARY_KEY_NEED_APPEARANCES, false)) + if (acroForm.getNeedAppearances()) { addValidationError(ctx, new ValidationError(ERROR_SYNTAX_DICT_INVALID, "NeedAppearance is present with the value \"true\"")); @@ -177,19 +176,22 @@ protected boolean validateField(PreflightContext ctx, PDField field) throws IOEx if (field instanceof PDTerminalField) { // The widget validation will be done by the widget annotation, a widget contained in a Field can't have action. 
- PDAnnotationWidget widget = ((PDTerminalField)field).getWidgets().get(0); // fixme: fails to check multiple widgets - if (res && widget != null) + List widgets = field.getWidgets(); + if (res && widgets != null) { - ContextHelper.validateElement(ctx, widget.getCOSObject(), ANNOTATIONS_PROCESS); - COSBase act = widget.getCOSObject().getDictionaryObject(COSName.A); - if (act != null) + for (PDAnnotationWidget widget : widgets) { - addValidationError(ctx, new ValidationError(ERROR_ACTION_FORBIDDEN_WIDGET_ACTION_FIELD, - "\"A\" must not be used in a widget annotation")); - return false; + ContextHelper.validateElement(ctx, widget.getCOSObject(), ANNOTATIONS_PROCESS); + COSBase act = widget.getCOSObject().getDictionaryObject(COSName.A); + if (act != null) + { + addValidationError(ctx, new ValidationError(ERROR_ACTION_FORBIDDEN_WIDGET_ACTION_FIELD, + "\"A\" must not be used in a widget annotation")); + return false; + } } } - return exploreWidgets(ctx, ((PDTerminalField)field).getWidgets()); + return exploreWidgets(ctx, field.getWidgets()); } else { diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/process/BookmarkValidationProcess.java b/preflight/src/main/java/org/apache/pdfbox/preflight/process/BookmarkValidationProcess.java index 7a80dc6d5ab..4cef1dbd9fb 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/process/BookmarkValidationProcess.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/process/BookmarkValidationProcess.java @@ -292,7 +292,7 @@ private boolean checkIndirectObject(PreflightContext ctx, COSDictionary dictiona /** * Returns a COSBase as a COSObject or null if null or COSNull. To avoid * trouble, this method is to be called only after having called - * {@link #checkIndirectObjects()}. + * {@link #checkIndirectObjects(PreflightContext, COSDictionary)}. * * @param base should be null, COSNull or a COSObject. * @return null if the parameter is COSNull or null; or else a COSObject. 
diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/process/CatalogValidationProcess.java b/preflight/src/main/java/org/apache/pdfbox/preflight/process/CatalogValidationProcess.java index 12d8aa269e2..c32f003b32d 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/process/CatalogValidationProcess.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/process/CatalogValidationProcess.java @@ -23,6 +23,7 @@ import java.awt.color.ICC_Profile; import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -32,26 +33,28 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSObjectKey; import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; +import org.apache.pdfbox.pdmodel.graphics.color.PDICCBased; import org.apache.pdfbox.preflight.PreflightConfiguration; +import static org.apache.pdfbox.preflight.PreflightConfiguration.ACTIONS_PROCESS; +import static org.apache.pdfbox.preflight.PreflightConstants.*; import org.apache.pdfbox.preflight.PreflightContext; import org.apache.pdfbox.preflight.ValidationResult.ValidationError; import org.apache.pdfbox.preflight.exception.ValidationException; +import org.apache.pdfbox.preflight.graphic.ColorSpaceHelper; +import org.apache.pdfbox.preflight.graphic.ColorSpaceHelperFactory; +import org.apache.pdfbox.preflight.graphic.ColorSpaceHelperFactory.ColorSpaceRestriction; import org.apache.pdfbox.preflight.graphic.ICCProfileWrapper; import org.apache.pdfbox.preflight.utils.COSUtils; import 
org.apache.pdfbox.preflight.utils.ContextHelper; - -import static org.apache.pdfbox.preflight.PreflightConfiguration.ACTIONS_PROCESS; -import static org.apache.pdfbox.preflight.PreflightConstants.*; - /** * This ValidationProcess check if the Catalog entries are confirming with the PDF/A-1b specification. */ @@ -226,7 +229,7 @@ protected void validateOCProperties(PreflightContext ctx) throws ValidationExcep } /** - * This method checks the content of each OutputIntent. The S entry must contain GTS_PDFA1. The DestOuputProfile + * This method checks the content of each OutputIntent. The S entry must contain GTS_PDFA1. The DestOutputProfile * must contain a valid ICC Profile Stream. * * If there are more than one OutputIntent, they have to use the same ICC Profile. @@ -243,7 +246,11 @@ public void validateOutputIntent(PreflightContext ctx) throws ValidationExceptio COSArray outputIntents = COSUtils.getAsArray(cBase, cosDocument); Map tmpDestOutputProfile = new HashMap(); - for (int i = 0; outputIntents != null && i < outputIntents.size(); ++i) + if (outputIntents == null) + { + return; + } + for (int i = 0; i < outputIntents.size(); ++i) { COSDictionary outputIntentDict = COSUtils.getAsDictionary(outputIntents.get(i), cosDocument); @@ -304,10 +311,10 @@ public void validateOutputIntent(PreflightContext ctx) throws ValidationExceptio /** * This method checks the destOutputProfile which must be a valid ICCProfile. * - * If an other ICCProfile exists in the mapDestOutputProfile, a ValdiationError - * (ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_MULTIPLE) is returned because of only one profile is authorized. If the - * ICCProfile already exist in the mapDestOutputProfile, the method returns null. 
If the destOutputProfile contains - * an invalid ICCProfile, a ValidationError (ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_INVALID) is returned If the + * If another ICCProfile exists in the mapDestOutputProfile, a ValidationError + * (ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_MULTIPLE) is returned because only one profile is authorized. If the + * ICCProfile already exists in the mapDestOutputProfile, the method returns null. If the destOutputProfile contains + * an invalid ICCProfile, a ValidationError (ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_INVALID) is returned. If the * destOutputProfile is an empty stream, a ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY) is returned. * * If the destOutputFile is valid, mapDestOutputProfile is updated, the ICCProfile is added to the document ctx and @@ -323,11 +330,6 @@ protected void validateICCProfile(COSBase destOutputProfile, Map 0x40) - { - // in PDF 1.4, max version is 02h.40h (meaning V 3.5) - // see the ICCProfile specification (ICC.1:1998-09)page 13 - §6.1.3 : - // The current profile version number is "2.4.0" (encoded as 02400000h") - ValidationError error = new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_TOO_RECENT, - "Invalid version of the ICCProfile"); - error.setWarning(config.isLazyValidation()); - addValidationError(ctx, error); - return false; - } - // else OK - } - else if (iccp.getMajorVersion() > 2) - { - // in PDF 1.4, max version is 02h.40h (meaning V 3.5) - // see the ICCProfile specification (ICC.1:1998-09)page 13 - §6.1.3 : - // The current profile version number is "2.4.0" (encoded as 02400000h" - ValidationError error = new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_TOO_RECENT, - "Invalid version of the ICCProfile"); - error.setWarning(config.isLazyValidation()); - addValidationError(ctx, error); - return false; - } - // else seems less than 2, so correct - return true; - } - - private boolean validateICCProfileNEntry(COSStream stream, PreflightContext ctx, 
ICC_Profile iccp) - { - COSDictionary streamDict = (COSDictionary) stream.getCOSObject(); - if (!streamDict.containsKey(COSName.N)) - { - addValidationError(ctx, new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, - "/N entry of ICC profile is mandatory")); - return false; - } - COSBase nValue = streamDict.getItem(COSName.N); - if (!(nValue instanceof COSNumber)) - { - addValidationError(ctx, new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, - "/N entry of ICC profile must be a number, but is " + nValue)); - return false; - } - int nNumberValue = ((COSNumber) nValue).intValue(); - if (nNumberValue != 1 && nNumberValue != 3 && nNumberValue != 4) - { - addValidationError(ctx, new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, - "/N entry of ICC profile must be 1, 3 or 4, but is " + nNumberValue)); - return false; + // this is not a ICC_Profile + addValidationError(ctx, new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_ICC_PROFILE_INVALID, + "DestOutputProfile isn't a valid ICCProfile: " + e.getMessage(), e)); } - if (iccp.getNumComponents() != nNumberValue) + catch (IOException e) { - addValidationError(ctx, new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, - "/N entry of ICC profile is " + nNumberValue + " but the ICC profile has " + iccp.getNumComponents() + " components")); - return false; + throw new ValidationException("Unable to parse the ICC Profile.", e); } - return true; } } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/process/MetadataValidationProcess.java b/preflight/src/main/java/org/apache/pdfbox/preflight/process/MetadataValidationProcess.java index 17525e981de..3030b7d5e50 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/process/MetadataValidationProcess.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/process/MetadataValidationProcess.java @@ -28,14 +28,12 @@ import java.util.ArrayList; import java.util.List; import javax.imageio.ImageIO; -import 
javax.xml.bind.DatatypeConverter; import org.apache.pdfbox.cos.COSBase; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import org.apache.pdfbox.pdmodel.common.PDMetadata; import org.apache.pdfbox.preflight.PreflightConstants; import org.apache.pdfbox.preflight.PreflightContext; import org.apache.pdfbox.preflight.ValidationResult.ValidationError; @@ -44,8 +42,9 @@ import org.apache.pdfbox.preflight.metadata.RDFAboutAttributeConcordanceValidation; import org.apache.pdfbox.preflight.metadata.RDFAboutAttributeConcordanceValidation.DifferentRDFAboutException; import org.apache.pdfbox.preflight.metadata.SynchronizedMetaDataValidation; +import org.apache.pdfbox.preflight.metadata.UniquePropertiesValidation; import org.apache.pdfbox.preflight.metadata.XpacketParsingException; -import org.apache.pdfbox.preflight.utils.COSUtils; +import org.apache.pdfbox.util.Hex; import org.apache.xmpbox.XMPMetadata; import org.apache.xmpbox.schema.XMPBasicSchema; import org.apache.xmpbox.type.BadFieldValueException; @@ -64,7 +63,7 @@ public void validate(PreflightContext ctx) throws ValidationException { PDDocument document = ctx.getDocument(); - InputStream is = getXpacket(document.getDocument()); + InputStream is = getXpacket(document); DomXmpParser builder = new DomXmpParser(); XMPMetadata metadata = builder.parse(is); is.close(); @@ -88,6 +87,10 @@ public void validate(PreflightContext ctx) throws ValidationException addValidationErrors(ctx, new SynchronizedMetaDataValidation().validateMetadataSynchronization(document, metadata)); + // Call metadata uniqueness checking + addValidationErrors(ctx, + new UniquePropertiesValidation().validatePropertiesUniqueness(document, metadata)); + // Call PDF/A Identifier checking 
addValidationErrors(ctx, new PDFAIdentificationValidation().validatePDFAIdentifer(metadata)); @@ -195,7 +198,7 @@ private void checkThumbnail(ThumbnailType tb, PreflightContext ctx) byte[] binImage; try { - binImage = DatatypeConverter.parseBase64Binary(tb.getImage()); + binImage = Hex.decodeBase64(tb.getImage()); } catch (IllegalArgumentException e) { @@ -232,7 +235,7 @@ private void checkThumbnail(ThumbnailType tb, PreflightContext ctx) if (bim.getWidth() != tb.getWidth()) { addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_METADATA_FORMAT, - "xapGImg:witdh does not match the actual base64-encoded thumbnail image data")); + "xapGImg:width does not match the actual base64-encoded thumbnail image data")); } } @@ -251,37 +254,38 @@ private boolean hasJpegMagicNumber(byte[] binImage) /** * Return the xpacket from the dictionary's stream */ - private static InputStream getXpacket(COSDocument cdocument) throws IOException, XpacketParsingException + private static InputStream getXpacket(PDDocument document) + throws IOException, XpacketParsingException { - COSObject catalog = cdocument.getCatalog(); - COSBase cb = catalog.getDictionaryObject(COSName.METADATA); - if (cb == null) + PDDocumentCatalog catalog = document.getDocumentCatalog(); + PDMetadata metadata = catalog.getMetadata(); + if (metadata == null) { + COSBase metaObject = catalog.getCOSObject().getDictionaryObject(COSName.METADATA); + if (!(metaObject instanceof COSStream)) + { + // the Metadata object isn't a stream + ValidationError error = new ValidationError( + PreflightConstants.ERROR_METADATA_FORMAT, "Metadata is not a stream"); + throw new XpacketParsingException("Failed while retrieving xpacket", error); + } // missing Metadata Key in catalog ValidationError error = new ValidationError(PreflightConstants.ERROR_METADATA_FORMAT, "Missing Metadata Key in catalog"); throw new XpacketParsingException("Failed while retrieving xpacket", error); } + // no filter key - COSDictionary 
metadataDictionnary = COSUtils.getAsDictionary(cb, cdocument); - if (metadataDictionnary.getItem(COSName.FILTER) != null) + if (metadata.getFilters() != null) { // should not be defined - ValidationError error = new ValidationError(PreflightConstants.ERROR_SYNTAX_STREAM_INVALID_FILTER, - "Filter specified in metadata dictionnary"); - throw new XpacketParsingException("Failed while retrieving xpacket", error); - } - - if (!(metadataDictionnary instanceof COSStream)) - { - // missing Metadata Key in catalog - ValidationError error = new ValidationError(PreflightConstants.ERROR_METADATA_FORMAT, - "Metadata is not a stream"); + ValidationError error = new ValidationError( + PreflightConstants.ERROR_SYNTAX_STREAM_INVALID_FILTER, + "Filter specified in metadata dictionary"); throw new XpacketParsingException("Failed while retrieving xpacket", error); } - COSStream stream = (COSStream) metadataDictionnary; - return stream.createInputStream(); + return metadata.exportXMPMetadata(); } /** diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/process/PageTreeValidationProcess.java b/preflight/src/main/java/org/apache/pdfbox/preflight/process/PageTreeValidationProcess.java index 8ae615306ef..8e59f183c3b 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/process/PageTreeValidationProcess.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/process/PageTreeValidationProcess.java @@ -28,6 +28,7 @@ import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.preflight.PreflightConstants; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_PDF_PROCESSING_MISSING; import org.apache.pdfbox.preflight.PreflightContext; import org.apache.pdfbox.preflight.ValidationResult.ValidationError; @@ -50,12 +51,21 @@ public void validate(PreflightContext context) throws ValidationException "/Pages dictionary entry is missing in document catalog")); return; } - int numPages = 
context.getDocument().getNumberOfPages(); - for (int i = 0; i < numPages; i++) + int p = 0; + for (PDPage page : context.getDocument().getPages()) { - context.setCurrentPageNumber(i); - validatePage(context, context.getDocument().getPage(i)); + context.setCurrentPageNumber(p); + validatePage(context, page); + + if (context.getDocument().getResult().getErrorsList().size() > context.getConfig().getMaxErrors()) + { + context.addValidationError(new ValidationError(PreflightConstants.ERROR_UNKOWN_ERROR, + "Over " + context.getConfig().getMaxErrors() + + " errors, page tree validation process aborted")); + break; + } context.setCurrentPageNumber(null); + ++p; } } else diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java b/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java index 2187c5efbb6..afc7e0cb3d0 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/process/StreamValidationProcess.java @@ -22,6 +22,7 @@ package org.apache.pdfbox.preflight.process; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DAMAGED; +import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_FX_KEYS; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_INVALID_FILTER; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID; @@ -50,6 +51,8 @@ public class StreamValidationProcess extends AbstractProcess { + private static final String ENDSTREAM = "endstream"; + @Override public void validate(PreflightContext ctx) throws ValidationException { @@ -226,12 +229,15 @@ protected void checkStreamLength(PreflightContext context, COSObject cObj) throw if (readUntilStream(ra)) { int c = ra.read(); - if (c 
== '\r') + // "stream" has to be followed by a LF or CRLF + if ((c != '\r' && c != '\n') // + || (c == '\r' && ra.read() != '\n')) { - ra.read(); + addValidationError(context, + new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, + "Expected 'EOL' after the stream keyword not found")); + return; } - // else c is '\n' no more character to read - // ---- Here is the true beginning of the Stream Content. // ---- Read the given length of bytes and check the 10 next bytes // ---- to see if there are endstream. @@ -241,9 +247,9 @@ protected void checkStreamLength(PreflightContext context, COSObject cObj) throw do { int cr; - if (nbBytesToRead > 1024) + if (nbBytesToRead > buffer.length) { - cr = ra.read(buffer, 0, 1024); + cr = ra.read(buffer); } else { @@ -261,42 +267,22 @@ protected void checkStreamLength(PreflightContext context, COSObject cObj) throw } while (nbBytesToRead > 0); - int len = "endstream".length() + 2; + int len = ENDSTREAM.length() + 2; byte[] buffer2 = new byte[len]; - for (int i = 0; i < len; ++i) - { - buffer2[i] = (byte) ra.read(); - } + ra.read(buffer2); // ---- check the content of 10 last characters + // there has to be an proceeding EOL (LF or CRLF) String endStream = new String(buffer2, Charsets.ISO_8859_1); - if (buffer2[0] == '\r' && buffer2[1] == '\n') + if ((buffer2[0] != '\r' && buffer2[0] != '\n') // + || (buffer2[0] == '\r' && buffer2[1] != '\n') // + || (buffer2[0] == '\n' && buffer2[1] != 'e') // + || !endStream.contains(ENDSTREAM)) { - if (!endStream.contains("endstream")) - { - addStreamLengthValidationError(context, cObj, length, endStream); - } - } - else if (buffer2[0] == '\r' && buffer2[1] == 'e') - { - if (!endStream.contains("endstream")) - { - addStreamLengthValidationError(context, cObj, length, endStream); - } - } - else if (buffer2[0] == '\n' && buffer2[1] == 'e') - { - if (!endStream.contains("endstream")) - { - addStreamLengthValidationError(context, cObj, length, endStream); - } - } - else - { - if 
(!endStream.startsWith("endStream")) - { - addStreamLengthValidationError(context, cObj, length, endStream); - } + // TODO in some cases it is hard to say if the reason for this issue is a missing EOL or a wrong + // stream length, see isartor-6-1-7-t03-fail-a.pdf + // the implementation has to be adjusted similar to PreflightParser#parseCOSStream + addStreamLengthValidationError(context, cObj, length, endStream); } } else diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java b/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java index 6838dcabac6..ca3305503d9 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java @@ -63,7 +63,10 @@ public void validate(PreflightContext ctx) throws ValidationException PDDocument pdfDoc = ctx.getDocument(); COSDictionary linearizedDict = getLinearizedDictionary(pdfDoc); - if (linearizedDict != null) + // linearized files have two trailers, everything else is not a linearized file + // so don't make the checks for updated linearized files + if (linearizedDict != null && ctx.getXrefTrailerResolver().getTrailerCount() == 2 && + ctx.getFileLen() == linearizedDict.getLong(COSName.L)) { // it is a linearized PDF, check the linearized dictionary checkLinearizedDictionnary(ctx, linearizedDict); @@ -190,34 +193,30 @@ protected boolean compareIds(COSDictionary first, COSDictionary last, COSDocumen // if both are present, otherwise everything is fine if (idFirst != null && idLast != null) { - // ---- cast two COSBase to COSArray. 
COSArray af = COSUtils.getAsArray(idFirst, cosDocument); COSArray al = COSUtils.getAsArray(idLast, cosDocument); // ---- if one COSArray is null, the PDF/A isn't valid - if ((af == null) || (al == null)) + if (af == null || al == null) { return false; } // ---- compare both arrays boolean isEqual = true; - for (Object of : af.toList()) + for (Object of : af) { boolean oneIsEquals = false; - for (Object ol : al.toList()) + String ofString = ((COSString) of).getString(); + for (Object ol : al) { - // ---- according to PDF Reference 1-4, ID is an array containing two - // strings - if (!oneIsEquals) - { - oneIsEquals = ((COSString) ol).getString().equals(((COSString) of).getString()); - } - else + // ---- according to PDF Reference 1-4, ID is an array containing two strings + if (oneIsEquals) { break; } + oneIsEquals = ((COSString) ol).getString().equals(ofString); } isEqual = isEqual && oneIsEquals; if (!isEqual) diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/ExtGStateValidationProcess.java b/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/ExtGStateValidationProcess.java index a120497f830..e0f800309e7 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/ExtGStateValidationProcess.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/ExtGStateValidationProcess.java @@ -31,13 +31,6 @@ import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_TRANSPARENCY_EXT_GS_BLEND_MODE; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_TRANSPARENCY_EXT_GS_CA; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_TRANSPARENCY_EXT_GS_SOFT_MASK; -import static org.apache.pdfbox.preflight.PreflightConstants.TRANPARENCY_DICTIONARY_KEY_EXTGSTATE_ENTRY_REGEX; -import static org.apache.pdfbox.preflight.PreflightConstants.TRANSPARENCY_DICTIONARY_KEY_BLEND_MODE; -import static 
org.apache.pdfbox.preflight.PreflightConstants.TRANSPARENCY_DICTIONARY_KEY_LOWER_CA; -import static org.apache.pdfbox.preflight.PreflightConstants.TRANSPARENCY_DICTIONARY_KEY_UPPER_CA; -import static org.apache.pdfbox.preflight.PreflightConstants.TRANSPARENCY_DICTIONARY_VALUE_BM_COMPATIBLE; -import static org.apache.pdfbox.preflight.PreflightConstants.TRANSPARENCY_DICTIONARY_VALUE_BM_NORMAL; -import static org.apache.pdfbox.preflight.PreflightConstants.TRANSPARENCY_DICTIONARY_VALUE_SOFT_MASK_NONE; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_COMMON; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NUMERIC_RANGE; import static org.apache.pdfbox.preflight.PreflightConstants.MAX_NEGATIVE_FLOAT; @@ -69,7 +62,7 @@ public class ExtGStateValidationProcess extends AbstractProcess * Validate the ExtGState dictionaries. * * @param context the context which contains the Resource dictionary. - * @throws ValidationException thrown if a the Extended Graphic State isn't valid. + * @throws ValidationException thrown if an Extended Graphic State isn't valid. */ @Override public void validate(PreflightContext context) throws ValidationException @@ -99,7 +92,7 @@ public void validate(PreflightContext context) throws ValidationException * @param context the context which contains the Resource dictionary. * @param egsEntry a resource COSDictionary. * @return the list of ExtGState dictionaries. - * @throws ValidationException thrown if a the Extended Graphic State isn't valid. + * @throws ValidationException thrown if an Extended Graphic State isn't valid. 
*/ public List extractExtGStateDictionaries(PreflightContext context, COSDictionary egsEntry) throws ValidationException @@ -110,19 +103,15 @@ public List extractExtGStateDictionaries(PreflightContext context if (extGStates != null) { - for (Object object : extGStates.keySet()) + for (COSName key : extGStates.keySet()) { - COSName key = (COSName) object; - if (key.getName().matches(TRANPARENCY_DICTIONARY_KEY_EXTGSTATE_ENTRY_REGEX)) + COSBase gsBase = extGStates.getItem(key); + COSDictionary gsDict = COSUtils.getAsDictionary(gsBase, cosDocument); + if (gsDict == null) { - COSBase gsBase = extGStates.getItem(key); - COSDictionary gsDict = COSUtils.getAsDictionary(gsBase, cosDocument); - if (gsDict == null) - { - throw new ValidationException("The Extended Graphics State dictionary is invalid"); - } - listOfExtGState.add(gsDict); + throw new ValidationException("The Extended Graphics State dictionary is invalid"); } + listOfExtGState.add(gsDict); } } return listOfExtGState; @@ -231,9 +220,7 @@ private void checkFont(PreflightContext context, COSDictionary egs) throws Valid */ private void checkSoftMask(PreflightContext context, COSDictionary egs) { - COSBase smVal = egs.getItem(COSName.SMASK); - if (smVal != null && - !(smVal instanceof COSName && TRANSPARENCY_DICTIONARY_VALUE_SOFT_MASK_NONE.equals(((COSName) smVal).getName()))) + if (egs.containsKey(COSName.SMASK) && !COSName.NONE.equals(egs.getCOSName(COSName.SMASK))) { // ---- Soft Mask is valid only if it is a COSName equals to None context.addValidationError(new ValidationError(ERROR_TRANSPARENCY_EXT_GS_SOFT_MASK, @@ -249,16 +236,12 @@ private void checkSoftMask(PreflightContext context, COSDictionary egs) */ private void checkBlendMode(PreflightContext context, COSDictionary egs) { - COSBase bmVal = egs.getItem(TRANSPARENCY_DICTIONARY_KEY_BLEND_MODE); - if (bmVal != null) + COSName bmVal = egs.getCOSName(COSName.BM); + // ---- Blend Mode is valid only if it is equals to Normal or Compatible + if (bmVal != null 
&& !(COSName.NORMAL.equals(bmVal) || COSName.COMPATIBLE.equals(bmVal))) { - // ---- Blend Mode is valid only if it is equals to Normal or Compatible - if (!(bmVal instanceof COSName && (TRANSPARENCY_DICTIONARY_VALUE_BM_NORMAL.equals(((COSName) bmVal) - .getName()) || TRANSPARENCY_DICTIONARY_VALUE_BM_COMPATIBLE.equals(((COSName) bmVal).getName())))) - { - context.addValidationError(new ValidationError(ERROR_TRANSPARENCY_EXT_GS_BLEND_MODE, - "BlendMode value isn't valid (only Normal and Compatible are authorized)")); - } + context.addValidationError(new ValidationError(ERROR_TRANSPARENCY_EXT_GS_BLEND_MODE, + "BlendMode value isn't valid (only Normal and Compatible are authorized)")); } } @@ -271,7 +254,7 @@ private void checkBlendMode(PreflightContext context, COSDictionary egs) */ private void checkUpperCA(PreflightContext context, COSDictionary egs) { - COSBase uCA = egs.getItem(TRANSPARENCY_DICTIONARY_KEY_UPPER_CA); + COSBase uCA = egs.getDictionaryObject(COSName.CA); if (uCA != null) { // ---- If CA is present only the value 1.0 is authorized @@ -295,7 +278,7 @@ private void checkUpperCA(PreflightContext context, COSDictionary egs) */ private void checkLowerCA(PreflightContext context, COSDictionary egs) { - COSBase lCA = egs.getItem(TRANSPARENCY_DICTIONARY_KEY_LOWER_CA); + COSBase lCA = egs.getDictionaryObject(COSName.CA_NS); if (lCA != null) { // ---- If ca is present only the value 1.0 is authorized @@ -318,7 +301,7 @@ private void checkLowerCA(PreflightContext context, COSDictionary egs) */ protected void checkTRKey(PreflightContext context, COSDictionary egs) { - if (egs.getItem(COSName.TR) != null) + if (egs.containsKey(COSName.TR)) { context.addValidationError(new ValidationError(ERROR_GRAPHIC_UNEXPECTED_KEY, "No TR key expected in Extended graphics state")); @@ -333,13 +316,14 @@ protected void checkTRKey(PreflightContext context, COSDictionary egs) */ protected void checkTR2Key(PreflightContext context, COSDictionary egs) { - if (egs.getItem("TR2") != 
null) + if (egs.containsKey(COSName.TR2)) { - String s = egs.getNameAsString("TR2"); + String s = egs.getNameAsString(COSName.TR2); if (!"Default".equals(s)) { - context.addValidationError(new ValidationError(ERROR_GRAPHIC_UNEXPECTED_VALUE_FOR_KEY, - "TR2 key only expect 'Default' value, not '" + s + "'")); + context.addValidationError( + new ValidationError(ERROR_GRAPHIC_UNEXPECTED_VALUE_FOR_KEY, + "TR2 key only expect 'Default' value, not '" + s + "'")); } } } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/FontValidationProcess.java b/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/FontValidationProcess.java index c72fb7255b6..9ebfef2672e 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/FontValidationProcess.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/FontValidationProcess.java @@ -68,11 +68,11 @@ public void validate(PreflightContext context) throws ValidationException else { PDFont font = (PDFont) vPath.peek(); - FontContainer fontContainer = context.getFontContainer(font.getCOSObject()); + FontContainer fontContainer = context.getFontContainer(font.getCOSObject()); if (fontContainer == null) { // if fontContainer isn't null the font is already checked - FontValidator validator = getFontValidator(context, font); + FontValidator> validator = getFontValidator(context, font); if (validator != null) { validator.validate(); @@ -88,7 +88,7 @@ public void validate(PreflightContext context) throws ValidationException * @param font the font object. * @return the font validator instance for the font type. 
*/ - protected FontValidator getFontValidator(PreflightContext context, PDFont font) + protected FontValidator> getFontValidator(PreflightContext context, PDFont font) { String subtype = font.getSubType(); if (FONT_DICTIONARY_VALUE_TRUETYPE.equals(subtype)) diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/ResourcesValidationProcess.java b/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/ResourcesValidationProcess.java index 8cde5b47c33..811f26b2beb 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/ResourcesValidationProcess.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/ResourcesValidationProcess.java @@ -54,7 +54,7 @@ import static org.apache.pdfbox.preflight.PreflightConfiguration.TILING_PATTERN_PROCESS; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_GRAPHIC_INVALID_PATTERN_DEFINITION; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_GRAPHIC_MAIN; -import static org.apache.pdfbox.preflight.PreflightConstants.TRANPARENCY_DICTIONARY_KEY_EXTGSTATE; +import static org.apache.pdfbox.preflight.PreflightConstants.TRANSPARENCY_DICTIONARY_KEY_EXTGSTATE; public class ResourcesValidationProcess extends AbstractProcess { @@ -92,12 +92,9 @@ else if (!vPath.isExpectedType(PDResources.class)) protected void validateFonts(PreflightContext context, PDResources resources) throws ValidationException { Map mapOfFonts = getFonts(resources.getCOSObject(), context); - if (mapOfFonts != null) + for (Entry entry : mapOfFonts.entrySet()) { - for (Entry entry : mapOfFonts.entrySet()) - { - ContextHelper.validateElement(context, entry.getValue(), FONT_PROCESS); - } + ContextHelper.validateElement(context, entry.getValue(), FONT_PROCESS); } } @@ -148,7 +145,7 @@ private Map getFonts(COSDictionary resources, PreflightContext c */ protected void validateExtGStates(PreflightContext context, PDResources resources) throws ValidationException { - 
COSBase egsEntry = resources.getCOSObject().getItem(TRANPARENCY_DICTIONARY_KEY_EXTGSTATE); + COSBase egsEntry = resources.getCOSObject().getItem(TRANSPARENCY_DICTIONARY_KEY_EXTGSTATE); COSDocument cosDocument = context.getDocument().getDocument(); COSDictionary extGState = COSUtils.getAsDictionary(egsEntry, cosDocument); if (egsEntry != null) @@ -211,32 +208,26 @@ protected void validateXObjects(PreflightContext context, PDResources resources) COSDocument cosDocument = context.getDocument().getDocument(); COSDictionary mapOfXObj = COSUtils.getAsDictionary(resources.getCOSObject().getItem(COSName.XOBJECT), cosDocument); - if (mapOfXObj != null) + if (mapOfXObj == null) + { + return; + } + for (Entry entry : mapOfXObj.entrySet()) { - for (Entry entry : mapOfXObj.entrySet()) + COSBase xobj = entry.getValue(); + if (xobj != null && COSUtils.isStream(xobj, cosDocument)) { - COSBase xobj = entry.getValue(); - if (xobj != null && COSUtils.isStream(xobj, cosDocument)) + try + { + COSStream stream = COSUtils.getAsStream(xobj, cosDocument); + PDXObject pdXObject = PDXObject.createXObject(stream, resources); + ContextHelper.validateElement(context, pdXObject, GRAPHIC_PROCESS); + } + catch (IOException e) { - try - { - COSStream stream = COSUtils.getAsStream(xobj, cosDocument); - PDXObject pdXObject = PDXObject.createXObject(stream, resources); - if (pdXObject != null) - { - ContextHelper.validateElement(context, pdXObject, GRAPHIC_PROCESS); - } - else - { - ContextHelper.validateElement(context, stream, GRAPHIC_PROCESS); - } - } - catch (IOException e) - { - context.addValidationError(new ValidationError(ERROR_GRAPHIC_MAIN, - e.getMessage() + " for entry '" - + entry.getKey().getName() + "'", e)); - } + context.addValidationError(new ValidationError(ERROR_GRAPHIC_MAIN, + e.getMessage() + " for entry '" + + entry.getKey().getName() + "'", e)); } } } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/ShadingPatternValidationProcess.java 
b/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/ShadingPatternValidationProcess.java index b81b8c30704..1e6b3546a90 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/ShadingPatternValidationProcess.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/ShadingPatternValidationProcess.java @@ -25,7 +25,7 @@ import static org.apache.pdfbox.preflight.PreflightConfiguration.EXTGSTATE_PROCESS; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_GRAPHIC_INVALID_UNKNOWN_COLOR_SPACE; -import static org.apache.pdfbox.preflight.PreflightConstants.TRANPARENCY_DICTIONARY_KEY_EXTGSTATE; +import static org.apache.pdfbox.preflight.PreflightConstants.TRANSPARENCY_DICTIONARY_KEY_EXTGSTATE; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.pdmodel.PDPage; @@ -98,7 +98,7 @@ protected void checkColorSpace(PreflightContext context, PDPage page, PDShading /** * Check the Extended Graphic State contains in the ShadingPattern dictionary if it is present. To check this - * ExtGState, this method uses the net.awl.edoc.pdfa.validation.graphics.ExtGStateContainer object. + * ExtGState, this method uses the org.apache.pdfbox.preflight.graphic.ExtGStateContainer object. * * @param context the preflight context. * @param page the page to check. 
@@ -109,7 +109,7 @@ protected void checkGraphicState(PreflightContext context, PDPage page, PDShadin throws ValidationException { COSDictionary resources = (COSDictionary) shadingRes.getCOSObject().getDictionaryObject( - TRANPARENCY_DICTIONARY_KEY_EXTGSTATE); + TRANSPARENCY_DICTIONARY_KEY_EXTGSTATE); if (resources != null) { ContextHelper.validateElement(context, resources, EXTGSTATE_PROCESS); diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/SinglePageValidationProcess.java b/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/SinglePageValidationProcess.java index a24028d26c1..d8309104df0 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/SinglePageValidationProcess.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/process/reflect/SinglePageValidationProcess.java @@ -153,6 +153,12 @@ protected void validateGraphicObjects(PreflightContext context, PDPage page) thr { thumbBase = ((COSObject) thumbBase).getObject(); } + if (!(thumbBase instanceof COSStream)) + { + context.addValidationError(new ValidationError(ERROR_GRAPHIC_INVALID, + "Thumb image must be a stream")); + return; + } PDXObject thumbImg = PDImageXObject.createThumbnail((COSStream)thumbBase); ContextHelper.validateElement(context, thumbImg, GRAPHIC_PROCESS); } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/utils/COSUtils.java b/preflight/src/main/java/org/apache/pdfbox/preflight/utils/COSUtils.java index 4183d64c4cf..31bbaba7f4f 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/utils/COSUtils.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/utils/COSUtils.java @@ -56,20 +56,7 @@ private COSUtils() */ public static boolean isDictionary(COSBase elt, COSDocument doc) { - if (elt instanceof COSObject) - { - try - { - COSObjectKey key = new COSObjectKey((COSObject) elt); - COSObject obj = doc.getObjectFromPool(key); - return (obj != null && obj.getObject() instanceof 
COSDictionary); - } - catch (IOException e) - { - return false; - } - } - return (elt instanceof COSDictionary); + return isClass(elt, doc, COSDictionary.class); } /** @@ -91,6 +78,7 @@ public static boolean isString(COSBase elt, COSDocument doc) } catch (IOException e) { + LOGGER.debug("Couldn't get COSObject from object pool - returning false", e); return false; } } @@ -107,21 +95,7 @@ public static boolean isString(COSBase elt, COSDocument doc) */ public static boolean isStream(COSBase elt, COSDocument doc) { - if (elt instanceof COSObject) - { - try - { - COSObjectKey key = new COSObjectKey((COSObject) elt); - COSObject obj = doc.getObjectFromPool(key); - return (obj != null && obj.getObject() instanceof COSStream); - } - catch (IOException e) - { - return false; - } - } - - return (elt instanceof COSStream); + return isClass(elt, doc, COSStream.class); } /** @@ -132,6 +106,19 @@ public static boolean isStream(COSBase elt, COSDocument doc) * @return true if the object is a COSInteger or a reference to it. */ public static boolean isInteger(COSBase elt, COSDocument doc) + { + return isClass(elt, doc, COSInteger.class); + } + + /** + * return true if the elt is of class or a reference to a that class. + * + * @param elt the object to check. + * @param doc the document. + * @param claz the class. + * @return true if the object is a of that class or a reference to it. 
+ */ + private static boolean isClass(COSBase elt, COSDocument doc, Class claz) { if (elt instanceof COSObject) { @@ -139,15 +126,16 @@ public static boolean isInteger(COSBase elt, COSDocument doc) { COSObjectKey key = new COSObjectKey((COSObject) elt); COSObject obj = doc.getObjectFromPool(key); - return (obj != null && obj.getObject() instanceof COSInteger); + return (obj != null && claz.isInstance(obj.getObject())); } catch (IOException e) { + LOGGER.debug("Couldn't get COSObject from object pool - returning false", e); return false; } } - return (elt instanceof COSInteger); + return claz.isInstance(elt); } /** @@ -171,21 +159,7 @@ public static boolean isNumeric(COSBase elt, COSDocument doc) */ public static boolean isFloat(COSBase elt, COSDocument doc) { - if (elt instanceof COSObject) - { - try - { - COSObjectKey key = new COSObjectKey((COSObject) elt); - COSObject obj = doc.getObjectFromPool(key); - return (obj != null && obj.getObject() instanceof COSFloat); - } - catch (IOException e) - { - return false; - } - } - - return (elt instanceof COSFloat); + return isClass(elt, doc, COSFloat.class); } /** @@ -197,21 +171,7 @@ public static boolean isFloat(COSBase elt, COSDocument doc) */ public static boolean isArray(COSBase elt, COSDocument doc) { - if (elt instanceof COSObject) - { - try - { - COSObjectKey key = new COSObjectKey((COSObject) elt); - COSObject obj = doc.getObjectFromPool(key); - return (obj != null && obj.getObject() instanceof COSArray); - } - catch (IOException e) - { - return false; - } - } - - return (elt instanceof COSArray); + return isClass(elt, doc, COSArray.class); } /** @@ -226,23 +186,7 @@ public static COSArray getAsArray(COSBase cbase, COSDocument cDoc) { if (cbase instanceof COSObject) { - try - { - COSObjectKey key = new COSObjectKey((COSObject) cbase); - COSObject obj = cDoc.getObjectFromPool(key); - if (obj != null && obj.getObject() instanceof COSArray) - { - return (COSArray) obj.getObject(); - } - else - { - return null; - } - 
} - catch (IOException e) - { - return null; - } + return (COSArray) getCOSObjectAsClass((COSObject) cbase, cDoc, COSArray.class); } else if (cbase instanceof COSArray) { @@ -286,6 +230,7 @@ else if (obj != null && obj.getObject() instanceof COSName) } catch (IOException e) { + LOGGER.debug("Couldn't get COSObject from object pool - returning null", e); return null; } } @@ -315,23 +260,7 @@ public static COSDictionary getAsDictionary(COSBase cbase, COSDocument cDoc) { if (cbase instanceof COSObject) { - try - { - COSObjectKey key = new COSObjectKey((COSObject) cbase); - COSObject obj = cDoc.getObjectFromPool(key); - if (obj != null && obj.getObject() instanceof COSDictionary) - { - return (COSDictionary) obj.getObject(); - } - else - { - return null; - } - } - catch (IOException e) - { - return null; - } + return (COSDictionary) getCOSObjectAsClass((COSObject) cbase, cDoc, COSDictionary.class); } else if (cbase instanceof COSDictionary) { @@ -355,23 +284,7 @@ public static COSStream getAsStream(COSBase cbase, COSDocument cDoc) { if (cbase instanceof COSObject) { - try - { - COSObjectKey key = new COSObjectKey((COSObject) cbase); - COSObject obj = cDoc.getObjectFromPool(key); - if (obj != null && obj.getObject() instanceof COSStream) - { - return (COSStream) obj.getObject(); - } - else - { - return null; - } - } - catch (IOException e) - { - return null; - } + return (COSStream) getCOSObjectAsClass((COSObject) cbase, cDoc, COSStream.class); } else if (cbase instanceof COSStream) { @@ -414,6 +327,7 @@ else if (obj.getObject() instanceof COSNumber) } catch (IOException e) { + LOGGER.debug("Couldn't get COSObject from object pool - returning null", e); return null; } } @@ -458,6 +372,7 @@ else if (obj.getObject() instanceof COSNumber) } catch (IOException e) { + LOGGER.debug("Couldn't get COSObject from object pool - returning null", e); return null; } } @@ -488,7 +403,7 @@ public static void closeDocumentQuietly(COSDocument document) } catch (IOException e) { - 
LOGGER.warn("Error occured during the close of a COSDocument : " + e.getMessage()); + LOGGER.warn("Error occurred during the close of a COSDocument : " + e.getMessage(), e); } } @@ -505,4 +420,36 @@ public static void closeDocumentQuietly(PDDocument document) closeDocumentQuietly(document.getDocument()); } } + + /** + * Return the COSObject object as class if the COSObject object is a reference to an object of + * that class. If not, then this method returns null; + * + * @param cosObject the object to get. + * @param cDoc the document. + * @param claz the class. + * @return the object as class if the object is a reference to that class. Returns null + * otherwise. + */ + private static COSBase getCOSObjectAsClass(COSObject cosObject, COSDocument cDoc, Class claz) + { + try + { + COSObjectKey key = new COSObjectKey(cosObject); + COSObject obj = cDoc.getObjectFromPool(key); + if (obj != null && claz.isInstance(obj.getObject())) + { + return obj.getObject(); + } + else + { + return null; + } + } + catch (IOException e) + { + LOGGER.debug("Couldn't get COSObject from object pool - returning null", e); + return null; + } + } } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/utils/ContextHelper.java b/preflight/src/main/java/org/apache/pdfbox/preflight/utils/ContextHelper.java index 244a46dde07..8eaec2ae536 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/utils/ContextHelper.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/utils/ContextHelper.java @@ -21,10 +21,13 @@ package org.apache.pdfbox.preflight.utils; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.preflight.PreflightConfiguration; -import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_PDF_PROCESSING_MISSING; import static 
org.apache.pdfbox.preflight.PreflightConstants.ERROR_PDF_PROCESSING; +import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_PDF_PROCESSING_MISSING; import org.apache.pdfbox.preflight.PreflightContext; import org.apache.pdfbox.preflight.PreflightPath; import org.apache.pdfbox.preflight.exception.ValidationException; @@ -77,7 +80,22 @@ private static void callValidation(PreflightContext context, Object element, Str { return; } - + // avoid checking certain elements twice. This can't be generalized, + // because some are checked by several processes. + if (element instanceof COSObjectable && + // PDFBOX-4450 catch undetected recursions + (PreflightConfiguration.GRAPHIC_PROCESS.equals(processName) || + PreflightConfiguration.FONT_PROCESS.equals(processName))) // for speed + { + // don't check PDObjects, only their COSObject + COSBase cos = ((COSObjectable) element).getCOSObject(); + if (context.isInProcessedSet(cos)) + { + return; + } + context.addToProcessedSet(cos); + } + boolean needPop = validationPath.pushObject(element); PreflightConfiguration config = context.getConfig(); ValidationProcess process = config.getInstanceOfProcess(processName); @@ -91,17 +109,18 @@ private static void callValidation(PreflightContext context, Object element, Str // detect recursion that would lead to stack overflow private static boolean hasRecursion(PreflightContext context, Object element, PreflightPath validationPath) { - if (element instanceof PDResources) + if (element instanceof PDResources || element instanceof PDFormXObject) { for (int i = 0; i < validationPath.size(); ++i) { Object obj = validationPath.getPathElement(i, Object.class); - if (obj instanceof PDResources) + if (obj instanceof COSObjectable) { - PDResources pdRes = (PDResources) obj; - if (pdRes.getCOSObject() == ((PDResources) element).getCOSObject()) + COSObjectable cos = (COSObjectable) obj; + if (cos.getCOSObject() == ((COSObjectable) element).getCOSObject()) { - context.addValidationError(new 
ValidationError(ERROR_PDF_PROCESSING, "Resources recursion")); + context.addValidationError(new ValidationError(ERROR_PDF_PROCESSING, + element.getClass().getSimpleName() + " recursion")); return true; } } diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/utils/FilterHelper.java b/preflight/src/main/java/org/apache/pdfbox/preflight/utils/FilterHelper.java index 5597f8358f8..5b338c2f093 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/utils/FilterHelper.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/utils/FilterHelper.java @@ -103,8 +103,8 @@ public static void isAuthorizedFilter(PreflightContext context, String filter) * LZW filter is forbidden due to Copyright compatibility. Because of the PDF/A is based on the PDF1.4 * specification, all filters that aren't declared in the PDF Reference Third Edition are rejected. * - * @param context - * @param filter + * @param context the preflight context + * @param filter the filter to check */ public static void isAuthorizedFilterInPDFA(PreflightContext context, String filter) { diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/xobject/AbstractXObjValidator.java b/preflight/src/main/java/org/apache/pdfbox/preflight/xobject/AbstractXObjValidator.java index d088c3fbf4e..97aa6461f2e 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/xobject/AbstractXObjValidator.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/xobject/AbstractXObjValidator.java @@ -142,7 +142,7 @@ protected void checkPostscriptXObject() /* * (non-Javadoc) * - * @see net.awl.edoc.pdfa.validation.graphics.XObjectValidator#validate() + * @see org.apache.pdfbox.preflight.graphic.XObjectValidator#validate() */ @Override public void validate() throws ValidationException diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/xobject/XObjFormValidator.java b/preflight/src/main/java/org/apache/pdfbox/preflight/xobject/XObjFormValidator.java index 645b4204aa5..f8096d8396c 
100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/xobject/XObjFormValidator.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/xobject/XObjFormValidator.java @@ -64,7 +64,7 @@ public XObjFormValidator(PreflightContext context, PDFormXObject xobj) /* * (non-Javadoc) * - * @see net.awl.edoc.pdfa.validation.graphics.AbstractXObjValidator#validate() + * @see org.apache.pdfbox.preflight.graphic.AbstractXObjValidator#validate() */ @Override public void validate() throws ValidationException @@ -79,13 +79,13 @@ public void validate() throws ValidationException /* * (non-Javadoc) * - * @seenet.awl.edoc.pdfa.validation.graphics.AbstractXObjValidator# checkMandatoryFields(java.util.List) + * @seeorg.apache.pdfbox.preflight.graphic.AbstractXObjValidator# checkMandatoryFields(java.util.List) */ @Override protected void checkMandatoryFields() { boolean lastMod = this.xobject.getItem(COSName.LAST_MODIFIED) != null; - boolean pieceInfo = this.xobject.getItem("PieceInfo") != null; + boolean pieceInfo = this.xobject.getItem(COSName.PIECE_INFO) != null; // type and subtype checked before to create the Validator. 
if (lastMod ^ pieceInfo) { diff --git a/preflight/src/main/java/org/apache/pdfbox/preflight/xobject/XObjImageValidator.java b/preflight/src/main/java/org/apache/pdfbox/preflight/xobject/XObjImageValidator.java index 33c01276f82..1f6ca47ed62 100644 --- a/preflight/src/main/java/org/apache/pdfbox/preflight/xobject/XObjImageValidator.java +++ b/preflight/src/main/java/org/apache/pdfbox/preflight/xobject/XObjImageValidator.java @@ -166,7 +166,7 @@ private boolean isImageMaskTrue() /* * (non-Javadoc) * - * @see net.awl.edoc.pdfa.validation.graphics.AbstractXObjValidator#validate() + * @see org.apache.pdfbox.preflight.graphic.AbstractXObjValidator#validate() */ @Override public void validate() throws ValidationException diff --git a/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorBavaria.java b/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorBavaria.java index 9ba5fd83d4e..da29a2da9c7 100644 --- a/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorBavaria.java +++ b/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorBavaria.java @@ -86,7 +86,7 @@ public static Collection initializeParameters() throws Exception String error = new StringTokenizer(path, "//").nextToken().trim(); String[] errTab = error.split(","); Set errorSet = new HashSet(Arrays.asList(errTab)); - data.add(new Object[] { (File) pdfFile, errorSet } ); + data.add(new Object[] { pdfFile, errorSet } ); } } } @@ -113,7 +113,7 @@ public static Collection initializeParameters() throws Exception String error = new StringTokenizer(path, "//").nextToken().trim(); errorSet.addAll(Arrays.asList(error.split(","))); } - data.add(new Object[] { (File) pdfFile, errorSet } ); + data.add(new Object[] { pdfFile, errorSet } ); } } } diff --git a/preflight/src/test/java/org/apache/pdfbox/preflight/TestPDFBox3741.java b/preflight/src/test/java/org/apache/pdfbox/preflight/TestPDFBox3741.java new file mode 100644 index 00000000000..e6c718f34b5 --- /dev/null +++ 
b/preflight/src/test/java/org/apache/pdfbox/preflight/TestPDFBox3741.java @@ -0,0 +1,55 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.preflight; + +import java.io.IOException; +import javax.activation.DataSource; +import javax.activation.FileDataSource; + +import org.apache.pdfbox.preflight.parser.PreflightParser; + +import org.junit.Assert; +import org.junit.Test; + +/** + * + * @author Tilman Hausherr + */ +public class TestPDFBox3741 +{ + /** + * Test whether use of default colorspace without output intent for text output is detected. 
+ * + * @throws IOException + */ + @Test + public void testPDFBox3741() throws IOException + { + DataSource ds = new FileDataSource("src/test/resources/PDFBOX-3741.pdf"); + PreflightParser parser = new PreflightParser(ds); + parser.parse(); + PreflightDocument document = parser.getPreflightDocument(); + document.validate(); + ValidationResult result = document.getResult(); + document.close(); + + // Error should be: + // 2.4.3: Invalid Color space, /DeviceGray default for operator "Tj" can't be used without Color Profile + Assert.assertFalse("File PDFBOX-3741.pdf should be detected as not PDF/A-1b", result.isValid()); + Assert.assertEquals("List should contain one result", 1, result.getErrorsList().size()); + Assert.assertEquals("2.4.3", result.getErrorsList().get(0).getErrorCode()); + } +} diff --git a/preflight/src/test/java/org/apache/pdfbox/preflight/TestPreflightPath.java b/preflight/src/test/java/org/apache/pdfbox/preflight/TestPreflightPath.java index 61423177d8d..1c4c9258111 100644 --- a/preflight/src/test/java/org/apache/pdfbox/preflight/TestPreflightPath.java +++ b/preflight/src/test/java/org/apache/pdfbox/preflight/TestPreflightPath.java @@ -63,7 +63,7 @@ public void test() assertEquals(1, position); Integer i = path.getPathElement(position, Integer.class); - assertEquals(new Integer(6), i); + assertEquals(Integer.valueOf(6), i); Object str = path.peek(); assertEquals(3, path.size()); diff --git a/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java b/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java index 6f9aa80d3d3..a2000f6becc 100644 --- a/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java +++ b/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/AbstractTestAction.java @@ -135,5 +135,6 @@ protected void valid(COSDictionary action, boolean valid, String expectedCode) t assertTrue(errors.isEmpty()); } } + 
ctx.getDocument().close(); } } diff --git a/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/TestGotoAction.java b/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/TestGotoAction.java index 8d64c9148ea..18d63ab31a0 100644 --- a/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/TestGotoAction.java +++ b/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/TestGotoAction.java @@ -21,7 +21,6 @@ package org.apache.pdfbox.preflight.action.pdfa1b; -import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo; @@ -39,7 +38,7 @@ public void testGoto_OK() throws Exception gotoAction.setDestination(new PDDestination() { @Override - public COSBase getCOSObject() + public COSName getCOSObject() { return COSName.getPDFName("ADest"); } @@ -55,7 +54,7 @@ public void testGoto_KO_invalidContent() throws Exception gotoAction.setDestination(new PDDestination() { @Override - public COSBase getCOSObject() + public COSDictionary getCOSObject() { return new COSDictionary(); } diff --git a/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/TestGotoRemoteAction.java b/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/TestGotoRemoteAction.java index 04ff17f6723..24f80d0d23d 100644 --- a/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/TestGotoRemoteAction.java +++ b/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/TestGotoRemoteAction.java @@ -21,7 +21,6 @@ package org.apache.pdfbox.preflight.action.pdfa1b; -import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.filespecification.PDFileSpecification; @@ -39,7 +38,8 @@ public void testGoto_OK() throws Exception gotoAction.setD(COSName.getPDFName("ADest")); gotoAction.setFile(new 
PDFileSpecification() { - public COSBase getCOSObject() + @Override + public COSName getCOSObject() { return COSName.getPDFName("ADest"); } @@ -65,7 +65,8 @@ public void testGoto_KO_InvalidContent() throws Exception gotoAction.setD(new COSDictionary()); gotoAction.setFile(new PDFileSpecification() { - public COSBase getCOSObject() + @Override + public COSName getCOSObject() { return COSName.getPDFName("ADest"); } @@ -90,7 +91,8 @@ public void testGoto_KO_MissingD() throws Exception PDActionRemoteGoTo gotoAction = new PDActionRemoteGoTo(); gotoAction.setFile(new PDFileSpecification() { - public COSBase getCOSObject() + @Override + public COSName getCOSObject() { return COSName.getPDFName("ADest"); } diff --git a/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/TestSubmitAction.java b/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/TestSubmitAction.java index daf64cb064c..87bd65730ac 100644 --- a/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/TestSubmitAction.java +++ b/preflight/src/test/java/org/apache/pdfbox/preflight/action/pdfa1b/TestSubmitAction.java @@ -21,7 +21,6 @@ package org.apache.pdfbox.preflight.action.pdfa1b; -import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.filespecification.PDFileSpecification; @@ -38,7 +37,8 @@ protected COSDictionary createSubmitAction() action.setItem(COSName.S, COSName.getPDFName("SubmitForm")); action.setItem(COSName.F, new PDFileSpecification() { - public COSBase getCOSObject() + @Override + public COSName getCOSObject() { return COSName.getPDFName("value"); } diff --git a/preflight/src/test/java/org/apache/pdfbox/preflight/integration/TestValidFiles.java b/preflight/src/test/java/org/apache/pdfbox/preflight/integration/TestValidFiles.java index 53cc921b43a..f9985e82f9f 100644 --- a/preflight/src/test/java/org/apache/pdfbox/preflight/integration/TestValidFiles.java 
+++ b/preflight/src/test/java/org/apache/pdfbox/preflight/integration/TestValidFiles.java @@ -79,7 +79,7 @@ public static Collection initializeParameters() throws Exception { // find isartor files String isartor = System.getProperty(ISARTOR_FILES); - if (isartor == null) + if (isartor == null || isartor.isEmpty()) { staticLogger.warn(ISARTOR_FILES + " (where are isartor pdf files) is not defined."); return stopIfExpected(); diff --git a/preflight/src/test/java/org/apache/pdfbox/preflight/metadata/TestMetadataFiles.java b/preflight/src/test/java/org/apache/pdfbox/preflight/metadata/TestMetadataFiles.java index 17ff52388b9..28cac5dc494 100644 --- a/preflight/src/test/java/org/apache/pdfbox/preflight/metadata/TestMetadataFiles.java +++ b/preflight/src/test/java/org/apache/pdfbox/preflight/metadata/TestMetadataFiles.java @@ -86,7 +86,7 @@ private boolean checkPDF(File pdf) } catch (IOException e) { - fail("An exception occured while parsing the PDF " + pdf + ": " + e); + fail("An exception occurred while parsing the PDF " + pdf + ": " + e); } if (result != null) { diff --git a/preflight/src/test/java/org/apache/pdfbox/preflight/metadata/TestSynchronizedMetadataValidation.java b/preflight/src/test/java/org/apache/pdfbox/preflight/metadata/TestSynchronizedMetadataValidation.java index 741a7f4e985..d765089b331 100644 --- a/preflight/src/test/java/org/apache/pdfbox/preflight/metadata/TestSynchronizedMetadataValidation.java +++ b/preflight/src/test/java/org/apache/pdfbox/preflight/metadata/TestSynchronizedMetadataValidation.java @@ -123,14 +123,7 @@ public void TestDocumentWithoutInformation() throws Exception @Test public void testEmptyXMP() throws Exception { - title = "TITLE"; - author = "AUTHOR(S)"; - subject = "SUBJECTS"; - keywords = "KEYWORD(S)"; - creator = "CREATOR"; - producer = "PRODUCER"; - creationDate = Calendar.getInstance(); - modifyDate = Calendar.getInstance(); + initValues(); // Writing info in Document Information dictionary // TITLE @@ -173,14 +166,7 @@ 
public void testEmptyXMP() throws Exception @Test public void testEmptyXMPSchemas() throws Exception { - title = "TITLE"; - author = "AUTHOR(S)"; - subject = "SUBJECTS"; - keywords = "KEYWORD(S)"; - creator = "CREATOR"; - producer = "PRODUCER"; - creationDate = Calendar.getInstance(); - modifyDate = Calendar.getInstance(); + initValues(); // building temporary XMP metadata (but empty) metadata.createAndAddDublinCoreSchema(); @@ -373,16 +359,9 @@ public void testAllInfoUnsynchronized() throws Exception * @throws Exception */ @Test - public void testAllInfoSynhcronized() throws Exception + public void testAllInfoSynchronized() throws Exception { - title = "TITLE"; - author = "AUTHOR(S)"; - subject = "SUBJECTS"; - keywords = "KEYWORD(S)"; - creator = "CREATOR"; - producer = "PRODUCER"; - creationDate = Calendar.getInstance(); - modifyDate = Calendar.getInstance(); + initValues(); // building temporary XMP metadata DublinCoreSchema dc = metadata.createAndAddDublinCoreSchema(); @@ -447,14 +426,7 @@ public void checkSchemaAccessException() throws Exception @Test public void testBadPrefixSchemas() throws Exception { - title = "TITLE"; - author = "AUTHOR(S)"; - subject = "SUBJECTS"; - keywords = "KEYWORD(S)"; - creator = "CREATOR"; - producer = "PRODUCER"; - creationDate = Calendar.getInstance(); - modifyDate = Calendar.getInstance(); + initValues(); // building temporary XMP metadata DublinCoreSchema dc = new DublinCoreSchema(metadata, "dctest"); @@ -514,14 +486,7 @@ public void testBadPrefixSchemas() throws Exception @Test public void testdoublePrefixSchemas() throws Exception { - title = "TITLE"; - author = "AUTHOR(S)"; - subject = "SUBJECTS"; - keywords = "KEYWORD(S)"; - creator = "CREATOR"; - producer = "PRODUCER"; - creationDate = Calendar.getInstance(); - modifyDate = Calendar.getInstance(); + initValues(); // building temporary XMP metadata DublinCoreSchema dc = metadata.createAndAddDublinCoreSchema(); @@ -575,7 +540,40 @@ public void testdoublePrefixSchemas() 
throws Exception { throw new Exception(e.getMessage()); } + } + + /** + * Tests that two date values, which are from different time zones but + * really identical, are detected as such. + * + * @throws Exception + */ + @Test + public void testPDFBox4292() throws Exception + { + initValues(); + + Calendar cal1 = org.apache.pdfbox.util.DateConverter.toCalendar("20180817115837+02'00'"); + Calendar cal2 = org.apache.xmpbox.DateConverter.toCalendar("2018-08-17T09:58:37Z"); + + XMPBasicSchema xmp = metadata.createAndAddXMPBasicSchema(); + + dico.setCreationDate(cal1); + xmp.setCreateDate(cal2); + dico.setModificationDate(cal1); + xmp.setModifyDate(cal2); + // Launching synchronization test + try + { + ve = sync.validateMetadataSynchronization(doc, metadata); + // Test unsychronized value + Assert.assertEquals(0, ve.size()); + } + catch (ValidationException e) + { + throw new Exception(e.getMessage()); + } } @After @@ -595,4 +593,21 @@ public void checkErrors() throws Exception */ } + private void initValues() + { + title = "TITLE"; + author = "AUTHOR(S)"; + subject = "SUBJECTS"; + keywords = "KEYWORD(S)"; + creator = "CREATOR"; + producer = "PRODUCER"; + creationDate = Calendar.getInstance(); + modifyDate = Calendar.getInstance(); + + // PDFBOX-4292: because xmp keeps the milliseconds before writing to XML, + // but COS doesn't, tests would fail when calendar values are compared + // so reset the milliseconds. 
+ creationDate.set(Calendar.MILLISECOND, 0); + modifyDate.set(Calendar.MILLISECOND, 0); + } } diff --git a/preflight/src/test/resources/PDFBOX-3741.pdf b/preflight/src/test/resources/PDFBOX-3741.pdf new file mode 100644 index 00000000000..151cdaeddca Binary files /dev/null and b/preflight/src/test/resources/PDFBOX-3741.pdf differ diff --git a/preflight/src/test/resources/expected_errors.txt b/preflight/src/test/resources/expected_errors.txt index eabb69eac1f..e13316f13ce 100644 --- a/preflight/src/test/resources/expected_errors.txt +++ b/preflight/src/test/resources/expected_errors.txt @@ -110,7 +110,9 @@ isartor-6-3-4-t01-fail-b.pdf=3.1.3 isartor-6-3-4-t01-fail-c.pdf=3.1.3 isartor-6-3-4-t01-fail-d.pdf=3.1.3 isartor-6-3-4-t01-fail-e.pdf=3.1.3 -isartor-6-3-4-t01-fail-f.pdf=3.1.3 +# 5.3.1 check is not part of isartor, but N Appearance of a Btn widget must not be a stream, but an appearance subdictionary, see PDFBOX-3408 +# PDFTools validator agrees: "The appearance must have state dictionaries (subdictionaries to 'N')" +isartor-6-3-4-t01-fail-f.pdf=3.1.3,5.3.1 isartor-6-3-4-t01-fail-g.pdf=3.2.4 // Type3 Damage because the Type1 used as Resource isn't embedded isartor-6-3-4-t01-fail-h.pdf=3.1.3 isartor-6-3-5-t01-fail-a.pdf=3.3.1 @@ -299,10 +301,7 @@ pardes14_Jid02_reduced.pdf=1.0.3 stat_dis_30_fixed.pdf=3.1.2 Funktionale_Varietaeten.pdf=3.1.4,3.1.11,7.3,2.4.2 apogee.pdf=3.3.1 - -# operator "g" without profile not detected by PDF-Tools validator, maybe because it aborts after the conflicting N. 
-bug1771.pdf=7.1,2.1.2,1.2.1,1.2.5,2.4.3,1.2.2 - +bug1771.pdf=7.1,2.1.2,1.2.1,1.2.5,1.2.2 empty_word.pdf=7.2 literat.pdf=1.2.5,7.11 nesrin.pdf=1.2.1,1.2.2,1.2.5 @@ -330,7 +329,7 @@ terminanschreiben.pdf=7.1 # Bavaria unclear PDFExportDialog2.pdf= -Pardes13_Rez02.pdf=1.4.6 +Pardes13_Rez02.pdf= # IEEE754D,L,3A4901F387D31108 because of Real good0016.pdf=7.1 diff --git a/preflight/src/test/resources/log4j.xml b/preflight/src/test/resources/log4j.xml deleted file mode 100755 index 3a3090379ef..00000000000 --- a/preflight/src/test/resources/log4j.xml +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - - - - - - - - \ No newline at end of file diff --git a/tools/pom.xml b/tools/pom.xml index 0b65d4c1650..31da0507319 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -23,7 +23,7 @@ org.apache.pdfbox pdfbox-parent - 2.0.0-SNAPSHOT + 2.0.25-SNAPSHOT ../parent/pom.xml @@ -53,14 +53,14 @@ junit test - - com.levigo.jbig2 - levigo-jbig2-imageio + org.apache.pdfbox + jbig2-imageio test + com.github.jai-imageio jai-imageio-core @@ -89,6 +89,18 @@ + + + org.apache.maven.plugins + maven-jar-plugin + + + + org.apache.pdfbox.tools + + + + diff --git a/tools/src/main/java/org/apache/pdfbox/tools/DecompressObjectstreams.java b/tools/src/main/java/org/apache/pdfbox/tools/DecompressObjectstreams.java index 7b60ef1c940..0b6941b643b 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/DecompressObjectstreams.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/DecompressObjectstreams.java @@ -1,6 +1,4 @@ /* - * Copyright 2010 adam. - * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -18,6 +16,7 @@ package org.apache.pdfbox.tools; import java.io.File; +import java.io.IOException; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSObject; @@ -25,6 +24,7 @@ import org.apache.pdfbox.pdfparser.PDFObjectStreamParser; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.cos.COSObjectKey; +import org.apache.pdfbox.io.IOUtils; /** * This program will just take all of the stream objects in a PDF and dereference @@ -96,22 +96,13 @@ public static void main(String[] args) } doc.save(outputFilename); } - catch(Exception e) + catch (IOException e) { System.err.println("Error processing file: " + e.getMessage()); } finally { - if(doc != null) - { - try - { - doc.close(); - } - catch(Exception e) - { - } - } + IOUtils.closeQuietly(doc); } } diff --git a/tools/src/main/java/org/apache/pdfbox/tools/Decrypt.java b/tools/src/main/java/org/apache/pdfbox/tools/Decrypt.java index b4b67edda74..8673057493f 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/Decrypt.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/Decrypt.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.InputStream; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; @@ -33,6 +34,7 @@ public final class Decrypt { private static final String ALIAS = "-alias"; + @SuppressWarnings({"squid:S2068"}) private static final String PASSWORD = "-password"; private static final String KEYSTORE = "-keyStore"; @@ -65,7 +67,7 @@ public static void main(String[] args) throws IOException private void parseCommandLineArgs(String[] args) { - if( args.length < 1 || args.length > 5 ) + if( args.length < 1 || args.length > 8 ) { usage(); } @@ -131,9 +133,9 @@ else if( outfile == null ) private void decrypt() throws IOException { PDDocument document = null; + InputStream keyStoreStream = null; try { - InputStream keyStoreStream 
= null; if( keyStore != null ) { keyStoreStream = new FileInputStream(keyStore); @@ -165,6 +167,7 @@ private void decrypt() throws IOException { document.close(); } + IOUtils.closeQuietly(keyStoreStream); } } diff --git a/tools/src/main/java/org/apache/pdfbox/tools/Encrypt.java b/tools/src/main/java/org/apache/pdfbox/tools/Encrypt.java index 14f119e10cb..f3d1478ff7d 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/Encrypt.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/Encrypt.java @@ -23,6 +23,8 @@ import java.security.cert.CertificateException; import java.security.cert.CertificateFactory; import java.security.cert.X509Certificate; +import java.util.ArrayList; +import java.util.List; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; @@ -32,7 +34,7 @@ /** * This will read a document from the filesystem, encrypt it and and then write - * the results to the filesystem.

      + * the results to the filesystem. * * @author Ben Litchfield */ @@ -71,11 +73,13 @@ private void encrypt( String[] args ) throws IOException, CertificateException String infile = null; String outfile = null; - String certFile = null; + List certFileList = new ArrayList(); + @SuppressWarnings({"squid:S2068"}) String userPassword = ""; + @SuppressWarnings({"squid:S2068"}) String ownerPassword = ""; - int keyLength = 40; + int keyLength = 256; PDDocument document = null; @@ -126,7 +130,7 @@ else if( key.equals( "-canPrintDegraded" ) ) } else if( key.equals( "-certFile" ) ) { - certFile = args[++i]; + certFileList.add(new File(args[++i])); } else if( key.equals( "-keyLength" ) ) { @@ -165,31 +169,32 @@ else if( outfile == null ) if( !document.isEncrypted() ) { - if( certFile != null ) + if (!certFileList.isEmpty()) { PublicKeyProtectionPolicy ppp = new PublicKeyProtectionPolicy(); PublicKeyRecipient recip = new PublicKeyRecipient(); recip.setPermission(ap); - CertificateFactory cf = CertificateFactory.getInstance("X.509"); - - InputStream inStream = null; - try - { - inStream = new FileInputStream(certFile); - X509Certificate certificate = (X509Certificate)cf.generateCertificate(inStream); - recip.setX509(certificate); - } - finally + + for (File certFile : certFileList) { - if (inStream != null) + InputStream inStream = null; + try { - inStream.close(); + inStream = new FileInputStream(certFile); + X509Certificate certificate = (X509Certificate) cf.generateCertificate(inStream); + recip.setX509(certificate); } - } - - ppp.addRecipient(recip); + finally + { + if (inStream != null) + { + inStream.close(); + } + } + ppp.addRecipient(recip); + } ppp.setEncryptionKeyLength(keyLength); @@ -226,9 +231,9 @@ private static void usage() { String message = "Usage: java -jar pdfbox-app-x.y.z.jar Encrypt [options] [outputfile]\n" + "\nOptions:\n" - + " -O : Set the owner password (ignored if cert is set)\n" - + " -U : Set the user password (ignored if cert is set)\n" - + " 
-certFile : Path to X.509 certificate\n" + + " -O : Set the owner password (ignored if certFile is set)\n" + + " -U : Set the user password (ignored if certFile is set)\n" + + " -certFile : Path to X.509 certificate (repeat both if needed)\n" + " -canAssemble : Set the assemble permission\n" + " -canExtractContent : Set the extraction permission\n" + " -canExtractForAccessibility : Set the extraction permission\n" @@ -237,8 +242,8 @@ private static void usage() + " -canModifyAnnotations : Set the modify annots permission\n" + " -canPrint : Set the print permission\n" + " -canPrintDegraded : Set the print degraded permission\n" - + " -keyLength : The length of the key in bits " - + "(valid values: 40, 128 or 256, default is 40)\n" + + " -keyLength : Key length in bits " + + "(valid values: 40, 128 or 256, default is 256)\n" + "\nNote: By default all permissions are set to true!"; System.err.println(message); diff --git a/tools/src/main/java/org/apache/pdfbox/tools/ExportFDF.java b/tools/src/main/java/org/apache/pdfbox/tools/ExportFDF.java index ef4c3b28f99..48e3259dc95 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/ExportFDF.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/ExportFDF.java @@ -40,7 +40,7 @@ public ExportFDF() /** * This will import an fdf document and write out another pdf. - *
      + *
      * see usage() for commandline * * @param args command line arguments diff --git a/tools/src/main/java/org/apache/pdfbox/tools/ExportXFDF.java b/tools/src/main/java/org/apache/pdfbox/tools/ExportXFDF.java index b5b876bef3b..60b2db51931 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/ExportXFDF.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/ExportXFDF.java @@ -40,13 +40,12 @@ public ExportXFDF() /** * This will import an fdf document and write out another pdf. - *
      + *
      * see usage() for commandline * * @param args command line arguments * @throws IOException in case the file can not be read or the data can not be exported. * - * @throws IOException If there is an error importing the FDF document. */ public static void main(String[] args) throws IOException { diff --git a/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java b/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java index 1703541fc05..9ca3b1d4bae 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java @@ -26,19 +26,31 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; -import org.apache.pdfbox.pdmodel.graphics.image.PDImage; -import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.graphics.color.PDPattern; +import org.apache.pdfbox.pdmodel.graphics.form.PDTransparencyGroup; +import org.apache.pdfbox.pdmodel.graphics.image.PDImage; +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; +import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern; +import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern; +import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; +import 
org.apache.pdfbox.pdmodel.graphics.state.PDSoftMask; +import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; import org.apache.pdfbox.tools.imageio.ImageIOUtil; -import org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine; +import org.apache.pdfbox.util.Matrix; +import org.apache.pdfbox.util.Vector; /** * Extracts the images from a PDF file. @@ -47,16 +59,19 @@ */ public final class ExtractImages { + @SuppressWarnings({"squid:S2068"}) private static final String PASSWORD = "-password"; private static final String PREFIX = "-prefix"; private static final String DIRECTJPEG = "-directJPEG"; + private static final String NOCOLORCONVERT = "-noColorConvert"; private static final List JPEG = Arrays.asList( COSName.DCT_DECODE.getName(), COSName.DCT_DECODE_ABBREVIATION.getName()); - private boolean directJPEG; - private String prefix; + private boolean useDirectJPEG; + private boolean noColorConvert; + private String filePrefix; private final Set seen = new HashSet(); private int imageCounter = 1; @@ -89,6 +104,7 @@ private void run(String[] args) throws IOException else { String pdfFile = null; + @SuppressWarnings({"squid:S2068"}) String password = ""; for(int i = 0; i < args.length; i++) { @@ -108,11 +124,15 @@ else if (args[i].equals(PREFIX)) { usage(); } - prefix = args[i]; + filePrefix = args[i]; } else if (args[i].equals(DIRECTJPEG)) { - directJPEG = true; + useDirectJPEG = true; + } + else if (args[i].equals(NOCOLORCONVERT)) + { + noColorConvert = true; } else { @@ -128,9 +148,9 @@ else if (args[i].equals(DIRECTJPEG)) } else { - if (prefix == null && pdfFile.length() >4) + if (filePrefix == null && pdfFile.length() > 4) { - prefix = pdfFile.substring(0, pdfFile.length() -4); + filePrefix = pdfFile.substring(0, pdfFile.length() - 4); } extract(pdfFile, password); @@ -146,9 +166,11 @@ private static void usage() String message = "Usage: java " + ExtractImages.class.getName() + " [options] \n" + "\nOptions:\n" + " -password : Password to decrypt document\n" - 
+ " -prefix : Image prefix(default to pdf name)\n" - + " -directJPEG : Forces the direct extraction of JPEG images " - + "regardless of colorspace\n" + + " -prefix : Image prefix (default to pdf name)\n" + + " -directJPEG : Forces the direct extraction of JPEG/JPX images \n" + + " regardless of colorspace or masking\n" + + " -noColorConvert : Images are extracted with their \n" + + " original colorspace if possible.\n" + " : The PDF document to use\n"; System.err.println(message); @@ -167,9 +189,8 @@ private void extract(String pdfFile, String password) throws IOException throw new IOException("You do not have permission to extract images"); } - for (int i = 0; i < document.getNumberOfPages(); i++) // todo: ITERATOR would be much better + for (PDPage page : document.getPages()) { - PDPage page = document.getPage(i); ImageGraphicsEngine extractor = new ImageGraphicsEngine(page); extractor.run(); } @@ -185,14 +206,41 @@ private void extract(String pdfFile, String password) throws IOException private class ImageGraphicsEngine extends PDFGraphicsStreamEngine { - protected ImageGraphicsEngine(PDPage page) throws IOException + protected ImageGraphicsEngine(PDPage page) { super(page); } public void run() throws IOException { - processPage(getPage()); + PDPage page = getPage(); + processPage(page); + PDResources res = page.getResources(); + if (res == null) + { + return; + } + for (COSName name : res.getExtGStateNames()) + { + PDExtendedGraphicsState extGState = res.getExtGState(name); + if (extGState == null) + { + // can happen if key exists but no value + continue; + } + PDSoftMask softMask = extGState.getSoftMask(); + if (softMask != null) + { + PDTransparencyGroup group = softMask.getGroup(); + if (group != null) + { + // PDFBOX-4327: without this line NPEs will occur + res.getExtGState(name).copyIntoGraphicsState(getGraphicsState()); + + processSoftMask(group); + } + } + } } @Override @@ -200,6 +248,10 @@ public void drawImage(PDImage pdImage) throws IOException { if 
(pdImage instanceof PDImageXObject) { + if (pdImage.isStencil()) + { + processColor(getGraphicsState().getNonStrokingColor()); + } PDImageXObject xobject = (PDImageXObject)pdImage; if (seen.contains(xobject.getCOSObject())) { @@ -210,11 +262,11 @@ public void drawImage(PDImage pdImage) throws IOException } // save image - String name = prefix + "-" + imageCounter; + String name = filePrefix + "-" + imageCounter; imageCounter++; System.out.println("Writing image: " + name); - write2file(pdImage, name, directJPEG); + write2file(pdImage, name, useDirectJPEG, noColorConvert); } @Override @@ -268,21 +320,38 @@ public void endPath() throws IOException } @Override - public void strokePath() throws IOException + protected void showGlyph(Matrix textRenderingMatrix, + PDFont font, + int code, + Vector displacement) throws IOException { + RenderingMode renderingMode = getGraphicsState().getTextState().getRenderingMode(); + if (renderingMode.isFill()) + { + processColor(getGraphicsState().getNonStrokingColor()); + } + if (renderingMode.isStroke()) + { + processColor(getGraphicsState().getStrokingColor()); + } + } + @Override + public void strokePath() throws IOException + { + processColor(getGraphicsState().getStrokingColor()); } @Override public void fillPath(int windingRule) throws IOException { - + processColor(getGraphicsState().getNonStrokingColor()); } @Override public void fillAndStrokePath(int windingRule) throws IOException { - + processColor(getGraphicsState().getNonStrokingColor()); } @Override @@ -290,36 +359,88 @@ public void shadingFill(COSName shadingName) throws IOException { } - } - /** - * Writes the image to a file with the filename + an appropriate suffix, like "Image.jpg". - * The suffix is automatically set by the - * @param filename the filename - * @throws IOException When somethings wrong with the corresponding file. 
- */ - private void write2file(PDImage pdImage, String filename, boolean directJPEG) throws IOException - { - String suffix = pdImage.getSuffix(); - if (suffix == null) + // find out if it is a tiling pattern, then process that one + private void processColor(PDColor color) throws IOException { - suffix = "png"; + if (color.getColorSpace() instanceof PDPattern) + { + PDPattern pattern = (PDPattern) color.getColorSpace(); + PDAbstractPattern abstractPattern = pattern.getPattern(color); + if (abstractPattern instanceof PDTilingPattern) + { + processTilingPattern((PDTilingPattern) abstractPattern, null, null); + } + } } - FileOutputStream out = null; - try + /** + * Writes the image to a file with the filename prefix + an appropriate suffix, like + * "Image.jpg". The suffix is automatically set depending on the image compression in the + * PDF. + * + * @param pdImage the image. + * @param prefix the filename prefix. + * @param directJPEG if true, force saving JPEG/JPX streams as they are in the PDF file. + * @param noColorConvert if true, images are extracted with their original colorspace if + * possible. + * @throws IOException When something is wrong with the corresponding file. + */ + private void write2file(PDImage pdImage, String prefix, boolean directJPEG, + boolean noColorConvert) throws IOException { - out = new FileOutputStream(filename + "." + suffix); - BufferedImage image = pdImage.getImage(); - if (image != null) + String suffix = pdImage.getSuffix(); + if (suffix == null || "jb2".equals(suffix)) + { + suffix = "png"; + } + else if ("jpx".equals(suffix)) { + // use jp2 suffix for file because jpx not known by windows + suffix = "jp2"; + } + + if (hasMasks(pdImage)) + { + // TIKA-3040, PDFBOX-4771: can't save ARGB as JPEG + suffix = "png"; + } + + FileOutputStream out = null; + try + { + if (noColorConvert) + { + // We write the raw image if in any way possible. + // But we have no alpha information here. 
+ BufferedImage image = pdImage.getRawImage(); + if (image != null) + { + int elements = image.getRaster().getNumDataElements(); + suffix = "png"; + if (elements > 3) + { + // More then 3 channels: Thats likely CMYK. We use tiff here, + // but a TIFF codec must be in the class path for this to work. + suffix = "tiff"; + } + out = new FileOutputStream(prefix + "." + suffix); + ImageIOUtil.writeImage(image, suffix, out); + out.flush(); + out.close(); + return; + } + } + + out = new FileOutputStream(prefix + "." + suffix); if ("jpg".equals(suffix)) { String colorSpaceName = pdImage.getColorSpace().getName(); - if (directJPEG || PDDeviceGray.INSTANCE.getName().equals(colorSpaceName) || - PDDeviceRGB.INSTANCE.getName().equals(colorSpaceName)) + if (directJPEG + || (PDDeviceGray.INSTANCE.getName().equals(colorSpaceName) + || PDDeviceRGB.INSTANCE.getName().equals(colorSpaceName))) { - // RGB or Gray colorspace: get and write the unmodifiedJPEG stream + // RGB or Gray colorspace: get and write the unmodified JPEG stream InputStream data = pdImage.createInputStream(JPEG); IOUtils.copy(data, out); IOUtils.closeQuietly(data); @@ -327,22 +448,86 @@ private void write2file(PDImage pdImage, String filename, boolean directJPEG) th else { // for CMYK and other "unusual" colorspaces, the JPEG will be converted + BufferedImage image = pdImage.getImage(); + if (image != null) + { + ImageIOUtil.writeImage(image, suffix, out); + } + } + } + else if ("jp2".equals(suffix)) + { + String colorSpaceName = pdImage.getColorSpace().getName(); + if (directJPEG || + (PDDeviceGray.INSTANCE.getName().equals(colorSpaceName) || + PDDeviceRGB.INSTANCE.getName().equals(colorSpaceName))) + { + // RGB or Gray colorspace: get and write the unmodified JPEG2000 stream + InputStream data = pdImage.createInputStream( + Arrays.asList(COSName.JPX_DECODE.getName())); + IOUtils.copy(data, out); + IOUtils.closeQuietly(data); + } + else + { + // for CMYK and other "unusual" colorspaces, the image will be converted 
+ BufferedImage image = pdImage.getImage(); + if (image != null) + { + ImageIOUtil.writeImage(image, "jpeg2000", out); + } + } + } + else if ("tiff".equals(suffix) && pdImage.getColorSpace().equals(PDDeviceGray.INSTANCE)) + { + BufferedImage image = pdImage.getImage(); + if (image == null) + { + return; + } + // CCITT compressed images can have a different colorspace, but this one is B/W + // This is a bitonal image, so copy to TYPE_BYTE_BINARY + // so that a G4 compressed TIFF image is created by ImageIOUtil.writeImage() + int w = image.getWidth(); + int h = image.getHeight(); + BufferedImage bitonalImage = new BufferedImage(w, h, BufferedImage.TYPE_BYTE_BINARY); + // copy image the old fashioned way - ColorConvertOp is slower! + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + bitonalImage.setRGB(x, y, image.getRGB(x, y)); + } + } + ImageIOUtil.writeImage(bitonalImage, suffix, out); + } + else + { + BufferedImage image = pdImage.getImage(); + if (image != null) + { ImageIOUtil.writeImage(image, suffix, out); } } - else + out.flush(); + } + finally + { + if (out != null) { - ImageIOUtil.writeImage(image, suffix, out); + out.close(); } } - out.flush(); } - finally + + private boolean hasMasks(PDImage pdImage) throws IOException { - if (out != null) + if (pdImage instanceof PDImageXObject) { - out.close(); + PDImageXObject ximg = (PDImageXObject) pdImage; + return ximg.getMask() != null || ximg.getSoftMask() != null; } + return false; } } } diff --git a/tools/src/main/java/org/apache/pdfbox/tools/ExtractText.java b/tools/src/main/java/org/apache/pdfbox/tools/ExtractText.java index 0541a7b113a..e9f4602579b 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/ExtractText.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/ExtractText.java @@ -23,24 +23,38 @@ import java.io.OutputStreamWriter; import java.io.Writer; import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import org.apache.commons.logging.Log; +import 
org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification; import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; import org.apache.pdfbox.text.PDFTextStripper; +import org.apache.pdfbox.text.TextPosition; +import org.apache.pdfbox.util.Matrix; /** * This is the main program that simply parses the pdf document and transforms it * into text. * * @author Ben Litchfield + * @author Tilman Hausherr */ public final class ExtractText { + private static final Log LOG = LogFactory.getLog(ExtractText.class); + + @SuppressWarnings({"squid:S2068"}) private static final String PASSWORD = "-password"; private static final String ENCODING = "-encoding"; private static final String CONSOLE = "-console"; @@ -49,13 +63,15 @@ public final class ExtractText private static final String SORT = "-sort"; private static final String IGNORE_BEADS = "-ignoreBeads"; private static final String DEBUG = "-debug"; - // jjb - added simple HTML output private static final String HTML = "-html"; + private static final String ALWAYSNEXT = "-alwaysNext"; + private static final String ROTATION_MAGIC = "-rotationMagic"; + private static final String STD_ENCODING = "UTF-8"; /* * debug flag */ - private boolean debug = false; + private boolean debugOutput = false; /** * private constructor. 
@@ -92,8 +108,11 @@ public void startExtraction( String[] args ) throws IOException boolean toHTML = false; boolean sort = false; boolean separateBeads = true; + boolean alwaysNext = false; + boolean rotationMagic = false; + @SuppressWarnings({"squid:S2068"}) String password = ""; - String encoding = "UTF-8"; + String encoding = STD_ENCODING; String pdfFile = null; String outputFile = null; // Defaults to text files @@ -142,9 +161,17 @@ else if( args[i].equals( IGNORE_BEADS ) ) { separateBeads = false; } + else if (args[i].equals(ALWAYSNEXT)) + { + alwaysNext = true; + } + else if (args[i].equals(ROTATION_MAGIC)) + { + rotationMagic = true; + } else if( args[i].equals( DEBUG ) ) { - debug = true; + debugOutput = true; } else if( args[i].equals( END_PAGE ) ) { @@ -204,32 +231,50 @@ else if( args[i].equals( CONSOLE ) ) } else { + if (toHTML && !STD_ENCODING.equals(encoding)) + { + encoding = STD_ENCODING; + System.out.println("The encoding parameter is ignored when writing html output."); + } output = new OutputStreamWriter( new FileOutputStream( outputFile ), encoding ); } + startTime = startProcessing("Starting text extraction"); + if (debugOutput) + { + System.err.println("Writing to " + outputFile); + } PDFTextStripper stripper; if(toHTML) { + // HTML stripper can't work page by page because of startDocument() callback stripper = new PDFText2HTML(); + stripper.setSortByPosition(sort); + stripper.setShouldSeparateByBeads(separateBeads); + stripper.setStartPage(startPage); + stripper.setEndPage(endPage); + + // Extract text for main document: + stripper.writeText(document, output); } else { - stripper = new PDFTextStripper(); - } - stripper.setSortByPosition( sort ); - stripper.setShouldSeparateByBeads( separateBeads ); - stripper.setStartPage( startPage ); - stripper.setEndPage( endPage ); + if (rotationMagic) + { + stripper = new FilteredTextStripper(); + } + else + { + stripper = new PDFTextStripper(); + } + stripper.setSortByPosition(sort); + 
stripper.setShouldSeparateByBeads(separateBeads); - startTime = startProcessing("Starting text extraction"); - if (debug) - { - System.err.println("Writing to "+outputFile); + // Extract text for main document: + extractPages(startPage, Math.min(endPage, document.getNumberOfPages()), + stripper, document, output, rotationMagic, alwaysNext); } - - // Extract text for main document: - stripper.writeText( document, output ); - + // ... also for any embedded PDFs: PDDocumentCatalog catalog = document.getDocumentCatalog(); PDDocumentNameDictionary names = catalog.getNames(); @@ -243,7 +288,7 @@ else if( args[i].equals( CONSOLE ) ) { for (Map.Entry ent : embeddedFileNames.entrySet()) { - if (debug) + if (debugOutput) { System.err.println("Processing embedded file " + ent.getKey() + ":"); } @@ -251,7 +296,7 @@ else if( args[i].equals( CONSOLE ) ) PDEmbeddedFile file = spec.getEmbeddedFile(); if (file != null && "application/pdf".equals(file.getSubtype())) { - if (debug) + if (debugOutput) { System.err.println(" is PDF (size=" + file.getSize() + ")"); } @@ -260,17 +305,20 @@ else if( args[i].equals( CONSOLE ) ) try { subDoc = PDDocument.load(fis); + if (toHTML) + { + // will not really work because of HTML header + footer + stripper.writeText( subDoc, output ); + } + else + { + extractPages(1, subDoc.getNumberOfPages(), + stripper, subDoc, output, rotationMagic, alwaysNext); + } } finally { fis.close(); - } - try - { - stripper.writeText( subDoc, output ); - } - finally - { IOUtils.closeQuietly(subDoc); } } @@ -288,9 +336,61 @@ else if( args[i].equals( CONSOLE ) ) } } + private void extractPages(int startPage, int endPage, + PDFTextStripper stripper, PDDocument document, Writer output, + boolean rotationMagic, boolean alwaysNext) throws IOException + { + for (int p = startPage; p <= endPage; ++p) + { + stripper.setStartPage(p); + stripper.setEndPage(p); + try + { + if (rotationMagic) + { + PDPage page = document.getPage(p - 1); + int rotation = page.getRotation(); + 
page.setRotation(0); + AngleCollector angleCollector = new AngleCollector(); + angleCollector.setStartPage(p); + angleCollector.setEndPage(p); + angleCollector.writeText(document, new NullWriter()); + // rotation magic + for (int angle : angleCollector.getAngles()) + { + // prepend a transformation + // (we could skip these parts for angle 0, but it doesn't matter much) + PDPageContentStream cs = new PDPageContentStream(document, page, + PDPageContentStream.AppendMode.PREPEND, false); + cs.transform(Matrix.getRotateInstance(-Math.toRadians(angle), 0, 0)); + cs.close(); + + stripper.writeText(document, output); + + // remove prepended transformation + ((COSArray) page.getCOSObject().getItem(COSName.CONTENTS)).remove(0); + } + page.setRotation(rotation); + } + else + { + stripper.writeText(document, output); + } + } + catch (IOException ex) + { + if (!alwaysNext) + { + throw ex; + } + LOG.error("Failed to process page " + p, ex); + } + } + } + private long startProcessing(String message) { - if (debug) + if (debugOutput) { System.err.println(message); } @@ -299,7 +399,7 @@ private long startProcessing(String message) private void stopProcessing(String message, long startTime) { - if (debug) + if (debugOutput) { long stopTime = System.currentTimeMillis(); float elapsedTime = ((float)(stopTime - startTime))/1000; @@ -307,6 +407,13 @@ private void stopProcessing(String message, long startTime) } } + static int getAngle(TextPosition text) + { + Matrix m = text.getTextMatrix().clone(); + m.concatenate(text.getFont().getFontMatrix()); + return (int) Math.round(Math.toDegrees(Math.atan2(m.getShearY(), m.getScaleY()))); + } + /** * This will print the usage requirements and exit. 
*/ @@ -314,19 +421,98 @@ private static void usage() { String message = "Usage: java -jar pdfbox-app-x.y.z.jar ExtractText [options] [output-text-file]\n" + "\nOptions:\n" - + " -password : Password to decrypt document\n" - + " -encoding : UTF-8 (default) or ISO-8859-1, UTF-16BE, UTF-16LE, etc.\n" - + " -console : Send text to console instead of file\n" - + " -html : Output in HTML format instead of raw text\n" - + " -sort : Sort the text before writing\n" - + " -ignoreBeads : Disables the separation by beads\n" - + " -debug : Enables debug output about the time consumption of every stage\n" - + " -startPage : The first page to start extraction(1 based)\n" - + " -endPage : The last page to extract(inclusive)\n" - + " : The PDF document to use\n" - + " [output-text-file] : The file to write the text to"; + + " -password : Password to decrypt document\n" + + " -encoding : UTF-8 (default) or ISO-8859-1, UTF-16BE,\n" + + " UTF-16LE, etc.\n" + + " -console : Send text to console instead of file\n" + + " -html : Output in HTML format instead of raw text\n" + + " -sort : Sort the text before writing\n" + + " -ignoreBeads : Disables the separation by beads\n" + + " -debug : Enables debug output about the time consumption\n" + + " of every stage\n" + + " -alwaysNext : Process next page (if applicable) despite\n" + + " IOException (ignored when -html)\n" + + " -rotationMagic : Analyze each page for rotated/skewed text,\n" + + " rotate to 0° and extract separately\n" + + " (slower, and ignored when -html)\n" + + " -startPage : The first page to start extraction (1 based)\n" + + " -endPage : The last page to extract (1 based, inclusive)\n" + + " : The PDF document to use\n" + + " [output-text-file] : The file to write the text to"; System.err.println(message); System.exit( 1 ); } } + +/** + * Collect all angles while doing text extraction. 
Angles are in degrees and rounded to the closest + * integer (to avoid slight differences from floating point arithmetic resulting in similarly + * angled glyphs being treated separately). This class must be constructed for each page so that the + * angle set is initialized. + */ +class AngleCollector extends PDFTextStripper +{ + private final Set angles = new TreeSet(); + + AngleCollector() throws IOException + { + } + + Set getAngles() + { + return angles; + } + + @Override + protected void processTextPosition(TextPosition text) + { + int angle = ExtractText.getAngle(text); + angle = (angle + 360) % 360; + angles.add(angle); + } +} + +/** + * TextStripper that only processes glyphs that have angle 0. + */ +class FilteredTextStripper extends PDFTextStripper +{ + FilteredTextStripper() throws IOException + { + } + + @Override + protected void processTextPosition(TextPosition text) + { + int angle = ExtractText.getAngle(text); + if (angle == 0) + { + super.processTextPosition(text); + } + } +} + +/** + * Dummy output. + */ +class NullWriter extends Writer +{ + @Override + public void write(char[] cbuf, int off, int len) throws IOException + { + // do nothing + } + + @Override + public void flush() throws IOException + { + // do nothing + } + + @Override + public void close() throws IOException + { + // do nothing + } +} diff --git a/tools/src/main/java/org/apache/pdfbox/tools/ImageToPDF.java b/tools/src/main/java/org/apache/pdfbox/tools/ImageToPDF.java new file mode 100644 index 00000000000..ea6737dc2f2 --- /dev/null +++ b/tools/src/main/java/org/apache/pdfbox/tools/ImageToPDF.java @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.tools; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; + +/** + * Creates a PDF document from images. 
+ * + */ +public final class ImageToPDF +{ + private PDRectangle mediaBox = PDRectangle.LETTER; + private boolean landscape = false; + private boolean autoOrientation = false; + private boolean resize = false; + + private ImageToPDF() + { + } + + public static void main(String[] args) throws IOException + { + // suppress the Dock icon on OS X + System.setProperty("apple.awt.UIElement", "true"); + + ImageToPDF app = new ImageToPDF(); + + if (args.length < 2) + { + app.usage(); + } + + List imageFilenames = new ArrayList(); + String pdfPath = args[args.length - 1]; + + if (!pdfPath.endsWith(".pdf")) + { + System.err.println("Last argument must be the destination .pdf file"); + System.exit(1); + } + for (int i = 0; i < args.length - 1; i++) + { + if (args[i].startsWith("-")) + { + if ("-resize".equals(args[i])) + { + // will be modified to something more flexible + app.resize = true; + } + else if ("-landscape".equals(args[i])) + { + app.setLandscape(true); + } + else if ("-autoOrientation".equals(args[i])) + { + app.setAutoOrientation(true); + } + else if ("-pageSize".equals(args[i])) + { + i++; + PDRectangle rectangle = createRectangle(args[i]); + if (rectangle == null) + { + throw new IOException("Unknown argument: " + args[i]); + } + app.setMediaBox(rectangle); + } + else + { + throw new IOException("Unknown argument: " + args[i]); + } + } + else + { + imageFilenames.add(args[i]); + } + } + + PDDocument doc = new PDDocument(); + app.createPDFFromImages(doc, imageFilenames); + doc.save(pdfPath); + doc.close(); + } + + void createPDFFromImages(PDDocument doc, List imageFilenames) throws IOException + { + for (String imageFileName : imageFilenames) + { + PDImageXObject pdImage = PDImageXObject.createFromFile(imageFileName, doc); + + PDRectangle actualMediaBox = mediaBox; + if ((autoOrientation && pdImage.getWidth() > pdImage.getHeight()) || landscape) + { + actualMediaBox = new PDRectangle(mediaBox.getHeight(), mediaBox.getWidth()); + } + PDPage page = new 
PDPage(actualMediaBox); + doc.addPage(page); + + PDPageContentStream contents = new PDPageContentStream(doc, page); + if (resize) + { + contents.drawImage(pdImage, 0, 0, actualMediaBox.getWidth(), actualMediaBox.getHeight()); + } + else + { + contents.drawImage(pdImage, 0, 0, pdImage.getWidth(), pdImage.getHeight()); + } + contents.close(); + } + } + + private static PDRectangle createRectangle(String paperSize) + { + if ("letter".equalsIgnoreCase(paperSize)) + { + return PDRectangle.LETTER; + } + else if ("legal".equalsIgnoreCase(paperSize)) + { + return PDRectangle.LEGAL; + } + else if ("A0".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A0; + } + else if ("A1".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A1; + } + else if ("A2".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A2; + } + else if ("A3".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A3; + } + else if ("A4".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A4; + } + else if ("A5".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A5; + } + else if ("A6".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A6; + } + else + { + return null; + } + } + + /** + * Gets page size of produced PDF. + * + * @return returns the page size (media box) + */ + public PDRectangle getMediaBox() + { + return mediaBox; + } + + /** + * Sets page size of produced PDF. + * + * @param mediaBox + */ + public void setMediaBox(PDRectangle mediaBox) + { + this.mediaBox = mediaBox; + } + + /** + * Tells the paper orientation. + * + * @return true for landscape orientation + */ + public boolean isLandscape() + { + return landscape; + } + + /** + * Sets paper orientation. + * + * @param landscape + */ + public void setLandscape(boolean landscape) + { + this.landscape = landscape; + } + + /** + * Gets whether page orientation (portrait / landscape) should be decided automatically for each + * page depending on image proportion. + * + * @return true if auto, false if not.
+ */ + public boolean isAutoOrientation() + { + return autoOrientation; + } + + /** + * Sets whether page orientation (portrait / landscape) should be decided automatically for each + * page depending on image proportion. + * + * @param autoOrientation true if auto, false if not. + */ + public void setAutoOrientation(boolean autoOrientation) + { + this.autoOrientation = autoOrientation; + } + + /** + * This will print out a message telling how to use this example. + */ + private void usage() + { + StringBuilder message = new StringBuilder(); + message.append("Usage: jar -jar pdfbox-app-x.y.z.jar ImageToPDF [options] .. \n"); + message.append("\nOptions:\n"); + message.append(" -resize : resize to page size\n"); + message.append(" -pageSize : Letter (default)\n"); + message.append(" Legal\n"); + message.append(" A0\n"); + message.append(" A1\n"); + message.append(" A2\n"); + message.append(" A3\n"); + message.append(" A4\n"); + message.append(" A5\n"); + message.append(" A6\n"); + message.append(" -landscape : sets orientation to landscape\n"); + message.append(" -autoOrientation : sets orientation depending of image proportion\n"); + + System.err.println(message.toString()); + System.exit(1); + } +} diff --git a/tools/src/main/java/org/apache/pdfbox/tools/ImportFDF.java b/tools/src/main/java/org/apache/pdfbox/tools/ImportFDF.java index 25ad367da72..a8200c73346 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/ImportFDF.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/ImportFDF.java @@ -65,7 +65,7 @@ public void importFDF( PDDocument pdfDocument, FDFDocument fdfDocument ) throws /** * This will import an fdf document and write out another pdf. - *
      + *
      * see usage() for commandline * * @param args command line arguments diff --git a/tools/src/main/java/org/apache/pdfbox/tools/ImportXFDF.java b/tools/src/main/java/org/apache/pdfbox/tools/ImportXFDF.java index 63dd0040a98..acb8b852549 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/ImportXFDF.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/ImportXFDF.java @@ -59,7 +59,7 @@ public void importFDF( PDDocument pdfDocument, FDFDocument fdfDocument ) throws /** * This will import an fdf document and write out another pdf. - *
      + *
      * see usage() for commandline * * @param args command line arguments diff --git a/tools/src/main/java/org/apache/pdfbox/tools/OverlayPDF.java b/tools/src/main/java/org/apache/pdfbox/tools/OverlayPDF.java index 623aa8fac32..abb97962a13 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/OverlayPDF.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/OverlayPDF.java @@ -137,20 +137,23 @@ else if (overlayer.getDefaultOverlayFile() == null) usage(); } - try + try { PDDocument result = overlayer.overlay(specificPageOverlayFile); result.save(outputFilename); result.close(); - // close the input files AFTER saving the resulting file as some - // streams are shared among the input and the output files - overlayer.close(); } catch (IOException e) { LOG.error("Overlay failed: " + e.getMessage(), e); throw e; } + finally + { + // close the input files AFTER saving the resulting file as some + // streams are shared among the input and the output files + overlayer.close(); + } } private static void usage() diff --git a/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java b/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java index 2798244a625..d527e9404db 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java @@ -26,7 +26,6 @@ */ public final class PDFBox { - private PDFBox() { } @@ -35,6 +34,7 @@ private PDFBox() * Main method. 
* * @param args command line arguments + * @throws java.lang.Exception */ public static void main(String[] args) throws Exception { @@ -71,7 +71,7 @@ else if (command.equals("PrintPDF")) { PrintPDF.main(arguments); } - else if (command.equals("PDFDebugger")) + else if (command.equals("PDFDebugger") || command.equals("PDFReader")) { PDFDebugger.main(arguments); exitAfterCallingMain = false; @@ -80,11 +80,6 @@ else if (command.equals("PDFMerger")) { PDFMerger.main(arguments); } - else if (command.equals("PDFReader")) - { - PDFDebugger.main(arguments); - exitAfterCallingMain = false; - } else if (command.equals("PDFSplit")) { PDFSplit.main(arguments); @@ -93,6 +88,10 @@ else if (command.equals("PDFToImage")) { PDFToImage.main(arguments); } + else if (command.equals("ImageToPDF")) + { + ImageToPDF.main(arguments); + } else if (command.equals("TextToPDF")) { TextToPDF.main(arguments); @@ -121,7 +120,6 @@ private static void showMessageAndExit() String message = "PDFBox version: \""+ Version.getVersion()+ "\"" + "\nUsage: java -jar pdfbox-app-x.y.z.jar \n" + "\nPossible commands are:\n" - + " ConvertColorspace\n" + " Decrypt\n" + " Encrypt\n" + " ExtractText\n" diff --git a/tools/src/main/java/org/apache/pdfbox/tools/PDFSplit.java b/tools/src/main/java/org/apache/pdfbox/tools/PDFSplit.java index b74a9673a48..2ec7350950b 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/PDFSplit.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/PDFSplit.java @@ -21,6 +21,7 @@ import java.io.FileOutputStream; import java.util.List; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdfwriter.COSWriter; @@ -34,6 +35,7 @@ */ public final class PDFSplit { + @SuppressWarnings({"squid:S2068"}) private static final String PASSWORD = "-password"; private static final String SPLIT = "-split"; private static final String START_PAGE = "-startPage"; @@ -61,6 +63,7 @@ public static void main( String[] args ) throws IOException 
private void split( String[] args ) throws IOException { + @SuppressWarnings({"squid:S2068"}) String password = ""; String split = null; String startPage = null; @@ -180,14 +183,14 @@ else if( args[i].equals( OUTPUT_PREFIX ) ) } finally { - if( document != null ) + if (document != null) { document.close(); - } - for( int i=0; documents != null && i : split after this many pages (default 1, if startPage and endPage are unset)\n" + " -startPage : start page\n" + " -endPage : end page\n" - + " -outputPrefix : Filename prefix for splitted files\n" + + " -outputPrefix : Filename prefix for split files\n" + " : The PDF document to use\n"; System.err.println(message); diff --git a/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java b/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java index 4a55ebb1ddf..265d21ddf16 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java @@ -39,7 +39,6 @@ public class PDFText2HTML extends PDFTextStripper { private static final int INITIAL_PDF_TO_HTML_BYTES = 8192; - private boolean onFirstPage = true; private final FontState fontState = new FontState(); /** @@ -48,7 +47,6 @@ public class PDFText2HTML extends PDFTextStripper */ public PDFText2HTML() throws IOException { - super(); setLineSeparator(LINE_SEPARATOR); setParagraphStart("

      "); setParagraphEnd("

      "+ LINE_SEPARATOR); @@ -64,34 +62,27 @@ public PDFText2HTML() throws IOException * * @throws IOException * If there is a problem writing out the header to the document. + * @deprecated use {@link #startDocument(PDDocument)} */ + @Deprecated protected void writeHeader() throws IOException + { + } + + @Override + protected void startDocument(PDDocument document) throws IOException { StringBuilder buf = new StringBuilder(INITIAL_PDF_TO_HTML_BYTES); buf.append("\n"); buf.append(""); buf.append("").append(escape(getTitle())).append("\n"); - buf.append("\n"); + buf.append("\n"); buf.append("\n"); buf.append("\n"); super.writeString(buf.toString()); } - - /** - * {@inheritDoc} - */ - @Override - protected void writePage() throws IOException - { - if (onFirstPage) - { - writeHeader(); - onFirstPage = false; - } - super.writePage(); - } - + /** * {@inheritDoc} */ @@ -122,11 +113,8 @@ protected String getTitle() StringBuilder titleText = new StringBuilder(); while (textIter.hasNext()) { - Iterator textByArticle = textIter.next().iterator(); - while (textByArticle.hasNext()) + for (TextPosition position : textIter.next()) { - TextPosition position = textByArticle.next(); - float currentFontSize = position.getFontSize(); //If we're past 64 chars we will assume that we're past the title //64 is arbitrary diff --git a/tools/src/main/java/org/apache/pdfbox/tools/PDFToImage.java b/tools/src/main/java/org/apache/pdfbox/tools/PDFToImage.java index 17533a493b1..1b6e0bd066b 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/PDFToImage.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/PDFToImage.java @@ -27,6 +27,7 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; import org.apache.pdfbox.rendering.ImageType; import org.apache.pdfbox.rendering.PDFRenderer; import org.apache.pdfbox.tools.imageio.ImageIOUtil; @@ -38,6 
+39,7 @@ */ public final class PDFToImage { + @SuppressWarnings({"squid:S2068"}) private static final String PASSWORD = "-password"; private static final String START_PAGE = "-startPage"; private static final String END_PAGE = "-endPage"; @@ -49,8 +51,10 @@ public final class PDFToImage private static final String COLOR = "-color"; private static final String RESOLUTION = "-resolution"; private static final String DPI = "-dpi"; + private static final String QUALITY = "-quality"; private static final String CROPBOX = "-cropbox"; private static final String TIME = "-time"; + private static final String SUBSAMPLING = "-subsampling"; /** * private constructor. @@ -72,6 +76,7 @@ public static void main( String[] args ) throws IOException // suppress the Dock icon on OS X System.setProperty("apple.awt.UIElement", "true"); + @SuppressWarnings({"squid:S2068"}) String password = ""; String pdfFile = null; String outputPrefix = null; @@ -80,11 +85,13 @@ public static void main( String[] args ) throws IOException int endPage = Integer.MAX_VALUE; String color = "rgb"; int dpi; + float quality = -1; float cropBoxLowerLeftX = 0; float cropBoxLowerLeftY = 0; float cropBoxUpperRightX = 0; float cropBoxUpperRightY = 0; boolean showTime = false; + boolean subsampling = false; try { dpi = Toolkit.getDefaultToolkit().getScreenResolution(); @@ -152,6 +159,11 @@ else if( args[i].equals( RESOLUTION ) || args[i].equals( DPI ) ) i++; dpi = Integer.parseInt(args[i]); } + else if( args[i].equals( QUALITY ) ) + { + i++; + quality = Float.parseFloat(args[i]); + } else if( args[i].equals( CROPBOX ) ) { i++; @@ -167,6 +179,10 @@ else if( args[i].equals( TIME ) ) { showTime = true; } + else if( args[i].equals( SUBSAMPLING ) ) + { + subsampling = true; + } else { if( pdfFile == null ) @@ -185,11 +201,20 @@ else if( args[i].equals( TIME ) ) { outputPrefix = pdfFile.substring( 0, pdfFile.lastIndexOf( '.' )); } + if (quality < 0) + { + quality = "png".equals(imageFormat) ? 
0f : 1f; + } PDDocument document = null; try { document = PDDocument.load(new File(pdfFile), password); + PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(); + if (acroForm != null && acroForm.getNeedAppearances()) + { + acroForm.refreshAppearances(); + } ImageType imageType = null; if ("bilevel".equalsIgnoreCase(color)) @@ -231,11 +256,12 @@ else if ("rgba".equalsIgnoreCase(color)) boolean success = true; endPage = Math.min(endPage, document.getNumberOfPages()); PDFRenderer renderer = new PDFRenderer(document); + renderer.setSubsamplingAllowed(subsampling); for (int i = startPage - 1; i < endPage; i++) { BufferedImage image = renderer.renderImageWithDPI(i, dpi, imageType); String fileName = outputPrefix + (i + 1) + "." + imageFormat; - success &= ImageIOUtil.writeImage(image, fileName, dpi); + success &= ImageIOUtil.writeImage(image, fileName, dpi, quality); } // performance stats @@ -244,7 +270,7 @@ else if ("rgba".equalsIgnoreCase(color)) int count = 1 + endPage - startPage; if (showTime) { - System.err.printf("Rendered %d page%s in %dms\n", count, count == 1 ? "" : "s", + System.err.printf("Rendered %d page%s in %dms%n", count, count == 1 ? 
"" : "s", duration / 1000000); } @@ -273,15 +299,18 @@ private static void usage() String message = "Usage: java -jar pdfbox-app-x.y.z.jar PDFToImage [options] \n" + "\nOptions:\n" + " -password : Password to decrypt document\n" - + " -format : Image format: " + getImageFormats() + "\n" + + " -format : Available image formats: " + getImageFormats() + "\n" + " -prefix : Filename prefix for image files\n" - + " -page : The only page to extract (1-based)\n" + + " -page : The only page to extract (1-based)\n" + " -startPage : The first page to start extraction (1-based)\n" - + " -endPage : The last page to extract(inclusive)\n" - + " -color : The color depth (valid: bilevel, indexed, gray, rgb, rgba)\n" - + " -dpi : The DPI of the output image\n" + + " -endPage : The last page to extract (inclusive)\n" + + " -color : The color depth (valid: bilevel, gray, rgb (default), rgba)\n" + + " -dpi : The DPI of the output image, default: screen resolution or 96 if unknown\n" + + " -quality : The quality to be used when compressing the image (0 <= quality <= 1)\n" + + " (default: 0 for PNG and 1 for the other formats)\n" + " -cropbox : The page area to export\n" + " -time : Prints timing information to stdout\n" + + " -subsampling : Activate subsampling (for PDFs with huge images)\n" + " : The PDF document to use\n"; System.err.println(message); @@ -291,7 +320,7 @@ private static void usage() private static String getImageFormats() { StringBuilder retval = new StringBuilder(); - String[] formats = ImageIO.getReaderFormatNames(); + String[] formats = ImageIO.getWriterFormatNames(); for( int i = 0; i < formats.length; i++ ) { if (formats[i].equalsIgnoreCase(formats[i])) @@ -317,7 +346,6 @@ private static void changeCropBox(PDDocument document, float a, float b, float c rectangle.setUpperRightX(c); rectangle.setUpperRightY(d); page.setCropBox(rectangle); - } } } diff --git a/tools/src/main/java/org/apache/pdfbox/tools/PrintPDF.java 
b/tools/src/main/java/org/apache/pdfbox/tools/PrintPDF.java index 86952bf8063..dcbf5e0d414 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/PrintPDF.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/PrintPDF.java @@ -16,13 +16,18 @@ */ package org.apache.pdfbox.tools; +import java.awt.RenderingHints; import java.awt.print.PrinterException; import java.awt.print.PrinterJob; import java.io.File; import java.io.IOException; - +import java.util.HashMap; +import java.util.Map; import javax.print.PrintService; + import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.encryption.AccessPermission; +import org.apache.pdfbox.printing.Orientation; import org.apache.pdfbox.printing.PDFPageable; /** @@ -32,9 +37,14 @@ */ public final class PrintPDF { + @SuppressWarnings({"squid:S2068"}) private static final String PASSWORD = "-password"; private static final String SILENT = "-silentPrint"; private static final String PRINTER_NAME = "-printerName"; + private static final String ORIENTATION = "-orientation"; + private static final String BORDER = "-border"; + private static final String DPI = "-dpi"; + private static final String NOCOLOROPT = "-noColorOpt"; /** * private constructor. 
@@ -56,10 +66,20 @@ public static void main(String[] args) throws PrinterException, IOException // suppress the Dock icon on OS X System.setProperty("apple.awt.UIElement", "true"); + @SuppressWarnings({"squid:S2068"}) String password = ""; String pdfFile = null; boolean silentPrint = false; String printerName = null; + Orientation orientation = Orientation.AUTO; + boolean showPageBorder = false; + int dpi = 0; + Map orientationMap = new HashMap(); + orientationMap.put("auto", Orientation.AUTO); + orientationMap.put("landscape", Orientation.LANDSCAPE); + orientationMap.put("portrait", Orientation.PORTRAIT); + RenderingHints renderingHints = null; + for (int i = 0; i < args.length; i++) { if (args[i].equals(PASSWORD)) @@ -84,6 +104,42 @@ else if (args[i].equals(SILENT)) { silentPrint = true; } + else if (args[i].equals(ORIENTATION)) + { + i++; + if (i >= args.length) + { + usage(); + } + orientation = orientationMap.get(args[i]); + if (orientation == null) + { + usage(); + } + } + else if (args[i].equals(BORDER)) + { + showPageBorder = true; + } + else if (args[i].equals(NOCOLOROPT)) + { + renderingHints = new RenderingHints(null); + renderingHints.put(RenderingHints.KEY_INTERPOLATION, + RenderingHints.VALUE_INTERPOLATION_NEAREST_NEIGHBOR); + renderingHints.put(RenderingHints.KEY_RENDERING, + RenderingHints.VALUE_RENDER_QUALITY); + renderingHints.put(RenderingHints.KEY_ANTIALIASING, + RenderingHints.VALUE_ANTIALIAS_OFF); + } + else if (args[i].equals(DPI)) + { + i++; + if (i >= args.length) + { + usage(); + } + dpi = Integer.parseInt(args[i]); + } else { pdfFile = args[i]; @@ -100,24 +156,43 @@ else if (args[i].equals(SILENT)) { document = PDDocument.load(new File(pdfFile), password); + AccessPermission ap = document.getCurrentAccessPermission(); + if (!ap.canPrint()) + { + throw new IOException("You do not have permission to print"); + } + PrinterJob printJob = PrinterJob.getPrinterJob(); printJob.setJobName(new File(pdfFile).getName()); if (printerName != null) { - 
PrintService[] printService = PrinterJob.lookupPrintServices(); + PrintService[] printServices = PrinterJob.lookupPrintServices(); boolean printerFound = false; - for (int i = 0; !printerFound && i < printService.length; i++) + for (int i = 0; i < printServices.length; i++) { - if (printService[i].getName().contains(printerName)) + if (printServices[i].getName().equals(printerName)) { - printJob.setPrintService(printService[i]); + printJob.setPrintService(printServices[i]); printerFound = true; + break; } } + if (!printerFound) + { + System.err.println("printer '" + printerName + "' not found, using default"); + showAvailablePrinters(); + } } - printJob.setPageable(new PDFPageable(document)); - + PDFPageable pageable = new PDFPageable(document, orientation, showPageBorder, dpi); + pageable.setRenderingHints(renderingHints); + printJob.setPageable(pageable); + + // We're not using PDFPrintable, because then + // "the PageFormat for each page is the default page format" + // which results in the image appearing in the middle of the page, and padded + // when printing on XPS. Also PDFPageable.getPageFormat() won't be called. 
+ if (silentPrint || printJob.printDialog()) { printJob.print(); @@ -139,10 +214,28 @@ private static void usage() { String message = "Usage: java -jar pdfbox-app-x.y.z.jar PrintPDF [options] \n" + "\nOptions:\n" - + " -password : Password to decrypt document\n" - + " -silentPrint : Print without prompting for printer info\n"; - + + " -password : Password to decrypt document\n" + + " -printerName : Print to specific printer\n" + + " -orientation auto|portrait|landscape : Print using orientation\n" + + " (default: auto)\n" + + " -border : Print with border\n" + + " -dpi : Render into intermediate image with\n" + + " specific dpi and then print\n" + + " -noColorOpt : Disable color optimizations\n" + + " (useful when printing barcodes)\n" + + " -silentPrint : Print without printer dialog box\n"; System.err.println(message); + showAvailablePrinters(); System.exit(1); } + + private static void showAvailablePrinters() + { + System.err.println("Available printer names:"); + PrintService[] printServices = PrinterJob.lookupPrintServices(); + for (PrintService printService : printServices) + { + System.err.println(" " + printService.getName()); + } + } } diff --git a/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java b/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java index 594864f6ce3..c17d5cc2a45 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java @@ -28,12 +28,13 @@ import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType0Font; import org.apache.pdfbox.pdmodel.font.PDType1Font; /** - * This will take a text file and ouput a pdf with that text. + * This will take a text file and output a pdf with that text. 
* * @author Ben Litchfield */ @@ -60,6 +61,8 @@ public class TextToPDF private static final float LINE_HEIGHT_FACTOR = 1.05f; private int fontSize = DEFAULT_FONT_SIZE; + private PDRectangle mediaBox = PDRectangle.LETTER; + private boolean landscape = false; private PDFont font = DEFAULT_FONT; private static final Map STANDARD_14 = new HashMap(); @@ -100,6 +103,7 @@ public PDDocument createPDFFromText( Reader text ) throws IOException /** * Create a PDF document with some text. * + * @param doc The document. * @param text The stream of text data. * * @throws IOException If there is an error writing the data. @@ -111,12 +115,17 @@ public void createPDFFromText( PDDocument doc, Reader text ) throws IOException final int margin = 40; float height = font.getBoundingBox().getHeight() / FONTSCALE; + PDRectangle actualMediaBox = mediaBox; + if (landscape) + { + actualMediaBox = new PDRectangle(mediaBox.getHeight(), mediaBox.getWidth()); + } //calculate font height and increase by a factor. height = height*fontSize*LINE_HEIGHT_FACTOR; BufferedReader data = new BufferedReader( text ); - String nextLine = null; - PDPage page = new PDPage(); + String nextLine; + PDPage page = new PDPage(actualMediaBox); PDPageContentStream contentStream = null; float y = -1; float maxStringLength = page.getMediaBox().getWidth() - 2*margin; @@ -124,6 +133,8 @@ public void createPDFFromText( PDDocument doc, Reader text ) throws IOException // There is a special case of creating a PDF document from an empty string. boolean textIsEmpty = true; + StringBuilder nextLineToDraw = new StringBuilder(); + while( (nextLine = data.readLine()) != null ) { @@ -132,31 +143,73 @@ public void createPDFFromText( PDDocument doc, Reader text ) throws IOException // the text. 
textIsEmpty = false; - String[] lineWords = nextLine.trim().split( " " ); + String[] lineWords = nextLine.replaceAll("[\\n\\r]+$", "").split(" "); int lineIndex = 0; while( lineIndex < lineWords.length ) { - StringBuilder nextLineToDraw = new StringBuilder(); + nextLineToDraw.setLength(0); float lengthIfUsingNextWord = 0; + boolean ff = false; do { - nextLineToDraw.append( lineWords[lineIndex] ); - nextLineToDraw.append( " " ); - lineIndex++; + String word1, word2 = ""; + String word = lineWords[lineIndex]; + int indexFF = word.indexOf('\f'); + if (indexFF == -1) + { + word1 = word; + } + else + { + ff = true; + word1 = word.substring(0, indexFF); + if (indexFF < word.length()) + { + word2 = word.substring(indexFF + 1); + } + } + // word1 is the part before ff, word2 after + // both can be empty + // word1 can also be empty without ff, if a line has many spaces + if (word1.length() > 0 || !ff) + { + nextLineToDraw.append(word1); + nextLineToDraw.append(" "); + } + if (!ff || word2.length() == 0) + { + lineIndex++; + } + else + { + lineWords[lineIndex] = word2; + } + if (ff) + { + break; + } if( lineIndex < lineWords.length ) { - String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex]; + // need cut off at \f in next word to avoid IllegalArgumentException + String nextWord = lineWords[lineIndex]; + indexFF = nextWord.indexOf('\f'); + if (indexFF != -1) + { + nextWord = nextWord.substring(0, indexFF); + } + + String lineWithNextWord = nextLineToDraw.toString() + " " + nextWord; lengthIfUsingNextWord = (font.getStringWidth( lineWithNextWord )/FONTSCALE) * fontSize; } } - while( lineIndex < lineWords.length && - lengthIfUsingNextWord < maxStringLength ); + while (lineIndex < lineWords.length && lengthIfUsingNextWord < maxStringLength); + if( y < margin ) { // We have crossed the end-of-page boundary and need to extend the // document by another page. 
- page = new PDPage(); + page = new PDPage(actualMediaBox); doc.addPage( page ); if( contentStream != null ) { @@ -167,9 +220,7 @@ public void createPDFFromText( PDDocument doc, Reader text ) throws IOException contentStream.setFont( font, fontSize ); contentStream.beginText(); y = page.getMediaBox().getHeight() - margin + height; - contentStream.newLineAtOffset( - margin, y); - + contentStream.newLineAtOffset(margin, y); } if( contentStream == null ) @@ -179,9 +230,19 @@ public void createPDFFromText( PDDocument doc, Reader text ) throws IOException contentStream.newLineAtOffset(0, -height); y -= height; contentStream.showText(nextLineToDraw.toString()); + if (ff) + { + page = new PDPage(actualMediaBox); + doc.addPage(page); + contentStream.endText(); + contentStream.close(); + contentStream = new PDPageContentStream(doc, page); + contentStream.setFont(font, fontSize); + contentStream.beginText(); + y = page.getMediaBox().getHeight() - margin + height; + contentStream.newLineAtOffset(margin, y); + } } - - } // If the input text was the empty string, then the above while loop will have short-circuited @@ -210,7 +271,7 @@ public void createPDFFromText( PDDocument doc, Reader text ) throws IOException /** * This will create a PDF document with some text in it. - *
      + *
      * see usage() for commandline * * @param args Command line arguments. @@ -251,14 +312,30 @@ else if( args[i].equals( "-fontSize" )) i++; app.setFontSize( Integer.parseInt( args[i] ) ); } + else if( args[i].equals( "-pageSize" )) + { + i++; + PDRectangle rectangle = createRectangle(args[i]); + if (rectangle == null) + { + throw new IOException("Unknown argument: " + args[i]); + } + app.setMediaBox(rectangle); + } + else if( args[i].equals( "-landscape" )) + { + app.setLandscape(true); + } else { - throw new IOException( "Unknown argument:" + args[i] ); + throw new IOException( "Unknown argument: " + args[i] ); } } - - app.createPDFFromText( doc, new FileReader( args[args.length-1] ) ); - doc.save( args[args.length-2] ); + + FileReader fileReader = new FileReader(args[args.length - 1]); + app.createPDFFromText(doc, fileReader); + fileReader.close(); + doc.save(args[args.length - 2]); } } finally @@ -267,6 +344,50 @@ else if( args[i].equals( "-fontSize" )) } } + private static PDRectangle createRectangle( String paperSize ) + { + if ("letter".equalsIgnoreCase(paperSize)) + { + return PDRectangle.LETTER; + } + else if ("legal".equalsIgnoreCase(paperSize)) + { + return PDRectangle.LEGAL; + } + else if ("A0".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A0; + } + else if ("A1".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A1; + } + else if ("A2".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A2; + } + else if ("A3".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A3; + } + else if ("A4".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A4; + } + else if ("A5".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A5; + } + else if ("A6".equalsIgnoreCase(paperSize)) + { + return PDRectangle.A6; + } + else + { + return null; + } + } + /** * This will print out a message telling how to use this example. 
*/ @@ -277,15 +398,25 @@ private void usage() StringBuilder message = new StringBuilder(); message.append("Usage: jar -jar pdfbox-app-x.y.z.jar TextToPDF [options] \n"); message.append("\nOptions:\n"); - message.append(" -standardFont : " + DEFAULT_FONT.getBaseFont() + " (default)\n"); - + message.append(" -standardFont : ").append(DEFAULT_FONT.getBaseFont()).append(" (default)\n"); + for (String std14String : std14) { - message.append(" " + std14String + "\n"); + message.append(" ").append(std14String).append("\n"); } message.append(" -ttf : The TTF font to use.\n"); - message.append(" -fontSize : default: " + DEFAULT_FONT_SIZE ); - + message.append(" -fontSize : default: ").append(DEFAULT_FONT_SIZE).append("\n"); + message.append(" -pageSize : Letter (default)\n"); + message.append(" Legal\n"); + message.append(" A0\n"); + message.append(" A1\n"); + message.append(" A2\n"); + message.append(" A3\n"); + message.append(" A4\n"); + message.append(" A5\n"); + message.append(" A6\n"); + message.append(" -landscape : sets orientation to landscape" ); + System.err.println(message.toString()); System.exit(1); } @@ -342,4 +473,44 @@ public void setFontSize(int aFontSize) { this.fontSize = aFontSize; } + + /** + * Sets page size of produced PDF. + * + * @return returns the page size (media box) + */ + public PDRectangle getMediaBox() + { + return mediaBox; + } + + /** + * Sets page size of produced PDF. + * + * @param mediaBox + */ + public void setMediaBox(PDRectangle mediaBox) + { + this.mediaBox = mediaBox; + } + + /** + * Tells the paper orientation. + * + * @return true for landscape orientation + */ + public boolean isLandscape() + { + return landscape; + } + + /** + * Sets paper orientation. 
+ * + * @param landscape true for landscape orientation + */ + public void setLandscape(boolean landscape) + { + this.landscape = landscape; + } } diff --git a/tools/src/main/java/org/apache/pdfbox/tools/WriteDecodedDoc.java b/tools/src/main/java/org/apache/pdfbox/tools/WriteDecodedDoc.java index 15c03bde26f..920a82ad397 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/WriteDecodedDoc.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/WriteDecodedDoc.java @@ -19,7 +19,6 @@ import java.io.File; import java.io.IOException; import java.io.OutputStream; -import java.util.Iterator; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSObject; @@ -35,7 +34,9 @@ public class WriteDecodedDoc { + @SuppressWarnings({"squid:S2068"}) private static final String PASSWORD = "-password"; + private static final String SKIPIMAGES = "-skipImages"; /** * Constructor. @@ -51,10 +52,11 @@ public WriteDecodedDoc() * @param in The filename used for input. * @param out The filename used for output. * @param password The password to open the document. + * @param skipImages Whether to skip decoding images. 
* * @throws IOException if the output could not be written */ - public void doIt(String in, String out, String password) + public void doIt(String in, String out, String password, boolean skipImages) throws IOException { PDDocument doc = null; @@ -62,13 +64,31 @@ public void doIt(String in, String out, String password) { doc = PDDocument.load(new File(in), password); doc.setAllSecurityToBeRemoved(true); - for (Iterator i = doc.getDocument().getObjects().iterator(); i.hasNext();) + for (COSObject cosObject : doc.getDocument().getObjects()) { - COSBase base = i.next().getObject(); + COSBase base = cosObject.getObject(); if (base instanceof COSStream) { - COSStream stream = (COSStream)base; - byte[] bytes = new PDStream(stream).toByteArray(); + COSStream stream = (COSStream) base; + if (skipImages && + COSName.XOBJECT.equals(stream.getItem(COSName.TYPE)) && + COSName.IMAGE.equals(stream.getItem(COSName.SUBTYPE))) + { + continue; + } + byte[] bytes; + try + { + bytes = new PDStream(stream).toByteArray(); + } + catch (IOException ex) + { + System.err.println("skip " + + cosObject.getObjectNumber() + " " + + cosObject.getGenerationNumber() + " obj: " + + ex.getMessage()); + continue; + } stream.removeItem(COSName.FILTER); OutputStream streamOut = stream.createOutputStream(); streamOut.write(bytes); @@ -76,6 +96,7 @@ public void doIt(String in, String out, String password) } } doc.getDocumentCatalog(); + doc.getDocument().setIsXRefStream(false); doc.save( out ); } finally @@ -89,7 +110,7 @@ public void doIt(String in, String out, String password) /** * This will write a PDF document with completely decoded streams. - *
      + *
      * see usage() for commandline * * @param args command line arguments @@ -101,9 +122,11 @@ public static void main(String[] args) throws IOException System.setProperty("apple.awt.UIElement", "true"); WriteDecodedDoc app = new WriteDecodedDoc(); + @SuppressWarnings({"squid:S2068"}) String password = ""; String pdfFile = null; String outputFile = null; + boolean skipImages = false; for( int i=0; i : Password to decrypt the document\n" + + " -skipImages : Don't uncompress images\n" + " : The PDF document to be decompressed\n" + " [outputfile] : The filename for the decompressed pdf\n"; diff --git a/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java b/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java index 4975fbec762..7e0c054d030 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java @@ -1,315 +1,444 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.tools.imageio; - -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.Iterator; - -import javax.imageio.IIOImage; -import javax.imageio.ImageIO; -import javax.imageio.ImageTypeSpecifier; -import javax.imageio.ImageWriteParam; -import javax.imageio.ImageWriter; -import javax.imageio.metadata.IIOInvalidTreeException; -import javax.imageio.metadata.IIOMetadata; -import javax.imageio.metadata.IIOMetadataNode; -import javax.imageio.stream.ImageOutputStream; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.w3c.dom.NodeList; - -/** - * Handles some ImageIO operations. - */ -public final class ImageIOUtil -{ - /** - * Log instance - */ - private static final Log LOG = LogFactory.getLog(ImageIOUtil.class); - - private ImageIOUtil() - { - } - - /** - * Writes a buffered image to a file using the given image format. See - * {@link #writeImage(BufferedImage image, String formatName, - * OutputStream output, int dpi, float quality)} for more details. - * - * @param image the image to be written - * @param filename used to construct the filename for the individual image. - * Its suffix will be used as the image format. - * @param dpi the resolution in dpi (dots per inch) to be used in metadata - * @return true if the image file was produced, false if there was an error. - * @throws IOException if an I/O error occurs - */ - public static boolean writeImage(BufferedImage image, String filename, - int dpi) throws IOException - { - File file = new File(filename); - FileOutputStream output = new FileOutputStream(file); - try - { - String formatName = filename.substring(filename.lastIndexOf('.') + 1); - return writeImage(image, formatName, output, dpi); - } - finally - { - output.close(); - } - } - - /** - * Writes a buffered image to a file using the given image format. 
See - * {@link #writeImage(BufferedImage image, String formatName, - * OutputStream output, int dpi, float quality)} for more details. - * - * @param image the image to be written - * @param formatName the target format (ex. "png") which is also the suffix - * for the filename - * @param filename used to construct the filename for the individual image. - * The formatName parameter will be used as the suffix. - * @param dpi the resolution in dpi (dots per inch) to be used in metadata - * @return true if the image file was produced, false if there was an error. - * @throws IOException if an I/O error occurs - * @deprecated use - * {@link #writeImage(BufferedImage image, String filename, int dpi)}, which - * uses the full filename instead of just the prefix. - */ - @Deprecated - public static boolean writeImage(BufferedImage image, String formatName, String filename, - int dpi) throws IOException - { - File file = new File(filename + "." + formatName); - FileOutputStream output = new FileOutputStream(file); - try - { - return writeImage(image, formatName, output, dpi); - } - finally - { - output.close(); - } - } - - /** - * Writes a buffered image to a file using the given image format. See - * {@link #writeImage(BufferedImage image, String formatName, - * OutputStream output, int dpi, float quality)} for more details. - * - * @param image the image to be written - * @param formatName the target format (ex. "png") - * @param output the output stream to be used for writing - * @return true if the image file was produced, false if there was an error. - * @throws IOException if an I/O error occurs - */ - public static boolean writeImage(BufferedImage image, String formatName, OutputStream output) - throws IOException - { - return writeImage(image, formatName, output, 72); - } - - /** - * Writes a buffered image to a file using the given image format. 
See - * {@link #writeImage(BufferedImage image, String formatName, - * OutputStream output, int dpi, float quality)} for more details. - * - * @param image the image to be written - * @param formatName the target format (ex. "png") - * @param output the output stream to be used for writing - * @param dpi the resolution in dpi (dots per inch) to be used in metadata - * @return true if the image file was produced, false if there was an error. - * @throws IOException if an I/O error occurs - */ - public static boolean writeImage(BufferedImage image, String formatName, OutputStream output, - int dpi) throws IOException - { - return writeImage(image, formatName, output, dpi, 1.0f); - } - - /** - * Writes a buffered image to a file using the given image format. - * Compression is fixed for PNG, GIF, BMP and WBMP, dependent of the quality - * parameter for JPG, and dependent of bit count for TIFF (a bitonal image - * will be compressed with CCITT G4, a color image with LZW). Creating a - * TIFF image is only supported if the jai_imageio library is in the class - * path. - * - * @param image the image to be written - * @param formatName the target format (ex. "png") - * @param output the output stream to be used for writing - * @param dpi the resolution in dpi (dots per inch) to be used in metadata - * @param quality quality to be used when compressing the image (0 < - * quality < 1.0f) - * @return true if the image file was produced, false if there was an error. - * @throws IOException if an I/O error occurs - */ - public static boolean writeImage(BufferedImage image, String formatName, OutputStream output, - int dpi, float quality) throws IOException - { - ImageOutputStream imageOutput = null; - ImageWriter writer = null; - try - { - // find suitable image writer - Iterator writers = ImageIO.getImageWritersByFormatName(formatName); - ImageWriteParam param = null; - IIOMetadata metadata = null; - // Loop until we get the best driver, i.e. 
one that supports - // setting dpi in the standard metadata format; however we'd also - // accept a driver that can't, if a better one can't be found - while (writers.hasNext()) - { - if (writer != null) - { - writer.dispose(); - } - writer = writers.next(); - param = writer.getDefaultWriteParam(); - metadata = writer.getDefaultImageMetadata(new ImageTypeSpecifier(image), param); - if (metadata != null - && !metadata.isReadOnly() - && metadata.isStandardMetadataFormatSupported()) - { - break; - } - } - if (writer == null) - { - LOG.error("No ImageWriter found for '" + formatName + "' format"); - StringBuilder sb = new StringBuilder(); - String[] writerFormatNames = ImageIO.getWriterFormatNames(); - for (String fmt : writerFormatNames) - { - sb.append(fmt); - sb.append(' '); - } - LOG.error("Supported formats: " + sb); - return false; - } - - // compression - if (param != null && param.canWriteCompressed()) - { - param.setCompressionMode(ImageWriteParam.MODE_EXPLICIT); - if (formatName.toLowerCase().startsWith("tif")) - { - // TIFF compression - TIFFUtil.setCompressionType(param, image); - } - else - { - param.setCompressionType(param.getCompressionTypes()[0]); - param.setCompressionQuality(quality); - } - } - - if (formatName.toLowerCase().startsWith("tif")) - { - // TIFF metadata - TIFFUtil.updateMetadata(metadata, image, dpi); - } - else if ("jpeg".equals(formatName.toLowerCase()) - || "jpg".equals(formatName.toLowerCase())) - { - // This segment must be run before other meta operations, - // or else "IIOInvalidTreeException: Invalid node: app0JFIF" - // The other (general) "meta" methods may not be used, because - // this will break the reading of the meta data in tests - JPEGUtil.updateMetadata(metadata, dpi); - } - else - { - // write metadata is possible - if (metadata != null - && !metadata.isReadOnly() - && metadata.isStandardMetadataFormatSupported()) - { - setDPI(metadata, dpi, formatName); - } - } - - // write - imageOutput = 
ImageIO.createImageOutputStream(output); - writer.setOutput(imageOutput); - writer.write(null, new IIOImage(image, null, metadata), param); - } - finally - { - if (writer != null) - { - writer.dispose(); - } - if (imageOutput != null) - { - imageOutput.close(); - } - } - return true; - } - - /** - * Gets the named child node, or creates and attaches it. - * - * @param parentNode the parent node - * @param name name of the child node - * - * @return the existing or just created child node - */ - private static IIOMetadataNode getOrCreateChildNode(IIOMetadataNode parentNode, String name) - { - NodeList nodeList = parentNode.getElementsByTagName(name); - if (nodeList != null && nodeList.getLength() > 0) - { - return (IIOMetadataNode) nodeList.item(0); - } - IIOMetadataNode childNode = new IIOMetadataNode(name); - parentNode.appendChild(childNode); - return childNode; - } - - // sets the DPI metadata - private static void setDPI(IIOMetadata metadata, int dpi, String formatName) - throws IIOInvalidTreeException - { - IIOMetadataNode root = (IIOMetadataNode) metadata.getAsTree(MetaUtil.STANDARD_METADATA_FORMAT); - - IIOMetadataNode dimension = getOrCreateChildNode(root, "Dimension"); - - // PNG writer doesn't conform to the spec which is - // "The width of a pixel, in millimeters" - // but instead counts the pixels per millimeter - float res = "PNG".equals(formatName.toUpperCase()) - ? dpi / 25.4f - : 25.4f / dpi; - - IIOMetadataNode child; - - child = getOrCreateChildNode(dimension, "HorizontalPixelSize"); - child.setAttribute("value", Double.toString(res)); - - child = getOrCreateChildNode(dimension, "VerticalPixelSize"); - child.setAttribute("value", Double.toString(res)); - - metadata.mergeTree(MetaUtil.STANDARD_METADATA_FORMAT, root); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.tools.imageio; + +import java.awt.color.ColorSpace; +import java.awt.color.ICC_ColorSpace; +import java.awt.color.ICC_Profile; +import java.awt.image.BufferedImage; + +import java.io.BufferedOutputStream; +import java.io.ByteArrayOutputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.zip.DeflaterOutputStream; + +import javax.imageio.IIOImage; +import javax.imageio.ImageIO; +import javax.imageio.ImageTypeSpecifier; +import javax.imageio.ImageWriteParam; +import javax.imageio.ImageWriter; +import javax.imageio.metadata.IIOInvalidTreeException; +import javax.imageio.metadata.IIOMetadata; +import javax.imageio.metadata.IIOMetadataNode; +import javax.imageio.stream.ImageOutputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +/** + * Handles some ImageIO operations. + */ +public final class ImageIOUtil +{ + /** + * Log instance + */ + private static final Log LOG = LogFactory.getLog(ImageIOUtil.class); + + private ImageIOUtil() + { + } + + /** + * Writes a buffered image to a file using the given image format. The compression is set for + * maximum compression for PNG and maximum quality for all other file formats. 
See + * {@link #writeImage(BufferedImage image, String formatName, OutputStream output, int dpi, float compressionQuality)} + * for more details. + * + * @param image the image to be written + * @param filename used to construct the filename for the individual image. + * Its suffix will be used as the image format. + * @param dpi the resolution in dpi (dots per inch) to be used in metadata + * @return true if the image file was produced, false if there was an error. + * @throws IOException if an I/O error occurs + */ + public static boolean writeImage(BufferedImage image, String filename, + int dpi) throws IOException + { + float compressionQuality = 1f; + String formatName = filename.substring(filename.lastIndexOf('.') + 1); + if ("png".equalsIgnoreCase(formatName)) + { + // PDFBOX-4655: prevent huge PNG files on jdk11 / jdk12 / jjdk13 + compressionQuality = 0f; + } + return writeImage(image, filename, dpi, compressionQuality); + } + + /** + * Writes a buffered image to a file using the given image format. + * See {@link #writeImage(BufferedImage image, String formatName, + * OutputStream output, int dpi, float compressionQuality)} for more details. + * + * @param image the image to be written + * @param filename used to construct the filename for the individual image. Its suffix will be + * used as the image format. + * @param dpi the resolution in dpi (dots per inch) to be used in metadata + * @param compressionQuality quality to be used when compressing the image (0 < + * compressionQuality < 1.0f). See {@link ImageWriteParam#setCompressionQuality(float)} for + * more details. + * @return true if the image file was produced, false if there was an error. 
+ * @throws IOException if an I/O error occurs + */ + public static boolean writeImage(BufferedImage image, String filename, + int dpi, float compressionQuality) throws IOException + { + OutputStream output = new BufferedOutputStream(new FileOutputStream(filename)); + try + { + String formatName = filename.substring(filename.lastIndexOf('.') + 1); + return writeImage(image, formatName, output, dpi, compressionQuality); + } + finally + { + output.close(); + } + } + + /** + * Writes a buffered image to a file using the given image format. See + * {@link #writeImage(BufferedImage image, String formatName, + * OutputStream output, int dpi, float compressionQuality)} for more details. + * + * @param image the image to be written + * @param formatName the target format (ex. "png") which is also the suffix + * for the filename + * @param filename used to construct the filename for the individual image. + * The formatName parameter will be used as the suffix. + * @param dpi the resolution in dpi (dots per inch) to be used in metadata + * @return true if the image file was produced, false if there was an error. + * @throws IOException if an I/O error occurs + * @deprecated use + * {@link #writeImage(BufferedImage image, String filename, int dpi)}, which + * uses the full filename instead of just the prefix. + */ + @Deprecated + public static boolean writeImage(BufferedImage image, String formatName, String filename, + int dpi) throws IOException + { + OutputStream output = new BufferedOutputStream(new FileOutputStream(filename + "." + formatName)); + try + { + return writeImage(image, formatName, output, dpi); + } + finally + { + output.close(); + } + } + + /** + * Writes a buffered image to a file using the given image format. The compression is set for + * maximum compression for PNG and maximum quality for all other file formats. 
See + * {@link #writeImage(BufferedImage image, String formatName, OutputStream output, int dpi, float compressionQuality)} + * for more details. + * + * @param image the image to be written + * @param formatName the target format (ex. "png") + * @param output the output stream to be used for writing + * @return true if the image file was produced, false if there was an error. + * @throws IOException if an I/O error occurs + */ + public static boolean writeImage(BufferedImage image, String formatName, OutputStream output) + throws IOException + { + return writeImage(image, formatName, output, 72); + } + + /** + * Writes a buffered image to a file using the given image format. The compression is set for + * maximum compression for PNG and maximum quality for all other file formats. See + * {@link #writeImage(BufferedImage image, String formatName, OutputStream output, int dpi, float compressionQuality)} + * for more details. + * + * @param image the image to be written + * @param formatName the target format (ex. "png") + * @param output the output stream to be used for writing + * @param dpi the resolution in dpi (dots per inch) to be used in metadata + * @return true if the image file was produced, false if there was an error. + * @throws IOException if an I/O error occurs + */ + public static boolean writeImage(BufferedImage image, String formatName, OutputStream output, + int dpi) throws IOException + { + float compressionQuality = 1f; + if ("png".equalsIgnoreCase(formatName)) + { + // PDFBOX-4655: prevent huge PNG files on jdk11 / jdk12 / jdk13 + compressionQuality = 0f; + } + return writeImage(image, formatName, output, dpi, compressionQuality); + } + + /** + * Writes a buffered image to a file using the given image format. + * Compression is fixed for PNG, GIF, BMP and WBMP, dependent of the compressionQuality + * parameter for JPG, and dependent of bit count for TIFF (a bitonal image + * will be compressed with CCITT G4, a color image with LZW).
Creating a + * TIFF image is only supported if the jai_imageio library (or equivalent) + * is in the class path. + * + * @param image the image to be written + * @param formatName the target format (ex. "png") + * @param output the output stream to be used for writing + * @param dpi the resolution in dpi (dots per inch) to be used in metadata + * @param compressionQuality quality to be used when compressing the image (0 < + * compressionQuality < 1.0f). See {@link ImageWriteParam#setCompressionQuality(float)} for + * more details. + * @return true if the image file was produced, false if there was an error. + * @throws IOException if an I/O error occurs + */ + public static boolean writeImage(BufferedImage image, String formatName, OutputStream output, + int dpi, float compressionQuality) throws IOException + { + return writeImage(image, formatName, output, dpi, compressionQuality, ""); + } + + /** + * Writes a buffered image to a file using the given image format. + * Compression is fixed for PNG, GIF, BMP and WBMP, dependent of the compressionQuality + * parameter for JPG, and dependent of bit count for TIFF (a bitonal image + * will be compressed with CCITT G4, a color image with LZW). Creating a + * TIFF image is only supported if the jai_imageio library is in the class + * path. + * + * @param image the image to be written + * @param formatName the target format (ex. "png") + * @param output the output stream to be used for writing + * @param dpi the resolution in dpi (dots per inch) to be used in metadata + * @param compressionQuality quality to be used when compressing the image (0 < + * compressionQuality < 1.0f). See {@link ImageWriteParam#setCompressionQuality(float)} for + * more details. + * @param compressionType Advanced users only, and only relevant for TIFF + * files: If null, save uncompressed; if empty string, use logic explained + * above; other valid values are found in the javadoc of + * TIFFImageWriteParam. 
+ * @return true if the image file was produced, false if there was an error. + * @throws IOException if an I/O error occurs + */ + public static boolean writeImage(BufferedImage image, String formatName, OutputStream output, + int dpi, float compressionQuality, String compressionType) throws IOException + { + ImageOutputStream imageOutput = null; + ImageWriter writer = null; + try + { + // find suitable image writer + Iterator writers = ImageIO.getImageWritersByFormatName(formatName); + ImageWriteParam param = null; + IIOMetadata metadata = null; + // Loop until we get the best driver, i.e. one that supports + // setting dpi in the standard metadata format; however we'd also + // accept a driver that can't, if a better one can't be found + while (writers.hasNext()) + { + if (writer != null) + { + writer.dispose(); + } + writer = writers.next(); + if (writer == null) + { + continue; + } + param = writer.getDefaultWriteParam(); + metadata = writer.getDefaultImageMetadata(new ImageTypeSpecifier(image), param); + if (metadata != null + && !metadata.isReadOnly() + && metadata.isStandardMetadataFormatSupported()) + { + break; + } + } + if (writer == null) + { + LOG.error("No ImageWriter found for '" + formatName + "' format"); + LOG.error("Supported formats: " + Arrays.toString(ImageIO.getWriterFormatNames())); + return false; + } + + boolean isTifFormat = formatName.toLowerCase().startsWith("tif"); + + // compression + if (param != null && param.canWriteCompressed()) + { + param.setCompressionMode(ImageWriteParam.MODE_EXPLICIT); + if (isTifFormat) + { + if ("".equals(compressionType)) + { + // default logic + TIFFUtil.setCompressionType(param, image); + } + else + { + param.setCompressionType(compressionType); + if (compressionType != null) + { + param.setCompressionQuality(compressionQuality); + } + } + } + else + { + param.setCompressionType(param.getCompressionTypes()[0]); + param.setCompressionQuality(compressionQuality); + } + } + + if (metadata != null) + { + if 
(isTifFormat) + { + // TIFF metadata + TIFFUtil.updateMetadata(metadata, image, dpi); + } + else if ("jpeg".equalsIgnoreCase(formatName) || "jpg".equalsIgnoreCase(formatName)) + { + // This segment must be run before other meta operations, + // or else "IIOInvalidTreeException: Invalid node: app0JFIF" + // The other (general) "meta" methods may not be used, because + // this will break the reading of the meta data in tests + JPEGUtil.updateMetadata(metadata, dpi); + } + else + { + // write metadata if possible + if (!metadata.isReadOnly() && metadata.isStandardMetadataFormatSupported()) + { + setDPI(metadata, dpi, formatName); + } + } + } + + if (metadata != null && formatName.equalsIgnoreCase("png") && hasICCProfile(image)) + { + // add ICC profile + IIOMetadataNode iccp = new IIOMetadataNode("iCCP"); + ICC_Profile profile = ((ICC_ColorSpace) image.getColorModel().getColorSpace()) + .getProfile(); + iccp.setUserObject(getAsDeflatedBytes(profile)); + iccp.setAttribute("profileName", "unknown"); + iccp.setAttribute("compressionMethod", "deflate"); + Node nativeTree = metadata.getAsTree(metadata.getNativeMetadataFormatName()); + nativeTree.appendChild(iccp); + metadata.mergeTree(metadata.getNativeMetadataFormatName(), nativeTree); + } + + // write + imageOutput = ImageIO.createImageOutputStream(output); + writer.setOutput(imageOutput); + writer.write(null, new IIOImage(image, null, metadata), param); + } + finally + { + if (writer != null) + { + writer.dispose(); + } + if (imageOutput != null) + { + imageOutput.close(); + } + } + return true; + } + + /** + * Determine if the given image has an ICC profile that should be embedded. + * @param image the image to analyse + * @return true if this image has an ICC profile, that is different from sRGB.
+ */ + private static boolean hasICCProfile(BufferedImage image) + { + ColorSpace colorSpace = image.getColorModel().getColorSpace(); + // We can only export ICC color spaces + if (!(colorSpace instanceof ICC_ColorSpace)) + { + return false; + } + + // The colorspace should not be sRGB and not be the builtin gray colorspace + return !colorSpace.isCS_sRGB() && colorSpace != ColorSpace.getInstance(ColorSpace.CS_GRAY); + } + + private static byte[] getAsDeflatedBytes(ICC_Profile profile) throws IOException + { + byte[] data = profile.getData(); + + ByteArrayOutputStream deflated = new ByteArrayOutputStream(); + DeflaterOutputStream deflater = new DeflaterOutputStream(deflated); + deflater.write(data); + deflater.close(); + + return deflated.toByteArray(); + } + + /** + * Gets the named child node, or creates and attaches it. + * + * @param parentNode the parent node + * @param name name of the child node + * + * @return the existing or just created child node + */ + private static IIOMetadataNode getOrCreateChildNode(IIOMetadataNode parentNode, String name) + { + NodeList nodeList = parentNode.getElementsByTagName(name); + if (nodeList.getLength() > 0) + { + return (IIOMetadataNode) nodeList.item(0); + } + IIOMetadataNode childNode = new IIOMetadataNode(name); + parentNode.appendChild(childNode); + return childNode; + } + + // sets the DPI metadata + private static void setDPI(IIOMetadata metadata, int dpi, String formatName) + throws IIOInvalidTreeException + { + IIOMetadataNode root = (IIOMetadataNode) metadata.getAsTree(MetaUtil.STANDARD_METADATA_FORMAT); + + IIOMetadataNode dimension = getOrCreateChildNode(root, "Dimension"); + + // PNG writer doesn't conform to the spec which is + // "The width of a pixel, in millimeters" + // but instead counts the pixels per millimeter + float res = "PNG".equalsIgnoreCase(formatName) + ? 
dpi / 25.4f + : 25.4f / dpi; + + IIOMetadataNode child; + + child = getOrCreateChildNode(dimension, "HorizontalPixelSize"); + child.setAttribute("value", Double.toString(res)); + + child = getOrCreateChildNode(dimension, "VerticalPixelSize"); + child.setAttribute("value", Double.toString(res)); + + metadata.mergeTree(MetaUtil.STANDARD_METADATA_FORMAT, root); + } +} diff --git a/tools/src/main/java/org/apache/pdfbox/tools/imageio/MetaUtil.java b/tools/src/main/java/org/apache/pdfbox/tools/imageio/MetaUtil.java index 07fdfbddb72..a18ee8b7a37 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/imageio/MetaUtil.java +++ b/tools/src/main/java/org/apache/pdfbox/tools/imageio/MetaUtil.java @@ -19,6 +19,7 @@ import java.io.StringWriter; import javax.imageio.metadata.IIOMetadata; import javax.imageio.metadata.IIOMetadataNode; +import javax.xml.XMLConstants; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -59,7 +60,9 @@ static void debugLogMetadata(IIOMetadata metadata, String format) { StringWriter xmlStringWriter = new StringWriter(); StreamResult streamResult = new StreamResult(xmlStringWriter); - Transformer transformer = TransformerFactory.newInstance().newTransformer(); + TransformerFactory transformerFactory = TransformerFactory.newInstance(); + transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + Transformer transformer = transformerFactory.newTransformer(); // see http://stackoverflow.com/a/1264872/535646 transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); diff --git a/tools/src/main/java/org/apache/pdfbox/tools/imageio/TIFFUtil.java b/tools/src/main/java/org/apache/pdfbox/tools/imageio/TIFFUtil.java index 3753f9aa5fd..3bca5d3cded 100644 --- a/tools/src/main/java/org/apache/pdfbox/tools/imageio/TIFFUtil.java +++ 
b/tools/src/main/java/org/apache/pdfbox/tools/imageio/TIFFUtil.java @@ -24,7 +24,6 @@ import javax.imageio.metadata.IIOMetadata; import javax.imageio.metadata.IIOMetadataNode; import java.awt.image.BufferedImage; -import static org.apache.pdfbox.tools.imageio.MetaUtil.SUN_TIFF_FORMAT; import static org.apache.pdfbox.tools.imageio.MetaUtil.debugLogMetadata; /** @@ -35,26 +34,10 @@ final class TIFFUtil { private static final Log LOG = LogFactory.getLog(TIFFUtil.class); - private static String tagSetClassName = "com.sun.media.imageio.plugins.tiff.BaselineTIFFTagSet"; - private TIFFUtil() { } - static - { - try - { - String alternateClassName = "com.github.jaiimageio.plugins.tiff.BaselineTIFFTagSet"; - Class.forName(alternateClassName); - tagSetClassName = alternateClassName; - } - catch (ClassNotFoundException ex) - { - // ignore - } - } - /** * Sets the ImageIO parameter compression type based on the given image. * @param image buffered image used to decide compression type @@ -76,12 +59,14 @@ public static void setCompressionType(ImageWriteParam param, BufferedImage image } /** - * Updates the given ImageIO metadata with Sun's custom TIFF tags. - * {@see https://svn.apache.org/repos/asf/xmlgraphics/commons/tags/commons-1_3_1/src/java/org/ - * apache/xmlgraphics/image/writer/imageio/ImageIOTIFFImageWriter.java} - * {@see http://download.java.net/media/jai-imageio/javadoc/1.0_01/com/sun/media/imageio/ - * plugins/tiff/package-summary.html} - * {@see http://partners.adobe.com/public/developer/tiff/index.html} + * Updates the given ImageIO metadata with Sun's custom TIFF tags, as described in + * the org.apache.xmlgraphics.image.writer.imageio.ImageIOTIFFImageWriter + * sources, + * the com.sun.media.imageio.plugins.tiff + * package javadoc + * and the TIFF + * specification. 
+ * * @param image buffered image which will be written * @param metadata ImageIO metadata * @param dpi image dots per inch @@ -90,20 +75,20 @@ public static void setCompressionType(ImageWriteParam param, BufferedImage image static void updateMetadata(IIOMetadata metadata, BufferedImage image, int dpi) throws IIOInvalidTreeException { - debugLogMetadata(metadata, SUN_TIFF_FORMAT); - - if (!SUN_TIFF_FORMAT.equals(metadata.getNativeMetadataFormatName())) + String metaDataFormat = metadata.getNativeMetadataFormatName(); + if (metaDataFormat == null) { - LOG.debug("Using unknown TIFF image writer: " + metadata.getNativeMetadataFormatName()); + LOG.debug("TIFF image writer doesn't support any data format"); return; } - IIOMetadataNode root = new IIOMetadataNode(SUN_TIFF_FORMAT); + debugLogMetadata(metadata, metaDataFormat); + + IIOMetadataNode root = new IIOMetadataNode(metaDataFormat); IIOMetadataNode ifd; if (root.getElementsByTagName("TIFFIFD").getLength() == 0) { ifd = new IIOMetadataNode("TIFFIFD"); - ifd.setAttribute("tagSets", tagSetClassName); root.appendChild(ifd); } else @@ -127,9 +112,9 @@ static void updateMetadata(IIOMetadata metadata, BufferedImage image, int dpi) ifd.appendChild(createShortField(262, "PhotometricInterpretation", 0)); } - metadata.mergeTree(SUN_TIFF_FORMAT, root); + metadata.mergeTree(metaDataFormat, root); - debugLogMetadata(metadata, SUN_TIFF_FORMAT); + debugLogMetadata(metadata, metaDataFormat); } private static IIOMetadataNode createShortField(int tiffTagNumber, String name, int val) diff --git a/tools/src/test/java/org/apache/pdfbox/tools/imageio/TestImageIOUtils.java b/tools/src/test/java/org/apache/pdfbox/tools/imageio/TestImageIOUtils.java index 93d4c4b4b62..681fbd6e5c3 100644 --- a/tools/src/test/java/org/apache/pdfbox/tools/imageio/TestImageIOUtils.java +++ b/tools/src/test/java/org/apache/pdfbox/tools/imageio/TestImageIOUtils.java @@ -24,8 +24,10 @@ import java.io.File; import java.io.FileInputStream; import 
java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.FilenameFilter; import java.io.IOException; +import java.io.OutputStream; import java.util.HashSet; import java.util.Iterator; import java.util.Set; @@ -49,6 +51,7 @@ import org.apache.pdfbox.rendering.PDFRenderer; import org.apache.pdfbox.util.filetypedetector.FileType; import org.apache.pdfbox.util.filetypedetector.FileTypeDetector; +import org.junit.Assert; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; @@ -85,6 +88,11 @@ void checkSaveResources(PDResources resources) throws IOException { suffix = "JPEG2000"; } + if ("jb2".equals(suffix)) + { + // jbig2 usually not available + suffix = "PNG"; + } boolean writeOK = ImageIOUtil.writeImage(imageObject.getImage(), suffix, new ByteArrayOutputStream()); assertTrue(writeOK); @@ -107,7 +115,6 @@ else if (xobject instanceof PDFormXObject) private void doTestFile(File file, String outDir) throws IOException { PDDocument document = null; - String imageType = "png"; LOG.info("Preparing to convert " + file.getName()); try { @@ -118,43 +125,55 @@ private void doTestFile(File file, String outDir) throws IOException checkSaveResources(document.getPage(0).getResources()); // testing PNG - writeImage(document, imageType, outDir + file.getName() + "-", ImageType.RGB, dpi); + String imageType = "png"; + writeImage(document, imageType, outDir + file.getName() + "-", ImageType.RGB, dpi, 0, ""); checkResolution(outDir + file.getName() + "-1." + imageType, (int) dpi); checkFileTypeByContent(outDir + file.getName() + "-1." + imageType, FileType.PNG); // testing JPG/JPEG imageType = "jpg"; - writeImage(document, imageType, outDir + file.getName() + "-", ImageType.RGB, dpi); + writeImage(document, imageType, outDir + file.getName() + "-", ImageType.RGB, dpi, 0.5f, ""); checkResolution(outDir + file.getName() + "-1." + imageType, (int) dpi); checkFileTypeByContent(outDir + file.getName() + "-1." 
+ imageType, FileType.JPEG); // testing BMP imageType = "bmp"; - writeImage(document, imageType, outDir + file.getName() + "-", ImageType.RGB, dpi); + writeImage(document, imageType, outDir + file.getName() + "-", ImageType.RGB, dpi, 1, ""); checkResolution(outDir + file.getName() + "-1." + imageType, (int) dpi); checkFileTypeByContent(outDir + file.getName() + "-1." + imageType, FileType.BMP); // testing GIF imageType = "gif"; - writeImage(document, imageType, outDir + file.getName() + "-", ImageType.RGB, dpi); - // no META data posible for GIF, thus no dpi test + writeImage(document, imageType, outDir + file.getName() + "-", ImageType.RGB, dpi, 1, ""); + // no META data possible for GIF, thus no dpi test checkFileTypeByContent(outDir + file.getName() + "-1." + imageType, FileType.GIF); // testing WBMP imageType = "wbmp"; - writeImage(document, imageType, outDir + file.getName() + "-", ImageType.BINARY, dpi); - // no META data posible for WBMP, thus no dpi test + writeImage(document, imageType, outDir + file.getName() + "-", ImageType.BINARY, dpi, 1, ""); + // no META data possible for WBMP, thus no dpi test // testing TIFF imageType = "tif"; - writeImage(document, imageType, outDir + file.getName() + "-bw-", ImageType.BINARY, dpi); + writeImage(document, imageType, outDir + file.getName() + "-bw-", ImageType.BINARY, dpi, 1, ""); checkResolution(outDir + file.getName() + "-bw-1." + imageType, (int) dpi); checkTiffCompression(outDir + file.getName() + "-bw-1." + imageType, "CCITT T.6"); checkFileTypeByContent(outDir + file.getName() + "-bw-1." + imageType, FileType.TIFF); - writeImage(document, imageType, outDir + file.getName() + "-co-", ImageType.RGB, dpi); - checkResolution(outDir + file.getName() + "-co-1." + imageType, (int) dpi); - checkTiffCompression(outDir + file.getName() + "-co-1." + imageType, "LZW"); - checkFileTypeByContent(outDir + file.getName() + "-co-1." 
+ imageType, FileType.TIFF); + + writeImage(document, imageType, outDir + file.getName() + "-coLZW-", ImageType.RGB, dpi, 1, ""); + checkResolution(outDir + file.getName() + "-coLZW-1." + imageType, (int) dpi); + checkTiffCompression(outDir + file.getName() + "-coLZW-1." + imageType, "LZW"); + checkFileTypeByContent(outDir + file.getName() + "-coLZW-1." + imageType, FileType.TIFF); + + writeImage(document, imageType, outDir + file.getName() + "-coJPEG-", ImageType.RGB, dpi, 0.5f, "JPEG"); + checkResolution(outDir + file.getName() + "-coJPEG-1." + imageType, (int) dpi); + checkTiffCompression(outDir + file.getName() + "-coJPEG-1." + imageType, "JPEG"); + checkFileTypeByContent(outDir + file.getName() + "-coJPEG-1." + imageType, FileType.TIFF); + + writeImage(document, imageType, outDir + file.getName() + "-coNone-", ImageType.RGB, dpi, 1, null); + checkResolution(outDir + file.getName() + "-coNone-1." + imageType, (int) dpi); + checkTiffCompression(outDir + file.getName() + "-coNone-1." + imageType, "None"); + checkFileTypeByContent(outDir + file.getName() + "-coNone-1." + imageType, FileType.TIFF); } finally { @@ -231,16 +250,19 @@ private void checkNotBlank(String filename, BufferedImage newImage) } private void writeImage(PDDocument document, String imageFormat, String outputPrefix, - ImageType imageType, float dpi) throws IOException + ImageType imageType, float dpi, float compressionQuality, + String compressionType) throws IOException { PDFRenderer renderer = new PDFRenderer(document); BufferedImage image = renderer.renderImageWithDPI(0, dpi, imageType); String fileName = outputPrefix + 1; LOG.info("Writing: " + fileName + "." + imageFormat); System.out.println(" " + fileName + "." + imageFormat); // for Maven (keep me!) - boolean res = ImageIOUtil.writeImage(image, fileName + "." + imageFormat, Math.round(dpi)); + OutputStream os = new FileOutputStream(fileName + "." 
+ imageFormat); + boolean res = ImageIOUtil.writeImage(image, imageFormat, os, Math.round(dpi), compressionQuality, compressionType); + os.close(); assertTrue("ImageIOUtil.writeImage() failed for file " + fileName, res); - if ("jpg".equals(imageFormat) || "gif".equals(imageFormat)) + if ("jpg".equals(imageFormat) || "gif".equals(imageFormat) || "JPEG".equals(compressionType)) { // jpeg is lossy, gif has 256 colors, // so we can't check for content identity @@ -296,9 +318,9 @@ public boolean accept(File dir, String name) private void checkResolution(String filename, int expectedResolution) throws IOException { - assertFalse("Empty file " + filename, new File(filename).length() == 0); + Assert.assertNotEquals("Empty file " + filename, 0, new File(filename).length()); String suffix = filename.substring(filename.lastIndexOf('.') + 1); - if ("BMP".equals(suffix.toUpperCase())) + if ("BMP".equalsIgnoreCase(suffix)) { // BMP reader doesn't work checkBmpResolution(filename, expectedResolution); diff --git a/xmpbox/pom.xml b/xmpbox/pom.xml index 401bdaf7875..40580021ab3 100644 --- a/xmpbox/pom.xml +++ b/xmpbox/pom.xml @@ -27,10 +27,24 @@ org.apache.pdfbox pdfbox-parent - 2.0.0-SNAPSHOT + 2.0.25-SNAPSHOT ../parent/pom.xml + + + + [11,) + + + + javax.xml.bind + jaxb-api + provided + + + + @@ -58,7 +72,27 @@ ../pdfbox-checkstyle-5.xml - + + + + maven-surefire-plugin + + ${addmod} + + + + org.apache.maven.plugins + maven-jar-plugin + + + + org.apache.xmpbox + + + + + + diff --git a/xmpbox/src/main/java/org/apache/xmpbox/DateConverter.java b/xmpbox/src/main/java/org/apache/xmpbox/DateConverter.java index cca75ff9c9f..2ce34718359 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/DateConverter.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/DateConverter.java @@ -27,6 +27,7 @@ import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; +import java.util.Locale; import java.util.SimpleTimeZone; import java.util.TimeZone; import java.util.regex.Matcher; 
@@ -66,7 +67,7 @@ public final class DateConverter */ private DateConverter() { - }; + } /** * This will convert a string to a calendar. @@ -132,16 +133,9 @@ else if (date.startsWith("D:")) int timeZonePos = 12; if (date.length() - 12 > 5 || (date.length() - 12 == 3 && date.endsWith("Z"))) { - if (date.length() >= 14) - { - second = Integer.parseInt(date.substring(12, 14)); - } + second = Integer.parseInt(date.substring(12, 14)); timeZonePos = 14; } - else - { - second = 0; - } if (date.length() >= (timeZonePos + 1)) { @@ -190,6 +184,7 @@ else if (date.startsWith("D:")) } else { + updateZoneId(zone); retval = new GregorianCalendar(zone); } retval.clear(); @@ -204,7 +199,7 @@ else if (date.startsWith("D:")) && (date.substring(date.length() - 6, date.length() - 5).equals("+") || date.substring( date.length() - 6, date.length() - 5).equals("-"))) { - // thats a timezone string, remove the : + // that's a timezone string, remove the : date = date.substring(0, date.length() - 3) + date.substring(date.length() - 2); } for (int i = 0; (retval == null) && (i < POTENTIAL_FORMATS.length); i++) @@ -229,7 +224,43 @@ else if (date.startsWith("D:")) } return retval; } - + + /** + * Update the zone ID based on the raw offset. This is either GMT, GMT+hh:mm or GMT-hh:mm, where + * hh is between 0 and 14. The highest negative hour is -14, the highest positive hour is 12. + * Zones that don't fit in this schema are set to zone ID "unknown". + * + * @param tz the time zone to update.
+ */ + private static void updateZoneId(TimeZone tz) + { + int offset = tz.getRawOffset(); + char pm = '+'; + if (offset < 0) + { + pm = '-'; + offset = -offset; + } + int hh = offset / 3600000; + int mm = offset % 3600000 / 60000; + if (offset == 0) + { + tz.setID("GMT"); + } + else if (pm == '+' && hh <= 12) + { + tz.setID(String.format(Locale.US, "GMT+%02d:%02d", hh, mm)); + } + else if (pm == '-' && hh <= 14) + { + tz.setID(String.format(Locale.US, "GMT-%02d:%02d", hh, mm)); + } + else + { + tz.setID("unknown"); + } + } + /** * Convert the date to iso 8601 string format. * @@ -251,51 +282,50 @@ public static String toISO8601(Calendar cal) */ public static String toISO8601(Calendar cal, boolean printMillis) { - StringBuffer retval = new StringBuffer(); + StringBuilder retval = new StringBuilder(); retval.append(cal.get(Calendar.YEAR)); - retval.append("-"); - retval.append(String.format("%02d", cal.get(Calendar.MONTH) + 1)); - retval.append("-"); - retval.append(String.format("%02d", cal.get(Calendar.DAY_OF_MONTH))); - retval.append("T"); - retval.append(String.format("%02d", cal.get(Calendar.HOUR_OF_DAY))); - retval.append(":"); - retval.append(String.format("%02d", cal.get(Calendar.MINUTE))); - retval.append(":"); - retval.append(String.format("%02d", cal.get(Calendar.SECOND))); + retval.append('-'); + retval.append(String.format(Locale.US, "%02d", cal.get(Calendar.MONTH) + 1)); + retval.append('-'); + retval.append(String.format(Locale.US, "%02d", cal.get(Calendar.DAY_OF_MONTH))); + retval.append('T'); + retval.append(String.format(Locale.US, "%02d", cal.get(Calendar.HOUR_OF_DAY))); + retval.append(':'); + retval.append(String.format(Locale.US, "%02d", cal.get(Calendar.MINUTE))); + retval.append(':'); + retval.append(String.format(Locale.US, "%02d", cal.get(Calendar.SECOND))); if (printMillis) { - retval.append("."); - retval.append(String.format("%03d", cal.get(Calendar.MILLISECOND))); + retval.append('.'); + retval.append(String.format(Locale.US, "%03d", 
cal.get(Calendar.MILLISECOND))); } int timeZone = cal.get(Calendar.ZONE_OFFSET) + cal.get(Calendar.DST_OFFSET); if (timeZone < 0) { - retval.append("-"); + retval.append('-'); } else { - retval.append("+"); + retval.append('+'); } timeZone = Math.abs(timeZone); - // milliseconds/1000 = seconds = seconds / 60 = minutes = minutes/60 = - // hours + // milliseconds/1000 = seconds; seconds / 60 = minutes; minutes/60 = hours int hours = timeZone / 1000 / 60 / 60; - int minutes = (timeZone - (hours * 1000 * 60 * 60)) / 1000 / 1000; + int minutes = (timeZone - (hours * 1000 * 60 * 60)) / 1000 / 60; if (hours < 10) { - retval.append("0"); + retval.append('0'); } - retval.append(Integer.toString(hours)); - retval.append(":"); + retval.append(hours); + retval.append(':'); if (minutes < 10) { - retval.append("0"); + retval.append('0'); } - retval.append(Integer.toString(minutes)); + retval.append(minutes); return retval.toString(); } @@ -328,20 +358,23 @@ private static Calendar fromISO8601(String dateString) if (timeZoneString != null) { - - Calendar cal = javax.xml.bind.DatatypeConverter.parseDateTime( - dateString.substring(0, dateString.indexOf(timeZoneString)) - ); - + // can't use parseDateTime immediately, first do handling for time that has no seconds + int teeIndex = dateString.indexOf('T'); + int tzIndex = dateString.indexOf(timeZoneString); + String toParse = dateString.substring(0, tzIndex); + if (tzIndex - teeIndex == 6) + { + toParse = dateString.substring(0, tzIndex) + ":00"; + } + Calendar cal = javax.xml.bind.DatatypeConverter.parseDateTime(toParse); + TimeZone z = TimeZone.getTimeZone(timeZoneString); - cal.setTimeZone(z); - + cal.setTimeZone(z); return cal; } else { - // can't use parseDateTime immediately, - // first do handling for time that has no seconds + // can't use parseDateTime immediately, first do handling for time that has no seconds int teeIndex = dateString.indexOf('T'); if (teeIndex == -1) { diff --git 
a/xmpbox/src/main/java/org/apache/xmpbox/XMPMetadata.java b/xmpbox/src/main/java/org/apache/xmpbox/XMPMetadata.java index 238b370044f..496e1984cc8 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/XMPMetadata.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/XMPMetadata.java @@ -66,7 +66,7 @@ public class XMPMetadata private TypeMapping typeMapping; /** - * Contructor of an empty default XMPMetaData. + * Constructor of an empty default XMPMetaData. * */ protected XMPMetadata() @@ -176,10 +176,9 @@ public String getXpacketId() public List getAllSchemas() { List schem = new ArrayList(); - Iterator it = schemas.iterator(); - while (it.hasNext()) + for (XMPSchema schema : schemas) { - schem.add(it.next()); + schem.add(schema); } return schem; } @@ -207,10 +206,10 @@ public String getEndXPacket() /** * Get the XMPSchema for the specified namespace. * - * Return the schema corresponding to this nsURI
      + * Return the schema corresponding to this nsURI
      * BE CAREFUL: typically, Metadata should contain one schema for each type. * This method returns the first schema encountered - * corresponding to this NSURI.
      + * corresponding to this NSURI.
      * Return null if unknown * * @param nsURI The namespace URI corresponding to the schema wanted @@ -234,10 +233,10 @@ public XMPSchema getSchema(String nsURI) /** * Get the XMPSchema for the specified Class. * - * Return the schema corresponding to this Class
      + * Return the schema corresponding to this Class
      * BE CAREFUL: typically, Metadata should contain one schema for each type. * This method returns the first schema encountered - * corresponding to this Class.
      + * corresponding to this Class.
      * Return null if unknown * * @param clz The Class corresponding to the schema wanted diff --git a/xmpbox/src/main/java/org/apache/xmpbox/schema/DublinCoreSchema.java b/xmpbox/src/main/java/org/apache/xmpbox/schema/DublinCoreSchema.java index 65c58285925..c787d24eba0 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/schema/DublinCoreSchema.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/schema/DublinCoreSchema.java @@ -151,7 +151,7 @@ public void setCoverageProperty(TextType text) } /** - * set the autor(s) of the resource + * set the author(s) of the resource * * @param properName Value to add */ @@ -300,7 +300,7 @@ public void addRights(String lang, String value) } /** - * Set the unique identifer of the work from which this resource was derived + * Set the unique identifier of the work from which this resource was derived * * @param text * Value to set @@ -311,7 +311,7 @@ public void setSource(String text) } /** - * Set the unique identifer of the work from which this resource was derived + * Set the unique identifier of the work from which this resource was derived * * @param text * Property to set @@ -322,7 +322,7 @@ public void setSourceProperty(TextType text) } /** - * Set the unique identifer of the work from which this resource was derived + * Set the unique identifier of the work from which this resource was derived * * @param text * Property to set @@ -502,7 +502,7 @@ public List getDescriptionLanguages() * * @param lang * The language wanted - * @return Desription value for specified language + * @return Description value for specified language */ public String getDescription(String lang) { diff --git a/xmpbox/src/main/java/org/apache/xmpbox/schema/PDFAIdentificationSchema.java b/xmpbox/src/main/java/org/apache/xmpbox/schema/PDFAIdentificationSchema.java index 2cea8b62f1d..8a50676dea5 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/schema/PDFAIdentificationSchema.java +++ 
b/xmpbox/src/main/java/org/apache/xmpbox/schema/PDFAIdentificationSchema.java @@ -190,7 +190,7 @@ public void setConformanceProperty(TextType conf) throws BadFieldValueException /** * Give the PDFAVersionId (as an integer) * - * @return Part value (Integer) + * @return Part value (Integer) or null if it is missing */ public Integer getPart() { diff --git a/xmpbox/src/main/java/org/apache/xmpbox/schema/XMPRightsManagementSchema.java b/xmpbox/src/main/java/org/apache/xmpbox/schema/XMPRightsManagementSchema.java index a0f5835ae09..29aeff874aa 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/schema/XMPRightsManagementSchema.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/schema/XMPRightsManagementSchema.java @@ -297,7 +297,7 @@ public String getCertificate() * Set the Certificate URL. * * @param url - * certficate url value to set + * certificate url value to set */ public void setCertificate(String url) { diff --git a/xmpbox/src/main/java/org/apache/xmpbox/schema/XMPSchema.java b/xmpbox/src/main/java/org/apache/xmpbox/schema/XMPSchema.java index 73efb44b5cc..9fcdcde0425 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/schema/XMPSchema.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/schema/XMPSchema.java @@ -632,19 +632,17 @@ private void removeUnqualifiedArrayValue(String arrayName, String fieldValue) if (array != null) { List toDelete = new ArrayList(); - Iterator it = array.getContainer().getAllProperties().iterator(); - while (it.hasNext()) + for (AbstractField abstractField : array.getContainer().getAllProperties()) { - AbstractSimpleProperty tmp = (AbstractSimpleProperty) it.next(); + AbstractSimpleProperty tmp = (AbstractSimpleProperty) abstractField; if (tmp.getStringValue().equals(fieldValue)) { toDelete.add(tmp); } } - Iterator eraseProperties = toDelete.iterator(); - while (eraseProperties.hasNext()) + for (AbstractField aToDelete : toDelete) { - array.getContainer().removeProperty(eraseProperties.next()); + 
array.getContainer().removeProperty(aToDelete); } } } @@ -746,24 +744,21 @@ public void removeUnqualifiedSequenceValue(String qualifiedSeqName, String seqVa */ public void removeUnqualifiedArrayValue(String arrayName, AbstractField fieldValue) { - String qualifiedArrayName = arrayName; - ArrayProperty array = (ArrayProperty) getAbstractProperty(qualifiedArrayName); + ArrayProperty array = (ArrayProperty) getAbstractProperty(arrayName); if (array != null) { List toDelete = new ArrayList(); - Iterator it = array.getContainer().getAllProperties().iterator(); - while (it.hasNext()) + for (AbstractField abstractField : array.getContainer().getAllProperties()) { - AbstractSimpleProperty tmp = (AbstractSimpleProperty) it.next(); + AbstractSimpleProperty tmp = (AbstractSimpleProperty) abstractField; if (tmp.equals(fieldValue)) { toDelete.add(tmp); } } - Iterator eraseProperties = toDelete.iterator(); - while (eraseProperties.hasNext()) + for (AbstractField aToDelete : toDelete) { - array.getContainer().removeProperty(eraseProperties.next()); + array.getContainer().removeProperty(aToDelete); } } } @@ -791,8 +786,7 @@ public void removeUnqualifiedSequenceValue(String qualifiedSeqName, AbstractFiel */ public void addUnqualifiedSequenceValue(String simpleSeqName, String seqValue) { - String qualifiedSeqName = simpleSeqName; - ArrayProperty seq = (ArrayProperty) getAbstractProperty(qualifiedSeqName); + ArrayProperty seq = (ArrayProperty) getAbstractProperty(simpleSeqName); TextType li = createTextType(XmpConstants.LIST_NAME, seqValue); if (seq != null) { @@ -839,8 +833,7 @@ public void addBagValue(String qualifiedSeqName, AbstractField seqValue) */ public void addUnqualifiedSequenceValue(String seqName, AbstractField seqValue) { - String qualifiedSeqName = seqName; - ArrayProperty seq = (ArrayProperty) getAbstractProperty(qualifiedSeqName); + ArrayProperty seq = (ArrayProperty) getAbstractProperty(seqName); if (seq != null) { seq.getContainer().addProperty(seqValue); @@ -884,24 
+877,20 @@ public List getUnqualifiedSequenceValueList(String seqName) */ public void removeUnqualifiedSequenceDateValue(String seqName, Calendar date) { - String qualifiedSeqName = seqName; - ArrayProperty seq = (ArrayProperty) getAbstractProperty(qualifiedSeqName); + ArrayProperty seq = (ArrayProperty) getAbstractProperty(seqName); if (seq != null) { List toDelete = new ArrayList(); - Iterator it = seq.getContainer().getAllProperties().iterator(); - while (it.hasNext()) + for (AbstractField tmp : seq.getContainer().getAllProperties()) { - AbstractField tmp = it.next(); if (tmp instanceof DateType && ((DateType) tmp).getValue().equals(date)) { toDelete.add(tmp); } } - Iterator eraseProperties = toDelete.iterator(); - while (eraseProperties.hasNext()) + for (AbstractField aToDelete : toDelete) { - seq.getContainer().removeProperty(eraseProperties.next()); + seq.getContainer().removeProperty(aToDelete); } } } @@ -946,9 +935,8 @@ public void addUnqualifiedSequenceDateValue(String seqName, Calendar date) */ public List getUnqualifiedSequenceDateValueList(String seqName) { - String qualifiedSeqName = seqName; List retval = null; - ArrayProperty seq = (ArrayProperty) getAbstractProperty(qualifiedSeqName); + ArrayProperty seq = (ArrayProperty) getAbstractProperty(seqName); if (seq != null) { retval = new ArrayList(); @@ -1001,10 +989,9 @@ public void reorganizeAltOrder(ComplexPropertyContainer alt) reordered.add(tmp); toDelete.add(tmp); } - Iterator eraseProperties = toDelete.iterator(); - while (eraseProperties.hasNext()) + for (AbstractField aToDelete : toDelete) { - alt.removeProperty(eraseProperties.next()); + alt.removeProperty(aToDelete); } it = reordered.iterator(); while (it.hasNext()) @@ -1026,12 +1013,11 @@ public void reorganizeAltOrder(ComplexPropertyContainer alt) */ public void setUnqualifiedLanguagePropertyValue(String name, String language, String value) { - String qualifiedName = name; if (language == null || language.isEmpty()) { language = 
XmpConstants.X_DEFAULT; } - AbstractField property = getAbstractProperty(qualifiedName); + AbstractField property = getAbstractProperty(name); ArrayProperty arrayProp; if (property != null) { @@ -1129,26 +1115,17 @@ public List getUnqualifiedLanguagePropertyLanguagesValue(String name) { if (property instanceof ArrayProperty) { - List retval = new ArrayList(); ArrayProperty arrayProp = (ArrayProperty) property; - for (AbstractField child : arrayProp.getContainer().getAllProperties()) + List allProperties = arrayProp.getContainer().getAllProperties(); + List retval = new ArrayList(allProperties.size()); + for (AbstractField child : allProperties) { Attribute text = child.getAttribute(XmpConstants.LANG_NAME); - if (text != null) - { - retval.add(text.getValue()); - } - else - { - retval.add(XmpConstants.X_DEFAULT); - } + retval.add(text != null ? text.getValue() : XmpConstants.X_DEFAULT); } return retval; } - else - { - throw new IllegalArgumentException("The property '" + name + "' is not of Lang Alt type"); - } + throw new IllegalArgumentException("The property '" + name + "' is not of Lang Alt type"); } // no property with that name return null; @@ -1211,10 +1188,9 @@ private boolean mergeComplexProperty(Iterator itNewValues, ArrayP while (itNewValues.hasNext()) { TextType tmpNewValue = (TextType) itNewValues.next(); - Iterator itOldValues = arrayProperty.getContainer().getAllProperties().iterator(); - while (itOldValues.hasNext()) + for (AbstractField abstractField : arrayProperty.getContainer().getAllProperties()) { - TextType tmpOldValue = (TextType) itOldValues.next(); + TextType tmpOldValue = (TextType) abstractField; if (tmpOldValue.getStringValue().equals(tmpNewValue.getStringValue())) { return true; @@ -1229,7 +1205,7 @@ private boolean mergeComplexProperty(Iterator itNewValues, ArrayP * Get an AbstractField list corresponding to the content of an array * property. * - * @param name The property name whitout namespace. 
+ * @param name The property name without namespace. * @return List of properties contained in the array property. * @throws BadFieldValueException If the property with the requested name isn't an array. */ diff --git a/xmpbox/src/main/java/org/apache/xmpbox/schema/XMPSchemaFactory.java b/xmpbox/src/main/java/org/apache/xmpbox/schema/XMPSchemaFactory.java index 673083e280f..148a7b41bb6 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/schema/XMPSchemaFactory.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/schema/XMPSchemaFactory.java @@ -76,7 +76,7 @@ public String getNamespace() * * @param name * The property name - * @return null if propery name is unknown + * @return null if property name is unknown */ public PropertyType getPropertyType(String name) { @@ -116,20 +116,15 @@ else if (prefix != null && !"".equals(prefix)) schemaArgs = new Object[] { metadata }; } - Constructor schemaConstructor; try { - schemaConstructor = schemaClass.getConstructor(argsClass); - schema = schemaConstructor.newInstance(schemaArgs); - if (schema != null) - { - metadata.addSchema(schema); - } + schema = schemaClass.getDeclaredConstructor(argsClass).newInstance(schemaArgs); + metadata.addSchema(schema); return schema; } catch (Exception e) { - throw new XmpSchemaException("Cannot instanciate specified object schema", e); + throw new XmpSchemaException("Cannot instantiate specified object schema", e); } } diff --git a/xmpbox/src/main/java/org/apache/xmpbox/type/AbstractField.java b/xmpbox/src/main/java/org/apache/xmpbox/type/AbstractField.java index 7185b9fbd28..1c7ac2ba256 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/type/AbstractField.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/type/AbstractField.java @@ -29,7 +29,7 @@ import org.apache.xmpbox.XMPMetadata; /** - * Astract Object representation of a XMP 'field' (-> Properties and specific Schemas) + * Abstract Object representation of a XMP 'field' (-> Properties and specific Schemas) * * @author a183132 * diff --git 
a/xmpbox/src/main/java/org/apache/xmpbox/type/AbstractSimpleProperty.java b/xmpbox/src/main/java/org/apache/xmpbox/type/AbstractSimpleProperty.java index 5d0137493e2..6723ce12464 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/type/AbstractSimpleProperty.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/type/AbstractSimpleProperty.java @@ -99,10 +99,7 @@ public Object getRawValue() @Override public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("[").append(this.getClass().getSimpleName()).append(":"); - sb.append(getStringValue()).append("]"); - return sb.toString(); + return "[" + this.getClass().getSimpleName() + ":" + getStringValue() + "]"; } /** diff --git a/xmpbox/src/main/java/org/apache/xmpbox/type/ArrayProperty.java b/xmpbox/src/main/java/org/apache/xmpbox/type/ArrayProperty.java index 5a7a4a9e587..77d7d212c61 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/type/ArrayProperty.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/type/ArrayProperty.java @@ -23,7 +23,6 @@ import java.util.ArrayList; import java.util.Collections; -import java.util.Iterator; import java.util.List; import org.apache.xmpbox.XMPMetadata; @@ -44,7 +43,7 @@ public class ArrayProperty extends AbstractComplexProperty private final String prefix; /** - * Contructor of a complex property + * Constructor of a complex property * * @param metadata * The metadata to attach to this property @@ -72,17 +71,13 @@ public Cardinality getArrayType() public List getElementsAsString() { - List retval; - retval = new ArrayList(); - Iterator it = getContainer().getAllProperties().iterator(); - AbstractSimpleProperty tmp; - while (it.hasNext()) + List allProperties = getContainer().getAllProperties(); + List retval = new ArrayList(allProperties.size()); + for (AbstractField tmp : allProperties) { - tmp = (AbstractSimpleProperty) it.next(); - retval.add(tmp.getStringValue()); + retval.add(((AbstractSimpleProperty) tmp).getStringValue()); } - retval = 
Collections.unmodifiableList(retval); - return retval; + return Collections.unmodifiableList(retval); } /** diff --git a/xmpbox/src/main/java/org/apache/xmpbox/type/Attribute.java b/xmpbox/src/main/java/org/apache/xmpbox/type/Attribute.java index bbb3d5d95d9..36076b0ee88 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/type/Attribute.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/type/Attribute.java @@ -118,9 +118,7 @@ public void setValue(String value) public String toString() { - StringBuilder sb = new StringBuilder(80); - sb.append("[attr:{").append(nsURI).append("}").append(name).append("=").append(value).append("]"); - return sb.toString(); + return "[attr:{" + nsURI + "}" + name + "=" + value + "]"; } } diff --git a/xmpbox/src/main/java/org/apache/xmpbox/type/ComplexPropertyContainer.java b/xmpbox/src/main/java/org/apache/xmpbox/type/ComplexPropertyContainer.java index d5e9199f818..ff4ae05eba5 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/type/ComplexPropertyContainer.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/type/ComplexPropertyContainer.java @@ -79,10 +79,7 @@ protected AbstractField getFirstEquivalentProperty(String localName, Class structuredMappings; - - // ns -> type - private Map structuredNamespaces; - - // ns -> type - private Map definedStructuredNamespaces; - - private Map definedStructuredMappings; - - private final XMPMetadata metadata; - - private Map schemaMap; - - public TypeMapping(XMPMetadata metadata) - { - this.metadata = metadata; - initialize(); - } - - private static final Class[] SIMPLEPROPERTYCONSTPARAMS = new Class[] { XMPMetadata.class, String.class, - String.class, String.class, Object.class }; - - private void initialize() - { - // structured types - structuredMappings = new EnumMap(Types.class); - structuredNamespaces = new HashMap(); - for (Types type : Types.values()) - { - if (type.isStructured()) - { - Class clz = type.getImplementingClass().asSubclass( - AbstractStructuredType.class); - StructuredType st 
= clz.getAnnotation(StructuredType.class); - String ns = st.namespace(); - PropertiesDescription pm = initializePropMapping(clz); - structuredNamespaces.put(ns, type); - structuredMappings.put(type, pm); - } - } - - // define structured types - definedStructuredNamespaces = new HashMap(); - definedStructuredMappings = new HashMap(); - - // schema - schemaMap = new HashMap(); - addNameSpace(XMPBasicSchema.class); - addNameSpace(DublinCoreSchema.class); - addNameSpace(PDFAExtensionSchema.class); - addNameSpace(XMPMediaManagementSchema.class); - addNameSpace(AdobePDFSchema.class); - addNameSpace(PDFAIdentificationSchema.class); - addNameSpace(XMPRightsManagementSchema.class); - addNameSpace(PhotoshopSchema.class); - addNameSpace(XMPBasicJobTicketSchema.class); - addNameSpace(ExifSchema.class); - addNameSpace(TiffSchema.class); - addNameSpace(XMPageTextSchema.class); - } - - public void addToDefinedStructuredTypes(String typeName, String ns, PropertiesDescription pm) - { - definedStructuredNamespaces.put(ns, typeName); - definedStructuredMappings.put(typeName, pm); - } - - public PropertiesDescription getDefinedDescriptionByNamespace(String namespace) - { - String dt = definedStructuredNamespaces.get(namespace); - return this.definedStructuredMappings.get(dt); - } - - public AbstractStructuredType instanciateStructuredType(Types type, String propertyName) - throws BadFieldValueException - { - try - { - Class propertyTypeClass = type.getImplementingClass().asSubclass( - AbstractStructuredType.class); - Constructor construct = propertyTypeClass - .getConstructor(new Class[] { XMPMetadata.class }); - AbstractStructuredType tmp = construct.newInstance(metadata); - tmp.setPropertyName(propertyName); - return tmp; - } - catch (InvocationTargetException e) - { - throw new BadFieldValueException("Failed to instanciate structured type : " + type, e); - } - catch (IllegalArgumentException e) - { - throw new BadFieldValueException("Failed to instanciate structured type : " + 
type, e); - } - catch (InstantiationException e) - { - throw new BadFieldValueException("Failed to instanciate structured type : " + type, e); - } - catch (IllegalAccessException e) - { - throw new BadFieldValueException("Failed to instanciate structured type : " + type, e); - } - catch (SecurityException e) - { - throw new BadFieldValueException("Failed to instanciate structured type : " + type, e); - } - catch (NoSuchMethodException e) - { - throw new BadFieldValueException("Failed to instanciate structured type : " + type, e); - } - } - - public AbstractStructuredType instanciateDefinedType(String propertyName, String namespace) - { - return new DefinedStructuredType(metadata, namespace, null, propertyName); - } - - public AbstractSimpleProperty instanciateSimpleProperty(String nsuri, String prefix, String name, Object value, - Types type) - { - // constructor parameters - Object[] params = new Object[] { metadata, nsuri, prefix, name, value }; - // type - try - { - Class clz = type.getImplementingClass().asSubclass( - AbstractSimpleProperty.class); - Constructor cons = clz.getConstructor(SIMPLEPROPERTYCONSTPARAMS); - return cons.newInstance(params); - } - catch (NoSuchMethodError e) - { - throw new IllegalArgumentException("Failed to instanciate property", e); - } - catch (IllegalArgumentException e) - { - throw new IllegalArgumentException("Failed to instanciate property", e); - } - catch (InstantiationException e) - { - throw new IllegalArgumentException("Failed to instanciate property", e); - } - catch (IllegalAccessException e) - { - throw new IllegalArgumentException("Failed to instanciate property", e); - } - catch (InvocationTargetException e) - { - throw new IllegalArgumentException("Failed to instanciate property", e); - } - catch (SecurityException e) - { - throw new IllegalArgumentException("Failed to instanciate property", e); - } - catch (NoSuchMethodException e) - { - throw new IllegalArgumentException("Failed to instanciate property", e); - } - } 
- - public AbstractSimpleProperty instanciateSimpleField(Class clz, String nsuri, String prefix, - String propertyName, Object value) - { - PropertiesDescription pm = initializePropMapping(clz); - PropertyType simpleType = pm.getPropertyType(propertyName); - Types type = simpleType.type(); - return instanciateSimpleProperty(nsuri, prefix, propertyName, value, type); - } - - /** - * Check if a namespace used reference a complex basic types (like Thumbnails) - * - * @param namespace - * The namespace URI to check - * @return True if namespace URI is a reference for a complex basic type - */ - public boolean isStructuredTypeNamespace(String namespace) - { - return structuredNamespaces.containsKey(namespace); - } - - public boolean isDefinedTypeNamespace(String namespace) - { - return definedStructuredNamespaces.containsKey(namespace); - } - - public boolean isDefinedType(String name) - { - return this.definedStructuredMappings.containsKey(name); - } - - private void addNameSpace(Class classSchem) - { - StructuredType st = classSchem.getAnnotation(StructuredType.class); - String ns = st.namespace(); - schemaMap.put(ns, new XMPSchemaFactory(ns, classSchem, initializePropMapping(classSchem))); - } - - public void addNewNameSpace(String ns, String prefered) - { - PropertiesDescription mapping = new PropertiesDescription(); - schemaMap.put(ns, new XMPSchemaFactory(ns, XMPSchema.class, mapping)); - } - - public PropertiesDescription getStructuredPropMapping(Types type) - { - return structuredMappings.get(type); - } - - /** - * Return the specialized schema class representation if it's known (create and add it to metadata). 
In other cases, - * return null - * - * @param metadata - * Metadata to link the new schema - * @param namespace - * The namespace URI - * @param prefix The namespace prefix - * @return Schema representation - * @throws XmpSchemaException - * When Instancing specified Object Schema failed - */ - public XMPSchema getAssociatedSchemaObject(XMPMetadata metadata, String namespace, String prefix) - throws XmpSchemaException - { - if (schemaMap.containsKey(namespace)) - { - XMPSchemaFactory factory = schemaMap.get(namespace); - return factory.createXMPSchema(metadata, prefix); - } - else - { - XMPSchemaFactory factory = getSchemaFactory(namespace); - return factory != null ? factory.createXMPSchema(metadata, prefix) : null; - } - } - - public XMPSchemaFactory getSchemaFactory(String namespace) - { - return schemaMap.get(namespace); - } - - /** - * Say if a specific namespace is known - * - * @param namespace - * The namespace URI checked - * @return True if namespace URI is known - */ - public boolean isDefinedSchema(String namespace) - { - return schemaMap.containsKey(namespace); - } - - public boolean isDefinedNamespace(String namespace) - { - return isDefinedSchema(namespace) || isStructuredTypeNamespace(namespace) || isDefinedTypeNamespace(namespace); - } - - /** - * Give type of specified property in specified schema (given by its namespaceURI) - * - * @param name - * the property Qualified Name - * @return Property type declared for namespace specified, null if unknown - * @throws org.apache.xmpbox.type.BadFieldValueException if the name was not found. 
- */ - public PropertyType getSpecifiedPropertyType(QName name) throws BadFieldValueException - { - XMPSchemaFactory factory = getSchemaFactory(name.getNamespaceURI()); - if (factory != null) - { - // found in schema - return factory.getPropertyType(name.getLocalPart()); - } - else - { - // try in structured - Types st = structuredNamespaces.get(name.getNamespaceURI()); - if (st != null) - { - return createPropertyType(st, Cardinality.Simple); - } - else - { - // try in defined - String dt = definedStructuredNamespaces.get(name.getNamespaceURI()); - if (dt == null) - { - // not found - throw new BadFieldValueException("No descriptor found for " + name); - } - else - { - return createPropertyType(Types.DefinedType, Cardinality.Simple); - } - } - } - } - - public PropertiesDescription initializePropMapping(Class classSchem) - { - PropertiesDescription propMap = new PropertiesDescription(); - Field[] fields = classSchem.getFields(); - String propName = null; - for (Field field : fields) - { - if (field.isAnnotationPresent(PropertyType.class)) - { - try - { - propName = (String) field.get(propName); - } - catch (Exception e) - { - throw new IllegalArgumentException( - "couldn't read one type declaration, please check accessibility and declaration of fields annoted in " - + classSchem.getName(), e); - } - PropertyType propType = field.getAnnotation(PropertyType.class); - propMap.addNewProperty(propName, propType); - } - } - return propMap; - } - - public BooleanType createBoolean(String namespaceURI, String prefix, String propertyName, boolean value) - { - return new BooleanType(metadata, namespaceURI, prefix, propertyName, value); - } - - public DateType createDate(String namespaceURI, String prefix, String propertyName, Calendar value) - { - return new DateType(metadata, namespaceURI, prefix, propertyName, value); - } - - public IntegerType createInteger(String namespaceURI, String prefix, String propertyName, int value) - { - return new IntegerType(metadata, 
namespaceURI, prefix, propertyName, value); - } - - public RealType createReal(String namespaceURI, String prefix, String propertyName, float value) - { - return new RealType(metadata, namespaceURI, prefix, propertyName, value); - } - - public TextType createText(String namespaceURI, String prefix, String propertyName, String value) - { - return new TextType(metadata, namespaceURI, prefix, propertyName, value); - } - - public ProperNameType createProperName(String namespaceURI, String prefix, String propertyName, String value) - { - return new ProperNameType(metadata, namespaceURI, prefix, propertyName, value); - } - - public URIType createURI(String namespaceURI, String prefix, String propertyName, String value) - { - return new URIType(metadata, namespaceURI, prefix, propertyName, value); - } - - public URLType createURL(String namespaceURI, String prefix, String propertyName, String value) - { - return new URLType(metadata, namespaceURI, prefix, propertyName, value); - } - - public RenditionClassType createRenditionClass(String namespaceURI, String prefix, String propertyName, String value) - { - return new RenditionClassType(metadata, namespaceURI, prefix, propertyName, value); - } - - public PartType createPart(String namespaceURI, String prefix, String propertyName, String value) - { - return new PartType(metadata, namespaceURI, prefix, propertyName, value); - } - - public MIMEType createMIMEType(String namespaceURI, String prefix, String propertyName, String value) - { - return new MIMEType(metadata, namespaceURI, prefix, propertyName, value); - } - - public LocaleType createLocale(String namespaceURI, String prefix, String propertyName, String value) - { - return new LocaleType(metadata, namespaceURI, prefix, propertyName, value); - } - - public GUIDType createGUID(String namespaceURI, String prefix, String propertyName, String value) - { - return new GUIDType(metadata, namespaceURI, prefix, propertyName, value); - } - - public ChoiceType 
createChoice(String namespaceURI, String prefix, String propertyName, String value) - { - return new ChoiceType(metadata, namespaceURI, prefix, propertyName, value); - } - - public AgentNameType createAgentName(String namespaceURI, String prefix, String propertyName, String value) - { - return new AgentNameType(metadata, namespaceURI, prefix, propertyName, value); - } - - public XPathType createXPath(String namespaceURI, String prefix, String propertyName, String value) - { - return new XPathType(metadata, namespaceURI, prefix, propertyName, value); - } - - public ArrayProperty createArrayProperty(String namespace, String prefix, String propertyName, Cardinality type) - { - return new ArrayProperty(metadata, namespace, prefix, propertyName, type); - } - - public static PropertyType createPropertyType(final Types type, final Cardinality card) - { - return new PropertyType() - { - - @Override - public Class annotationType() - { - return null; - } - - @Override - public Types type() - { - return type; - } - - @Override - public Cardinality card() - { - return card; - } - }; - } -} +/***************************************************************************** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + ****************************************************************************/ + +package org.apache.xmpbox.type; + +import java.lang.annotation.Annotation; +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.util.Calendar; +import java.util.EnumMap; +import java.util.HashMap; +import java.util.Map; + +import javax.xml.namespace.QName; + +import org.apache.xmpbox.XMPMetadata; +import org.apache.xmpbox.schema.AdobePDFSchema; +import org.apache.xmpbox.schema.DublinCoreSchema; +import org.apache.xmpbox.schema.ExifSchema; +import org.apache.xmpbox.schema.PDFAExtensionSchema; +import org.apache.xmpbox.schema.PDFAIdentificationSchema; +import org.apache.xmpbox.schema.PhotoshopSchema; +import org.apache.xmpbox.schema.TiffSchema; +import org.apache.xmpbox.schema.XMPBasicJobTicketSchema; +import org.apache.xmpbox.schema.XMPBasicSchema; +import org.apache.xmpbox.schema.XMPMediaManagementSchema; +import org.apache.xmpbox.schema.XMPRightsManagementSchema; +import org.apache.xmpbox.schema.XMPSchema; +import org.apache.xmpbox.schema.XMPSchemaFactory; +import org.apache.xmpbox.schema.XMPageTextSchema; +import org.apache.xmpbox.schema.XmpSchemaException; + +public final class TypeMapping +{ + + private Map structuredMappings; + + // ns -> type + private Map structuredNamespaces; + + // ns -> type + private Map definedStructuredNamespaces; + + private Map definedStructuredMappings; + + private final XMPMetadata metadata; + + private Map schemaMap; + + public TypeMapping(XMPMetadata metadata) + { + this.metadata = metadata; + initialize(); + } + + private static final Class[] SIMPLEPROPERTYCONSTPARAMS = new Class[] { XMPMetadata.class, String.class, + String.class, String.class, Object.class }; + + private void initialize() + { + // structured types + structuredMappings = new EnumMap(Types.class); + structuredNamespaces = new HashMap(); + for (Types type : Types.values()) + { + if 
(type.isStructured()) + { + Class clz = type.getImplementingClass().asSubclass( + AbstractStructuredType.class); + StructuredType st = clz.getAnnotation(StructuredType.class); + String ns = st.namespace(); + PropertiesDescription pm = initializePropMapping(clz); + structuredNamespaces.put(ns, type); + structuredMappings.put(type, pm); + } + } + + // define structured types + definedStructuredNamespaces = new HashMap(); + definedStructuredMappings = new HashMap(); + + // schema + schemaMap = new HashMap(); + addNameSpace(XMPBasicSchema.class); + addNameSpace(DublinCoreSchema.class); + addNameSpace(PDFAExtensionSchema.class); + addNameSpace(XMPMediaManagementSchema.class); + addNameSpace(AdobePDFSchema.class); + addNameSpace(PDFAIdentificationSchema.class); + addNameSpace(XMPRightsManagementSchema.class); + addNameSpace(PhotoshopSchema.class); + addNameSpace(XMPBasicJobTicketSchema.class); + addNameSpace(ExifSchema.class); + addNameSpace(TiffSchema.class); + addNameSpace(XMPageTextSchema.class); + } + + public void addToDefinedStructuredTypes(String typeName, String ns, PropertiesDescription pm) + { + definedStructuredNamespaces.put(ns, typeName); + definedStructuredMappings.put(typeName, pm); + } + + public PropertiesDescription getDefinedDescriptionByNamespace(String namespace) + { + String dt = definedStructuredNamespaces.get(namespace); + return this.definedStructuredMappings.get(dt); + } + + public AbstractStructuredType instanciateStructuredType(Types type, String propertyName) + throws BadFieldValueException + { + try + { + Class propertyTypeClass = type.getImplementingClass().asSubclass( + AbstractStructuredType.class); + Constructor construct = propertyTypeClass + .getDeclaredConstructor(XMPMetadata.class); + AbstractStructuredType tmp = construct.newInstance(metadata); + tmp.setPropertyName(propertyName); + return tmp; + } + catch (InvocationTargetException e) + { + throw new BadFieldValueException("Failed to instantiate structured type : " + type, e); + } 
+ catch (IllegalArgumentException e) + { + throw new BadFieldValueException("Failed to instantiate structured type : " + type, e); + } + catch (InstantiationException e) + { + throw new BadFieldValueException("Failed to instantiate structured type : " + type, e); + } + catch (IllegalAccessException e) + { + throw new BadFieldValueException("Failed to instantiate structured type : " + type, e); + } + catch (SecurityException e) + { + throw new BadFieldValueException("Failed to instantiate structured type : " + type, e); + } + catch (NoSuchMethodException e) + { + throw new BadFieldValueException("Failed to instantiate structured type : " + type, e); + } + } + + public AbstractStructuredType instanciateDefinedType(String propertyName, String namespace) + { + return new DefinedStructuredType(metadata, namespace, null, propertyName); + } + + public AbstractSimpleProperty instanciateSimpleProperty(String nsuri, String prefix, String name, Object value, + Types type) + { + // constructor parameters + Object[] params = new Object[] { metadata, nsuri, prefix, name, value }; + // type + Class clz = + type.getImplementingClass().asSubclass(AbstractSimpleProperty.class); + try + { + Constructor cons = clz.getDeclaredConstructor(SIMPLEPROPERTYCONSTPARAMS); + return cons.newInstance(params); + } + catch (NoSuchMethodError e) + { + throw new IllegalArgumentException("Failed to instantiate " + clz.getSimpleName() + " property with value " + value, e); + } + catch (IllegalArgumentException e) + { + throw new IllegalArgumentException("Failed to instantiate " + clz.getSimpleName() + " property with value " + value, e); + } + catch (InstantiationException e) + { + throw new IllegalArgumentException("Failed to instantiate " + clz.getSimpleName() + " property with value " + value, e); + } + catch (IllegalAccessException e) + { + throw new IllegalArgumentException("Failed to instantiate " + clz.getSimpleName() + " property with value " + value, e); + } + catch (InvocationTargetException 
e) + { + throw new IllegalArgumentException("Failed to instantiate " + clz.getSimpleName() + " property with value " + value, e); + } + catch (SecurityException e) + { + throw new IllegalArgumentException("Failed to instantiate " + clz.getSimpleName() + " property with value " + value, e); + } + catch (NoSuchMethodException e) + { + throw new IllegalArgumentException("Failed to instantiate " + clz.getSimpleName() + " property with value " + value, e); + } + } + + public AbstractSimpleProperty instanciateSimpleField(Class clz, String nsuri, String prefix, + String propertyName, Object value) + { + PropertiesDescription pm = initializePropMapping(clz); + PropertyType simpleType = pm.getPropertyType(propertyName); + Types type = simpleType.type(); + return instanciateSimpleProperty(nsuri, prefix, propertyName, value, type); + } + + /** + * Check if a namespace used reference a complex basic types (like Thumbnails) + * + * @param namespace + * The namespace URI to check + * @return True if namespace URI is a reference for a complex basic type + */ + public boolean isStructuredTypeNamespace(String namespace) + { + return structuredNamespaces.containsKey(namespace); + } + + public boolean isDefinedTypeNamespace(String namespace) + { + return definedStructuredNamespaces.containsKey(namespace); + } + + public boolean isDefinedType(String name) + { + return this.definedStructuredMappings.containsKey(name); + } + + private void addNameSpace(Class classSchem) + { + StructuredType st = classSchem.getAnnotation(StructuredType.class); + String ns = st.namespace(); + schemaMap.put(ns, new XMPSchemaFactory(ns, classSchem, initializePropMapping(classSchem))); + } + + public void addNewNameSpace(String ns, String preferred) + { + PropertiesDescription mapping = new PropertiesDescription(); + schemaMap.put(ns, new XMPSchemaFactory(ns, XMPSchema.class, mapping)); + } + + public PropertiesDescription getStructuredPropMapping(Types type) + { + return structuredMappings.get(type); + } + 
+ /** + * Return the specialized schema class representation if it's known (create and add it to metadata). In other cases, + * return null + * + * @param metadata + * Metadata to link the new schema + * @param namespace + * The namespace URI + * @param prefix The namespace prefix + * @return Schema representation + * @throws XmpSchemaException + * When Instancing specified Object Schema failed + */ + public XMPSchema getAssociatedSchemaObject(XMPMetadata metadata, String namespace, String prefix) + throws XmpSchemaException + { + if (schemaMap.containsKey(namespace)) + { + XMPSchemaFactory factory = schemaMap.get(namespace); + return factory.createXMPSchema(metadata, prefix); + } + else + { + XMPSchemaFactory factory = getSchemaFactory(namespace); + return factory != null ? factory.createXMPSchema(metadata, prefix) : null; + } + } + + public XMPSchemaFactory getSchemaFactory(String namespace) + { + return schemaMap.get(namespace); + } + + /** + * Say if a specific namespace is known + * + * @param namespace + * The namespace URI checked + * @return True if namespace URI is known + */ + public boolean isDefinedSchema(String namespace) + { + return schemaMap.containsKey(namespace); + } + + public boolean isDefinedNamespace(String namespace) + { + return isDefinedSchema(namespace) || isStructuredTypeNamespace(namespace) || isDefinedTypeNamespace(namespace); + } + + /** + * Give type of specified property in specified schema (given by its namespaceURI) + * + * @param name + * the property Qualified Name + * @return Property type declared for namespace specified, null if unknown + * @throws org.apache.xmpbox.type.BadFieldValueException if the name was not found. 
+ */ + public PropertyType getSpecifiedPropertyType(QName name) throws BadFieldValueException + { + XMPSchemaFactory factory = getSchemaFactory(name.getNamespaceURI()); + if (factory != null) + { + // found in schema + return factory.getPropertyType(name.getLocalPart()); + } + else + { + // try in structured + Types st = structuredNamespaces.get(name.getNamespaceURI()); + if (st != null) + { + return createPropertyType(st, Cardinality.Simple); + } + else + { + // try in defined + String dt = definedStructuredNamespaces.get(name.getNamespaceURI()); + if (dt == null) + { + // not found + throw new BadFieldValueException("No descriptor found for " + name); + } + else + { + return createPropertyType(Types.DefinedType, Cardinality.Simple); + } + } + } + } + + public PropertiesDescription initializePropMapping(Class classSchem) + { + PropertiesDescription propMap = new PropertiesDescription(); + Field[] fields = classSchem.getFields(); + String propName = null; + for (Field field : fields) + { + if (field.isAnnotationPresent(PropertyType.class)) + { + try + { + propName = (String) field.get(propName); + } + catch (Exception e) + { + throw new IllegalArgumentException( + "couldn't read one type declaration, please check accessibility and declaration of fields annotated in " + + classSchem.getName(), e); + } + PropertyType propType = field.getAnnotation(PropertyType.class); + propMap.addNewProperty(propName, propType); + } + } + return propMap; + } + + public BooleanType createBoolean(String namespaceURI, String prefix, String propertyName, boolean value) + { + return new BooleanType(metadata, namespaceURI, prefix, propertyName, value); + } + + public DateType createDate(String namespaceURI, String prefix, String propertyName, Calendar value) + { + return new DateType(metadata, namespaceURI, prefix, propertyName, value); + } + + public IntegerType createInteger(String namespaceURI, String prefix, String propertyName, int value) + { + return new IntegerType(metadata, 
namespaceURI, prefix, propertyName, value); + } + + public RealType createReal(String namespaceURI, String prefix, String propertyName, float value) + { + return new RealType(metadata, namespaceURI, prefix, propertyName, value); + } + + public TextType createText(String namespaceURI, String prefix, String propertyName, String value) + { + return new TextType(metadata, namespaceURI, prefix, propertyName, value); + } + + public ProperNameType createProperName(String namespaceURI, String prefix, String propertyName, String value) + { + return new ProperNameType(metadata, namespaceURI, prefix, propertyName, value); + } + + public URIType createURI(String namespaceURI, String prefix, String propertyName, String value) + { + return new URIType(metadata, namespaceURI, prefix, propertyName, value); + } + + public URLType createURL(String namespaceURI, String prefix, String propertyName, String value) + { + return new URLType(metadata, namespaceURI, prefix, propertyName, value); + } + + public RenditionClassType createRenditionClass(String namespaceURI, String prefix, String propertyName, String value) + { + return new RenditionClassType(metadata, namespaceURI, prefix, propertyName, value); + } + + public PartType createPart(String namespaceURI, String prefix, String propertyName, String value) + { + return new PartType(metadata, namespaceURI, prefix, propertyName, value); + } + + public MIMEType createMIMEType(String namespaceURI, String prefix, String propertyName, String value) + { + return new MIMEType(metadata, namespaceURI, prefix, propertyName, value); + } + + public LocaleType createLocale(String namespaceURI, String prefix, String propertyName, String value) + { + return new LocaleType(metadata, namespaceURI, prefix, propertyName, value); + } + + public GUIDType createGUID(String namespaceURI, String prefix, String propertyName, String value) + { + return new GUIDType(metadata, namespaceURI, prefix, propertyName, value); + } + + public ChoiceType 
createChoice(String namespaceURI, String prefix, String propertyName, String value) + { + return new ChoiceType(metadata, namespaceURI, prefix, propertyName, value); + } + + public AgentNameType createAgentName(String namespaceURI, String prefix, String propertyName, String value) + { + return new AgentNameType(metadata, namespaceURI, prefix, propertyName, value); + } + + public XPathType createXPath(String namespaceURI, String prefix, String propertyName, String value) + { + return new XPathType(metadata, namespaceURI, prefix, propertyName, value); + } + + public ArrayProperty createArrayProperty(String namespace, String prefix, String propertyName, Cardinality type) + { + return new ArrayProperty(metadata, namespace, prefix, propertyName, type); + } + + public static PropertyType createPropertyType(final Types type, final Cardinality card) + { + return new PropertyType() + { + + @Override + public Class annotationType() + { + return null; + } + + @Override + public Types type() + { + return type; + } + + @Override + public Cardinality card() + { + return card; + } + }; + } +} diff --git a/xmpbox/src/main/java/org/apache/xmpbox/xml/DomHelper.java b/xmpbox/src/main/java/org/apache/xmpbox/xml/DomHelper.java index 48b57879a46..d9b17482fae 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/xml/DomHelper.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/xml/DomHelper.java @@ -109,13 +109,7 @@ public static boolean isRdfDescription(Element element) public static boolean isParseTypeResource(Element element) { Attr parseType = element.getAttributeNodeNS(XmpConstants.RDF_NAMESPACE, XmpConstants.PARSE_TYPE); - if (parseType != null && XmpConstants.RESOURCE_NAME.equals(parseType.getValue())) - { - // parseType resourc - return true; - } - // else - return false; + return parseType != null && XmpConstants.RESOURCE_NAME.equals(parseType.getValue()); } } diff --git a/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java 
b/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java index b3709183e2a..56757a92f0a 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java @@ -24,11 +24,12 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.Deque; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Stack; import java.util.StringTokenizer; import javax.xml.XMLConstants; @@ -78,15 +79,21 @@ public DomXmpParser() throws XmpParsingException try { DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); + dbFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + dbFactory.setFeature("http://xml.org/sax/features/external-general-entities", false); + dbFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + dbFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + dbFactory.setXIncludeAware(false); + dbFactory.setExpandEntityReferences(false); + dbFactory.setIgnoringComments(true); dbFactory.setNamespaceAware(true); dBuilder = dbFactory.newDocumentBuilder(); nsFinder = new NamespaceFinder(); } catch (ParserConfigurationException e) { - throw new XmpParsingException(ErrorType.Configuration, "Failed to initilalize", e); + throw new XmpParsingException(ErrorType.Configuration, "Failed to initialize", e); } - } public boolean isStrictParsing() @@ -409,7 +416,6 @@ private void manageStructuredType(XMPMetadata xmp, Element property, String pref } private void manageSimpleType(XMPMetadata xmp, Element property, Types type, ComplexPropertyContainer container) - throws XmpParsingException { TypeMapping tm = xmp.getTypeMapping(); String prefix = property.getPrefix(); @@ -456,7 +462,7 @@ private void manageArray(XMPMetadata xmp, Element property, 
PropertyType type, C for (Element element : lis) { QName propertyQName = new QName(element.getLocalName()); - AbstractField ast = parseLiElement(xmp, propertyQName, element); + AbstractField ast = parseLiElement(xmp, propertyQName, element, type.type()); if (ast != null) { array.addProperty(ast); @@ -493,7 +499,7 @@ private void parseDescriptionInner(XMPMetadata xmp, Element description, Complex } } - private AbstractField parseLiElement(XMPMetadata xmp, QName descriptor, Element liElement) + private AbstractField parseLiElement(XMPMetadata xmp, QName descriptor, Element liElement, Types type) throws XmpParsingException { if (DomHelper.isParseTypeResource(liElement)) @@ -509,13 +515,31 @@ private AbstractField parseLiElement(XMPMetadata xmp, QName descriptor, Element } else { - // no child, so consider as simple text + // no child String text = liElement.getTextContent(); TypeMapping tm = xmp.getTypeMapping(); - AbstractSimpleProperty sp = tm.instanciateSimpleProperty(descriptor.getNamespaceURI(), - descriptor.getPrefix(), descriptor.getLocalPart(), text, Types.Text); - loadAttributes(sp, liElement); - return sp; + if (type.isSimple()) + { + AbstractField af = tm.instanciateSimpleProperty(descriptor.getNamespaceURI(), + descriptor.getPrefix(), descriptor.getLocalPart(), text, type); + loadAttributes(af, liElement); + return af; + } + else + { + // PDFBOX-4325: assume it is structured + AbstractField af; + try + { + af = tm.instanciateStructuredType(type, descriptor.getLocalPart()); + } + catch (BadFieldValueException ex) + { + throw new XmpParsingException(ErrorType.InvalidType, "Parsing of structured type failed", ex); + } + loadAttributes(af, liElement); + return af; + } } } @@ -594,7 +618,7 @@ else if (type.card().isArray()) List lis = DomHelper.getElementChildren(bagOrSeq); for (Element element2 : lis) { - AbstractField ast2 = parseLiElement(xmp, descriptor, element2); + AbstractField ast2 = parseLiElement(xmp, descriptor, element2, type.type()); if (ast2 != 
null) { array.addProperty(ast2); @@ -784,25 +808,32 @@ else if ((ln != null) && !(ln.equals(element.getLocalName()))) */ private void removeComments(Node root) { - if (root.getChildNodes().getLength()<=1) + // will hold the nodes which are to be deleted + List forDeletion = new ArrayList(); + + NodeList nl = root.getChildNodes(); + + if (nl.getLength()<=1) { // There is only one node so we do not remove it return; } - NodeList nl = root.getChildNodes(); - for (int i=0; i < nl.getLength(); i++) + + for (int i = 0; i < nl.getLength(); i++) { Node node = nl.item(i); if (node instanceof Comment) { - // remove the comment - root.removeChild(node); + // comments to be deleted + forDeletion.add(node); } else if (node instanceof Text) { if (node.getTextContent().trim().isEmpty()) { - root.removeChild(node); + // TODO: verify why this is necessary + // empty text nodes to be deleted + forDeletion.add(node); } } else if (node instanceof Element) @@ -811,6 +842,12 @@ else if (node instanceof Element) removeComments(node); } // else do nothing } + + // now remove the child nodes + for (Node node : forDeletion) + { + root.removeChild(node); + } } private AbstractStructuredType instanciateStructured(TypeMapping tm, Types type, String name, @@ -863,10 +900,9 @@ private PropertyType checkPropertyDefinition(XMPMetadata xmp, QName prop) throws } } - protected class NamespaceFinder + protected static class NamespaceFinder { - - private final Stack> stack = new Stack>(); + private final Deque> stack = new ArrayDeque>(); protected void push(Element description) { @@ -891,9 +927,8 @@ protected Map pop() protected boolean containsNamespace(String namespace) { - for (int i = stack.size() - 1; i >= 0; i--) + for (Map map : stack) { - Map map = stack.get(i); if (map.containsValue(namespace)) { return true; diff --git a/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java b/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java index b7f6990daf1..d89c2825c8c 
100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java @@ -82,11 +82,15 @@ private static void checkNamespaceDeclaration(Attr attr, Class[] { type.getImplementingClass() }); - set.invoke(schema, new Object[] { asp }); + Method set = getSchemaClass().getMethod(setter, type.getImplementingClass()); + set.invoke(schema, asp); // check property set AbstractSimpleProperty stored = (AbstractSimpleProperty) schema.getProperty(fieldName); Assert.assertEquals(value, stored.getValue()); // check getter String getter = calculateSimpleGetter(fieldName) + "Property"; - Method get = getSchemaClass().getMethod(getter, new Class[0]); - Object result = get.invoke(schema, new Object[0]); + Method get = getSchemaClass().getMethod(getter); + Object result = get.invoke(schema); Assert.assertTrue(type.getImplementingClass().isAssignableFrom(result.getClass())); Assert.assertEquals(asp, result); } @@ -254,28 +254,33 @@ private void internalTestPropertySetterInArray() throws Exception return; } XMPSchema schema = getSchema(); + + System.out.println(fieldName); + // add value String setter = "add" + calculateFieldNameForMethod(fieldName); // TypeDescription td = // typeMapping.getSimpleDescription(type); Object value1 = getJavaValue(type); - Method set = getSchemaClass().getMethod(setter, new Class[] { getJavaType(type) }); - set.invoke(schema, new Object[] { value1 }); + System.out.println(type); + Method set = getSchemaClass().getMethod(setter, getJavaType(type)); + set.invoke(schema, value1); // retrieve complex property String getter = calculateArrayGetter(fieldName) + "Property"; - Method getcp = getSchemaClass().getMethod(getter, new Class[0]); - Object ocp = getcp.invoke(schema, new Object[0]); + Method getcp = getSchemaClass().getMethod(getter); + Object ocp = getcp.invoke(schema); Assert.assertTrue(ocp instanceof ArrayProperty); ArrayProperty cp = (ArrayProperty) ocp; // check size 
is ok (1) + System.out.println(cp.getContainer().getAllProperties()); Assert.assertEquals(1, cp.getContainer().getAllProperties().size()); // add a new one Object value2 = getJavaValue(type); - set.invoke(schema, new Object[] { value2 }); + set.invoke(schema, value2); Assert.assertEquals(2, cp.getContainer().getAllProperties().size()); // remove the first String remover = "remove" + calculateFieldNameForMethod(fieldName); - Method remove = getSchemaClass().getMethod(remover, new Class[] { getJavaType(type) }); + Method remove = getSchemaClass().getMethod(remover, getJavaType(type)); remove.invoke(schema, value1); Assert.assertEquals(1, cp.getContainer().getAllProperties().size()); diff --git a/xmpbox/src/test/java/org/apache/xmpbox/schema/AbstractXMPSchemaTest.java b/xmpbox/src/test/java/org/apache/xmpbox/schema/AbstractXMPSchemaTest.java index 294a92c518f..5754b9fed0f 100644 --- a/xmpbox/src/test/java/org/apache/xmpbox/schema/AbstractXMPSchemaTest.java +++ b/xmpbox/src/test/java/org/apache/xmpbox/schema/AbstractXMPSchemaTest.java @@ -496,13 +496,12 @@ protected void testGetSetThumbnail() throws Exception String img = "/9j/4AAQSkZJRgABAgEASABIAAD"; setMethod.invoke(schema, height, width, format, img); List found = ((List) getMethod.invoke(schema)); - Assert.assertTrue(found.size() == 1); + Assert.assertEquals(1, found.size()); ThumbnailType t1 = found.get(0); Assert.assertEquals(height, t1.getHeight()); Assert.assertEquals(width, t1.getWidth()); Assert.assertEquals(format, t1.getFormat()); Assert.assertEquals(img, t1.getImage()); - } protected void testGetSetLangAltValue() throws Exception diff --git a/xmpbox/src/test/java/org/apache/xmpbox/schema/BasicJobTicketSchemaTest.java b/xmpbox/src/test/java/org/apache/xmpbox/schema/BasicJobTicketSchemaTest.java index 83946027c36..0b7021a6425 100644 --- a/xmpbox/src/test/java/org/apache/xmpbox/schema/BasicJobTicketSchemaTest.java +++ b/xmpbox/src/test/java/org/apache/xmpbox/schema/BasicJobTicketSchemaTest.java @@ -20,9 
+20,7 @@ package org.apache.xmpbox.schema; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.io.InputStream; import org.apache.xmpbox.XMPMetadata; import org.apache.xmpbox.type.JobType; @@ -69,8 +67,7 @@ public void testAddTwoJobs() throws Exception ByteArrayOutputStream bos = new ByteArrayOutputStream(); serializer.serialize(metadata, bos, true); - InputStream is = new ByteArrayInputStream(bos.toByteArray()); - XMPMetadata rxmp = builder.parse(is); + XMPMetadata rxmp = builder.parse(bos.toByteArray()); XMPBasicJobTicketSchema jt = rxmp.getBasicJobTicketSchema(); Assert.assertNotNull(jt); @@ -91,8 +88,7 @@ public void testAddWithDefaultPrefix() throws Exception ByteArrayOutputStream bos = new ByteArrayOutputStream(); serializer.serialize(metadata, bos, true); - InputStream is = new ByteArrayInputStream(bos.toByteArray()); - XMPMetadata rxmp = builder.parse(is); + XMPMetadata rxmp = builder.parse(bos.toByteArray()); XMPBasicJobTicketSchema jt = rxmp.getBasicJobTicketSchema(); Assert.assertNotNull(jt); @@ -123,8 +119,7 @@ public void testAddWithDefinedPrefix() throws Exception ByteArrayOutputStream bos = new ByteArrayOutputStream(); serializer.serialize(metadata, bos, true); - InputStream is = new ByteArrayInputStream(bos.toByteArray()); - XMPMetadata rxmp = builder.parse(is); + XMPMetadata rxmp = builder.parse(bos.toByteArray()); XMPBasicJobTicketSchema jt = rxmp.getBasicJobTicketSchema(); Assert.assertNotNull(jt); diff --git a/xmpbox/src/test/java/org/apache/xmpbox/schema/XMPSchemaTest.java b/xmpbox/src/test/java/org/apache/xmpbox/schema/XMPSchemaTest.java index 07f2f22b9ac..a51ae2aeeec 100644 --- a/xmpbox/src/test/java/org/apache/xmpbox/schema/XMPSchemaTest.java +++ b/xmpbox/src/test/java/org/apache/xmpbox/schema/XMPSchemaTest.java @@ -57,7 +57,7 @@ public void resetDocument() throws Exception /** * Check if Bag (Unordered Array) management is ok * - * @throws InappropriateTypeException + * @throws IllegalArgumentException */ 
@Test public void testBagManagement() throws Exception @@ -99,8 +99,8 @@ public void testArrayList() throws Exception /** * Check if Seq (Ordered Array) management is ok * - * @throws InappropriateTypeException - * @throws IOException + * @throws IllegalArgumentException + * @throws java.io.IOException */ @Test public void testSeqManagement() throws Exception @@ -235,7 +235,7 @@ public void testAsSimpleMethods() throws Exception /** * Test All common simple properties management in XMPSchema * - * @throws InappropriateTypeException + * @throws IllegalArgumentException * @throws BadFieldValueException */ @Test @@ -380,8 +380,8 @@ public void testAltProperties() throws Exception /** * check if merging is ok * - * @throws InappropriateTypeException - * @throws IOException + * @throws IllegalArgumentException + * @throws java.io.IOException */ @Test public void testMergeSchema() throws Exception diff --git a/xmpbox/src/test/java/org/apache/xmpbox/type/AbstractStructuredTypeTester.java b/xmpbox/src/test/java/org/apache/xmpbox/type/AbstractStructuredTypeTester.java index 3eabade2819..35c652fa2d8 100644 --- a/xmpbox/src/test/java/org/apache/xmpbox/type/AbstractStructuredTypeTester.java +++ b/xmpbox/src/test/java/org/apache/xmpbox/type/AbstractStructuredTypeTester.java @@ -69,8 +69,8 @@ public void testInitializedToNull() throws Exception // default method Assert.assertNull(structured.getProperty(fieldName)); // accessor - Method get = clz.getMethod(calculateSimpleGetter(fieldName), new Class[0]); - Object result = get.invoke(structured, new Object[0]); + Method get = clz.getMethod(calculateSimpleGetter(fieldName)); + Object result = get.invoke(structured); Assert.assertNull(result); } @@ -161,13 +161,13 @@ private void internalTestSetter() throws Exception AbstractStructuredType structured = getStructured(); String setter = calculateSimpleSetter(fieldName); Object value = getJavaValue(type); - Method set = clz.getMethod(setter, new Class[] { getJavaType(type) }); - 
set.invoke(structured, new Object[] { value }); + Method set = clz.getMethod(setter, getJavaType(type)); + set.invoke(structured, value); // check property set Assert.assertEquals(value, ((AbstractSimpleProperty) structured.getProperty(fieldName)).getValue()); // check getter - Method get = clz.getMethod(calculateSimpleGetter(fieldName), new Class[0]); - Object result = get.invoke(structured, new Object[0]); + Method get = clz.getMethod(calculateSimpleGetter(fieldName)); + Object result = get.invoke(structured); Assert.assertTrue(getJavaType(type).isAssignableFrom(result.getClass())); Assert.assertEquals(value, result); } diff --git a/xmpbox/src/test/java/org/apache/xmpbox/type/TestDerivedType.java b/xmpbox/src/test/java/org/apache/xmpbox/type/TestDerivedType.java index 15905ac2de6..5fb442bf082 100644 --- a/xmpbox/src/test/java/org/apache/xmpbox/type/TestDerivedType.java +++ b/xmpbox/src/test/java/org/apache/xmpbox/type/TestDerivedType.java @@ -1,106 +1,106 @@ -/***************************************************************************** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- * - ****************************************************************************/ - -package org.apache.xmpbox.type; - -import java.lang.reflect.Constructor; -import java.util.ArrayList; -import java.util.Collection; - -import org.apache.xmpbox.XMPMetadata; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Parameterized.class) -public class TestDerivedType -{ - - public static final String PREFIX = "myprefix"; - - public static final String NAME = "myname"; - - public static final String VALUE = "myvalue"; - - protected XMPMetadata xmp; - - protected String type = null; - - protected Class clz = null; - - protected Constructor constructor = null; - - public TestDerivedType(Class clz, String type) - { - super(); - this.clz = clz; - this.type = type; - } - - @Parameters - public static Collection initializeParameters() throws Exception - { - Collection result = new ArrayList(); - - result.add(new Object[] { AgentNameType.class, "AgentName" }); - result.add(new Object[] { ChoiceType.class, "Choice" }); - result.add(new Object[] { GUIDType.class, "GUID" }); - result.add(new Object[] { LocaleType.class, "Locale" }); - result.add(new Object[] { MIMEType.class, "MIME" }); - result.add(new Object[] { PartType.class, "Part" }); - result.add(new Object[] { ProperNameType.class, "ProperName" }); - result.add(new Object[] { RenditionClassType.class, "RenditionClass" }); - result.add(new Object[] { URIType.class, "URI" }); - result.add(new Object[] { URLType.class, "URL" }); - result.add(new Object[] { XPathType.class, "XPath" }); - - return result; - - } - - @Before - public void before() throws Exception - { - xmp = XMPMetadata.createXMPMetadata(); - constructor = clz.getConstructor(new Class[] { XMPMetadata.class, String.class, String.class, String.class, - Object.class }); - } - - protected TextType 
instanciate(XMPMetadata metadata, String namespaceURI, String prefix, String propertyName, - Object value) throws Exception - { - Object[] initargs = new Object[] { metadata, namespaceURI, prefix, propertyName, value }; - return constructor.newInstance(initargs); - } - - @Test - public void test1() throws Exception - { - TextType element = instanciate(xmp, null, PREFIX, NAME, VALUE); - Assert.assertNull(element.getNamespace()); - Assert.assertTrue(element.getValue() instanceof String); - Assert.assertEquals(VALUE, element.getValue()); - - } - -} +/***************************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + ****************************************************************************/ + +package org.apache.xmpbox.type; + +import java.lang.reflect.Constructor; +import java.util.ArrayList; +import java.util.Collection; + +import org.apache.xmpbox.XMPMetadata; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestDerivedType +{ + + public static final String PREFIX = "myprefix"; + + public static final String NAME = "myname"; + + public static final String VALUE = "myvalue"; + + protected XMPMetadata xmp; + + protected String type = null; + + protected Class clz = null; + + protected Constructor constructor = null; + + public TestDerivedType(Class clz, String type) + { + super(); + this.clz = clz; + this.type = type; + } + + @Parameters + public static Collection initializeParameters() throws Exception + { + Collection result = new ArrayList(); + + result.add(new Object[] { AgentNameType.class, "AgentName" }); + result.add(new Object[] { ChoiceType.class, "Choice" }); + result.add(new Object[] { GUIDType.class, "GUID" }); + result.add(new Object[] { LocaleType.class, "Locale" }); + result.add(new Object[] { MIMEType.class, "MIME" }); + result.add(new Object[] { PartType.class, "Part" }); + result.add(new Object[] { ProperNameType.class, "ProperName" }); + result.add(new Object[] { RenditionClassType.class, "RenditionClass" }); + result.add(new Object[] { URIType.class, "URI" }); + result.add(new Object[] { URLType.class, "URL" }); + result.add(new Object[] { XPathType.class, "XPath" }); + + return result; + + } + + @Before + public void before() throws Exception + { + xmp = XMPMetadata.createXMPMetadata(); + constructor = clz.getDeclaredConstructor(XMPMetadata.class, String.class, String.class, String.class, + Object.class); + } + + protected TextType 
instanciate(XMPMetadata metadata, String namespaceURI, String prefix, String propertyName, + Object value) throws Exception + { + Object[] initargs = new Object[] { metadata, namespaceURI, prefix, propertyName, value }; + return constructor.newInstance(initargs); + } + + @Test + public void test1() throws Exception + { + TextType element = instanciate(xmp, null, PREFIX, NAME, VALUE); + Assert.assertNull(element.getNamespace()); + Assert.assertTrue(element.getValue() instanceof String); + Assert.assertEquals(VALUE, element.getValue()); + + } + +} diff --git a/xmpbox/src/test/java/org/apache/xmpbox/type/TestSimpleMetadataProperties.java b/xmpbox/src/test/java/org/apache/xmpbox/type/TestSimpleMetadataProperties.java index 9cdefbeed1e..74feb526fab 100644 --- a/xmpbox/src/test/java/org/apache/xmpbox/type/TestSimpleMetadataProperties.java +++ b/xmpbox/src/test/java/org/apache/xmpbox/type/TestSimpleMetadataProperties.java @@ -49,18 +49,18 @@ public void resetDocument() throws Exception /** * Check the detection of a bad type * - * @throws InappropriateTypeException + * @throws IllegalArgumentException */ @Test(expected = IllegalArgumentException.class) public void testBooleanBadTypeDetection() { - new BooleanType(parent, null, "test", "booleen", "Not a Boolean"); + new BooleanType(parent, null, "test", "boolean", "Not a Boolean"); } /** * Check the detection of a bad type * - * @throws InappropriateTypeException + * @throws IllegalArgumentException */ @Test(expected = IllegalArgumentException.class) public void testDateBadTypeDetection() @@ -71,7 +71,7 @@ public void testDateBadTypeDetection() /** * Check the detection of a bad type * - * @throws InappropriateTypeException + * @throws IllegalArgumentException */ @Test(expected = IllegalArgumentException.class) public void testIntegerBadTypeDetection() @@ -82,7 +82,7 @@ public void testIntegerBadTypeDetection() /** * Check the detection of a bad type * - * @throws InappropriateTypeException + * @throws 
IllegalArgumentException */ @Test(expected = IllegalArgumentException.class) public void testRealBadTypeDetection() throws Exception @@ -93,7 +93,7 @@ public void testRealBadTypeDetection() throws Exception /** * Check the detection of a bad type * - * @throws InappropriateTypeException + * @throws IllegalArgumentException */ @Test(expected = IllegalArgumentException.class) public void testTextBadTypeDetection() throws Exception @@ -102,7 +102,7 @@ public void testTextBadTypeDetection() throws Exception } /** - * Check if information between objects and the elment generated are equals + * Check if information between objects and the element generated are equals * * @throws Exception */ @@ -114,7 +114,7 @@ public void testElementAndObjectSynchronization() throws Exception int integerv = 1; float realv = Float.parseFloat("1.69"); String textv = "TEXTCONTENT"; - BooleanType bool = parent.getTypeMapping().createBoolean(null, "test", "booleen", boolv); + BooleanType bool = parent.getTypeMapping().createBoolean(null, "test", "boolean", boolv); DateType date = parent.getTypeMapping().createDate(null, "test", "date", datev); IntegerType integer = parent.getTypeMapping().createInteger(null, "test", "integer", integerv); RealType real = parent.getTypeMapping().createReal(null, "test", "real", realv); @@ -142,7 +142,7 @@ public void testCreationFromString() throws Exception String realv = "1.92"; String textv = "text"; - BooleanType bool = new BooleanType(parent, null, "test", "booleen", boolv); + BooleanType bool = new BooleanType(parent, null, "test", "boolean", boolv); DateType date = new DateType(parent, null, "test", "date", datev); IntegerType integer = new IntegerType(parent, null, "test", "integer", integerv); RealType real = new RealType(parent, null, "test", "real", realv); @@ -164,7 +164,7 @@ public void testCreationFromString() throws Exception public void testObjectCreationWithNamespace() throws Exception { String ns = "http://www.test.org/pdfa/"; - BooleanType 
bool = parent.getTypeMapping().createBoolean(ns, "test", "booleen", true); + BooleanType bool = parent.getTypeMapping().createBoolean(ns, "test", "boolean", true); DateType date = parent.getTypeMapping().createDate(ns, "test", "date", Calendar.getInstance()); IntegerType integer = parent.getTypeMapping().createInteger(ns, "test", "integer", 1); RealType real = parent.getTypeMapping().createReal(ns, "test", "real", (float) 1.6); @@ -179,9 +179,9 @@ public void testObjectCreationWithNamespace() throws Exception } /** - * Throw InappropriateType Exception + * Throw IllegalArgumentException * - * @throws InappropriateTypeException + * @throws IllegalArgumentException */ @Test(expected = IllegalArgumentException.class) public void testExceptionWithCause() throws Exception

    Lucene Field NameDescription