Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 134 additions & 61 deletions pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,6 @@
*/
package org.apache.pdfbox.multipdf;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
Expand Down Expand Up @@ -55,6 +43,18 @@
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
* This class will take a list of pdf documents and merge them, saving the
* result in a new document.
Expand All @@ -68,8 +68,11 @@ public class PDFMergerUtility
// NOTE(review): presumably the input streams of the documents queued for merging - confirm
// against the addSource methods, which are not visible here.
private final List<InputStream> sources;
// NOTE(review): presumably the file streams opened by this utility itself - confirm.
private final List<FileInputStream> fileInputStreams;
// Destination file name, assigned by setDestinationFileName.
private String destinationFileName;
// Prefix for the replacement names given to fields whose names collide during an AcroForm merge.
private String mergedFieldPrefix = "pdfBoxMergedField";
// Destination stream, assigned by setDestinationStream.
private OutputStream destinationStream;
// When true, a merged field is renamed if the destination form already has a field of that name.
private boolean renameFieldsOnMerge = true;
// When true, acroform errors are ignored.
private boolean ignoreAcroFormErrors = false;
// Number appended to mergedFieldPrefix to build the next replacement field name.
private int nextFieldNum = 1;

/**
* Instantiate a new PDFMergerUtility.
Expand Down Expand Up @@ -100,6 +103,28 @@ public void setDestinationFileName(String destination)
destinationFileName = destination;
}

/**
 * Returns the prefix from which replacement field names are built when a naming collision
 * is encountered while merging AcroForms. Unless changed through
 * {@link #setMergedFieldPrefix(String)}, the value is "pdfBoxMergedField".
 *
 * @return the current merged-field name prefix.
 */
public String getMergedFieldPrefix()
{
    return this.mergedFieldPrefix;
}

/**
* Sets the prefix used to create new field names when encountering naming collisions during
* merging of the AcroForms. The default value is "pdfBoxMergedField".
* <p>
* NOTE(review): the value is stored without a null check, and the merge code passes it to
* {@code String.startsWith} - confirm that callers never pass {@code null}.
*
* @param mergedFieldPrefix The prefix to set.
*/
public void setMergedFieldPrefix(String mergedFieldPrefix)
{
this.mergedFieldPrefix = mergedFieldPrefix;
}

/**
* Get the destination OutputStream.
*
Expand All @@ -120,6 +145,51 @@ public void setDestinationStream(OutputStream destStream)
destinationStream = destStream;
}

/**
 * Reports whether acroform errors are currently being ignored.
 *
 * @return {@code true} when acroform errors are ignored, {@code false} otherwise
 */
public boolean isIgnoreAcroFormErrors()
{
    return this.ignoreAcroFormErrors;
}

/**
 * Chooses whether acroform errors are ignored.
 *
 * @param ignoreAcroFormErrorsValue {@code true} to ignore acroform errors,
 * {@code false} to not ignore them
 */
public void setIgnoreAcroFormErrors(boolean ignoreAcroFormErrorsValue)
{
    this.ignoreAcroFormErrors = ignoreAcroFormErrorsValue;
}


/**
 * Reports whether a merged field is renamed when the document already contains another
 * field with the same name.
 *
 * @return {@code true} if such fields are renamed, {@code false} if they retain their
 * original names
 */
public boolean isRenameFieldsOnMerge()
{
    return this.renameFieldsOnMerge;
}

/**
* Controls whether merged fields are renamed on a name collision. The setting is true by
* default: a merged field is renamed if another field with the same name already exists in
* the document. Set it to false to have all fields retain their original names.
*
* @param renameFieldsOnMerge true to rename colliding fields, false to keep the original names
*/
public void setRenameFieldsOnMerge(boolean renameFieldsOnMerge)
{
this.renameFieldsOnMerge = renameFieldsOnMerge;
}

/**
* Add a source file to the list of files to merge.
*
Expand Down Expand Up @@ -285,8 +355,22 @@ public void appendDocument(PDDocument destination, PDDocument source) throws IOE
if (destAcroForm == null && srcAcroForm != null)
{
destCatalog.getCOSObject().setItem(COSName.ACRO_FORM,
cloner.cloneForNewDocument(srcAcroForm.getCOSObject()));

cloner.cloneForNewDocument(srcAcroForm.getCOSObject()));

if (renameFieldsOnMerge)
{
for (PDField field : srcAcroForm.getFields())
{
// reduce likelihood of name collisions by setting next number above this field's
// number if this field was merged by us in another document
String fieldName = field.getPartialName();
if (fieldName.startsWith(mergedFieldPrefix))
{
nextFieldNum = Math.max(nextFieldNum, Integer.parseInt(
fieldName.substring(mergedFieldPrefix.length(), fieldName.length())) + 1);
}
}
}
}
else
{
Expand Down Expand Up @@ -526,8 +610,6 @@ public void appendDocument(PDDocument destination, PDDocument source) throws IOE
}
}

private int nextFieldNum = 1;

/**
* Merge the contents of the source form into the destination form for the
* destination file.
Expand All @@ -541,62 +623,53 @@ private void mergeAcroForm(PDFCloneUtility cloner, PDAcroForm destAcroForm, PDAc
throws IOException
{

List<PDField> srcFields = srcAcroForm.getFields();
List<PDField> srcFields = srcAcroForm.getFields();

if (srcFields != null)
{
// if a form is merged multiple times using PDFBox the newly generated
// fields starting with dummyFieldName may already exist. We need to determine the last unique
// number used and increment that.
final String prefix = "dummyFieldName";
final int prefixLength = prefix.length();

for (PDField destField : destAcroForm.getFieldTree())
{
String fieldName = destField.getPartialName();
if (fieldName.startsWith(prefix))
{
nextFieldNum = Math.max(nextFieldNum, Integer.parseInt(fieldName.substring(prefixLength, fieldName.length()))+1);
}
}

COSArray destFields = (COSArray) destAcroForm.getCOSObject().getItem(COSName.FIELDS);
for (PDField srcField : srcAcroForm.getFieldTree())

for (PDField srcField : srcFields)
{
COSDictionary dstField = (COSDictionary) cloner.cloneForNewDocument(srcField.getCOSObject());
// if the form already has a field with this name then we need to rename this field
// to prevent merge conflicts.
if (destAcroForm.getField(srcField.getFullyQualifiedName()) != null)

// only add the root fields. kid fields are added automatically when parent is added
// check for null parent to make sure field is root
if (srcField.getParent() == null)
{
dstField.setString(COSName.T, prefix + nextFieldNum++);

COSDictionary dstField = (COSDictionary) cloner.cloneForNewDocument(srcField.getCOSObject());

// rename fields on merge if enabled
if (renameFieldsOnMerge)
{
// only use partial name since it is same as fully qualified for root fields
String fieldName = srcField.getPartialName();
// reduce likelihood of name collisions by setting next number above this field's
// number if this field was merged by us in another document
if (fieldName.startsWith(mergedFieldPrefix))
{
nextFieldNum = Math.max(nextFieldNum, Integer.parseInt(
fieldName.substring(mergedFieldPrefix.length(), fieldName.length())) + 1);
}
// if the form already has a field with this name then we need to rename this field
while (destAcroForm.getField(fieldName) != null)
{
fieldName = mergedFieldPrefix + nextFieldNum++;
}
// set new name if there is one
if (!fieldName.equals(srcField.getPartialName()))
{
dstField.setString(COSName.T, fieldName);
}
}

destFields.add(dstField);
}
destFields.add(dstField);
}
destAcroForm.getCOSObject().setItem(COSName.FIELDS,destFields);
destAcroForm.getCOSObject().setItem(COSName.FIELDS, destFields);
}
}

/**
 * Reports whether acroform errors are currently being ignored.
 *
 * @return {@code true} when acroform errors are ignored, {@code false} otherwise
 */
public boolean isIgnoreAcroFormErrors()
{
    return this.ignoreAcroFormErrors;
}

/**
 * Chooses whether acroform errors are ignored.
 *
 * @param ignoreAcroFormErrorsValue {@code true} to ignore acroform errors,
 * {@code false} to not ignore them
 */
public void setIgnoreAcroFormErrors(boolean ignoreAcroFormErrorsValue)
{
    this.ignoreAcroFormErrors = ignoreAcroFormErrorsValue;
}

/**
* Update the Pg and Obj references to the new (merged) page.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,19 @@
*/
package org.apache.pdfbox.multipdf;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;

import junit.framework.TestCase;

import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.rendering.PDFRenderer;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.function.Consumer;

/**
* Test suite for PDFMergerUtility.
*
Expand Down Expand Up @@ -111,6 +114,41 @@ public void testPDFMergerUtility2() throws IOException
MemoryUsageSetting.setupTempFileOnly());
}

/**
 * Appends three documents that contain AcroForms to an empty destination document and
 * checks that the merged form holds as many fields as the three sources together, each
 * under a distinct fully qualified name. See PDFBOX-3218.
 *
 * @throws IOException if a test document cannot be loaded, merged or closed
 */
public void testMergeAcroForm() throws IOException
{
    ClassLoader loader = getClass().getClassLoader();

    // try-with-resources closes every document even when an assertion fails; resources are
    // closed in reverse order, so the destination is closed before the sources it was built from
    try (PDDocument previous = PDDocument.load(
                 loader.getResourceAsStream("input/merge/merge_forms_previous.pdf"));
         PDDocument srcDoc1 = PDDocument.load(
                 loader.getResourceAsStream("input/merge/merge_forms_doc1.pdf"));
         PDDocument srcDoc2 = PDDocument.load(
                 loader.getResourceAsStream("input/merge/merge_forms_doc2.pdf"));
         PDDocument destination = new PDDocument(MemoryUsageSetting.setupMainMemoryOnly()))
    {
        int expectedFieldsSize = previous.getDocumentCatalog().getAcroForm().getFields().size();
        expectedFieldsSize += srcDoc1.getDocumentCatalog().getAcroForm().getFields().size();
        expectedFieldsSize += srcDoc2.getDocumentCatalog().getAcroForm().getFields().size();

        PDFMergerUtility mergerUtility = new PDFMergerUtility();
        mergerUtility.appendDocument(destination, previous);
        mergerUtility.appendDocument(destination, srcDoc1);
        mergerUtility.appendDocument(destination, srcDoc2);

        // assert fields not added multiple times
        assertEquals(expectedFieldsSize,
                destination.getDocumentCatalog().getAcroForm().getFields().size());

        // a set drops duplicates, so an equal size means no two merged fields share a name
        Set<String> fullyQualifiedFieldNames = new HashSet<String>();
        for (PDField pdField : destination.getDocumentCatalog().getAcroForm().getFields())
        {
            fullyQualifiedFieldNames.add(pdField.getFullyQualifiedName());
        }

        assertEquals(expectedFieldsSize, fullyQualifiedFieldNames.size());
    }
}

// checks that the result file of a merge has the same rendering as the two
// source files
private void checkMergeIdentical(String filename1, String filename2, String mergeFilename,
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.