diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java index 5004d3ea2d4..bceb9d430d0 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java @@ -92,7 +92,7 @@ public void parse() throws IOException try { Map offsets = readOffsets(); - streamObjects = new ArrayList( numberOfObjects ); + streamObjects = new ArrayList(offsets.size()); for (Entry offset : offsets.entrySet()) { COSBase cosObject = parseObject(offset.getKey()); @@ -128,8 +128,14 @@ private Map readOffsets() throws IOException // but we can't rely on that, so that we have to sort the offsets // as the sequential parsers relies on it, see PDFBOX-4927 Map objectNumbers = new TreeMap(); + long firstObjectPosition = seqSource.getPosition() + firstObject - 1; for (int i = 0; i < numberOfObjects; i++) { + // don't read beyond the part of the stream reserved for the object numbers + if (seqSource.getPosition() >= firstObjectPosition) + { + break; + } long objectNumber = readObjectNumber(); int offset = (int) readLong(); objectNumbers.put(offset, objectNumber);