Skip to content

Commit

Permalink
HP Scan invalid Length workaround
Browse files Browse the repository at this point in the history
  • Loading branch information
mfazekas committed May 3, 2022
1 parent 79d5c37 commit 8b6a31c
Showing 1 changed file with 17 additions and 1 deletion.
18 changes: 17 additions & 1 deletion lib/combine_pdf/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class PDFParser
attr_reader :info_object, :root_object, :names_object, :forms_object, :outlines_object, :metadata

attr_reader :allow_optional_content
attr_reader :relaxed
# when creating a parser, it is important to set the data (String) we wish to parse.
#
# <b>the data is required and it is not possible to set the data at a later stage</b>
Expand All @@ -58,6 +59,7 @@ def initialize(string, options = {})
@version = nil
@scanner = nil
@allow_optional_content = options[:allow_optional_content]
@relaxed = options[:relaxed]
end

# parse the data in the new parser (the data already set through the initialize / new method)
Expand Down Expand Up @@ -361,7 +363,21 @@ def _parse_
# advance by the published stream length (if any)
old_pos = @scanner.pos
if(out.last.is_a?(Hash) && out.last[:Length].is_a?(Integer) && out.last[:Length] > 2)
@scanner.pos += out.last[:Length] - 2
begin
@scanner.pos += out.last[:Length] - 2
rescue RangeError => error
raise error unless @relaxed
oldpos = @scanner.pos
skipped = @scanner.skip_until(/endstream/)
if skipped
len = skipped - 'endstream'.length
warn "CombinePDF parser: invalid length: #{out.last[:Length]} for object: #{out.last} should be: #{len}"
@scanner.pos = oldpos
@scanner.pos += len
else
raise ParsingError, "Parsing Error: PDF file error - a stream object with invalid length of #{out.last[:Length]} for object #{out.last} and no endstream found, to work around it"
end
end
end

# the following was discarded because some PDF files didn't have an EOL marker as required
Expand Down

0 comments on commit 8b6a31c

Please sign in to comment.