Skip to content

Commit

Permalink
Merge pull request #789 from gjtorikian/external-hash-crash
Browse files Browse the repository at this point in the history
Improved PDF hash handling
  • Loading branch information
gjtorikian committed Jan 20, 2023
2 parents 3aa7073 + d0809a7 commit 2e03f16
Show file tree
Hide file tree
Showing 25 changed files with 578 additions and 162 deletions.
1 change: 1 addition & 0 deletions html-proofer.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Gem::Specification.new do |spec|
spec.add_dependency("addressable", "~> 2.3")
spec.add_dependency("async", "~> 2.1")
spec.add_dependency("nokogiri", "~> 1.13")
spec.add_dependency("pdf-reader", "~> 2.11")
spec.add_dependency("rainbow", "~> 3.0")
spec.add_dependency("typhoeus", "~> 1.3")
spec.add_dependency("yell", "~> 2.0")
Expand Down
12 changes: 8 additions & 4 deletions lib/html_proofer/attribute/url.rb
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,16 @@ def file_path
# either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
@runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
# relative links, path is a file
elsif File.exist?(File.expand_path(path,
@runner.current_source)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
elsif File.exist?(File.expand_path(
path,
@runner.current_source,
)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
File.dirname(@runner.current_filename)
# relative links in nested dir, path is a file
elsif File.exist?(File.join(File.dirname(@runner.current_filename),
path)) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
elsif File.exist?(File.join(
File.dirname(@runner.current_filename),
path,
)) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
File.dirname(@runner.current_filename)
# relative link, path is a directory
else
Expand Down
10 changes: 8 additions & 2 deletions lib/html_proofer/check.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,14 @@ def run
end

# Records a single failure against the file currently being checked.
#
# The failure is built from the runner's current filename, this check's
# short name, and the supplied description and optional details.
#
# @param description [String] human-readable explanation of the problem
# @param line [Object, nil] passed through to the Failure as `line:`
# @param status [Object, nil] passed through to the Failure as `status:`
# @param content [Object, nil] passed through to the Failure as `content:`
# @return [Object] the `@failures` collection after the append
def add_failure(description, line: nil, status: nil, content: nil)
  # Append exactly once; a second append would list every problem twice.
  @failures << Failure.new(
    @runner.current_filename,
    short_name,
    description,
    line: line,
    status: status,
    content: content,
  )
end

def short_name
Expand Down
14 changes: 10 additions & 4 deletions lib/html_proofer/check/favicon.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,19 @@ def run

if found
if @favicon.url.protocol_relative?
add_failure("favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
line: @favicon.line, content: @favicon.content)
add_failure(
"favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
line: @favicon.line,
content: @favicon.content,
)
elsif @favicon.url.remote?
add_to_external_urls(@favicon.url, @favicon.line)
elsif !@favicon.url.exists?
add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line,
content: @favicon.content)
add_failure(
"internal favicon #{@favicon.url.raw_attribute} does not exist",
line: @favicon.line,
content: @favicon.content,
)
end
else
add_failure("no favicon provided")
Expand Down
49 changes: 35 additions & 14 deletions lib/html_proofer/check/images.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,39 @@ def run
next if @img.ignore?

# screenshot filenames should return because of terrible names
add_failure("image has a terrible filename (#{@img.url.raw_attribute})", line: @img.line,
content: @img.content) if terrible_filename?
add_failure(
"image has a terrible filename (#{@img.url.raw_attribute})",
line: @img.line,
content: @img.content,
) if terrible_filename?

# does the image exist?
if missing_src?
add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content)
elsif @img.url.protocol_relative?
add_failure("image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
line: @img.line, content: @img.content)
add_failure(
"image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
line: @img.line,
content: @img.content,
)
elsif @img.url.remote?
add_to_external_urls(@img.url, @img.line)
elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes?
add_failure("internal image #{@img.url.raw_attribute} does not exist", line: @img.line,
content: @img.content)
add_failure(
"internal image #{@img.url.raw_attribute} does not exist",
line: @img.line,
content: @img.content,
)
elsif @img.multiple_srcsets? || @img.multiple_sizes?
@img.srcsets_wo_sizes.each do |srcset|
srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)

if srcset_url.protocol_relative?
add_failure("image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead",
line: @img.line, content: @img.content)
add_failure(
"image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead",
line: @img.line,
content: @img.content,
)
elsif srcset_url.remote?
add_to_external_urls(srcset_url.url, @img.line)
elsif !srcset_url.exists?
Expand All @@ -44,16 +56,25 @@ def run
# if this is an img element, check that the alt attribute is present
if @img.img_tag? && !ignore_element?
if missing_alt_tag? && !ignore_missing_alt?
add_failure("image #{@img.url.raw_attribute} does not have an alt attribute", line: @img.line,
content: @img.content)
add_failure(
"image #{@img.url.raw_attribute} does not have an alt attribute",
line: @img.line,
content: @img.content,
)
elsif (empty_alt_tag? || alt_all_spaces?) && !ignore_empty_alt?
add_failure("image #{@img.url.raw_attribute} has an alt attribute, but no content", line: @img.line,
content: @img.content)
add_failure(
"image #{@img.url.raw_attribute} has an alt attribute, but no content",
line: @img.line,
content: @img.content,
)
end
end

add_failure("image #{@img.url.raw_attribute} uses the http scheme", line: @img.line,
content: @img.content) if @runner.enforce_https? && @img.url.http?
add_failure(
"image #{@img.url.raw_attribute} uses the http scheme",
line: @img.line,
content: @img.content,
) if @runner.enforce_https? && @img.url.http?
end

external_urls
Expand Down
49 changes: 35 additions & 14 deletions lib/html_proofer/check/links.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@ def run
end

if @link.url.protocol_relative?
add_failure("#{@link.url} is a protocol-relative URL, use explicit https:// instead",
line: @link.line, content: @link.content)
add_failure(
"#{@link.url} is a protocol-relative URL, use explicit https:// instead",
line: @link.line,
content: @link.content,
)
next
end

Expand All @@ -55,8 +58,11 @@ def run
elsif @link.url.internal?
# does the local directory have a trailing slash?
if @link.url.unslashed_directory?(@link.url.absolute_path)
add_failure("internally linking to a directory #{@link.url.raw_attribute} without trailing slash",
line: @link.line, content: @link.content)
add_failure(
"internally linking to a directory #{@link.url.raw_attribute} without trailing slash",
line: @link.line,
content: @link.content,
)
next
end

Expand Down Expand Up @@ -88,17 +94,26 @@ def check_schemes

# Validates the address part of a mailto: link held in @link.
#
# - An empty path is reported as "contains no email address", unless
#   ignore_empty_mailto? is truthy.
# - A non-empty path that does not match URI::MailTo::EMAIL_REGEXP is
#   reported as "contains an invalid email address".
#
# Each problem is reported exactly once.
def handle_mailto
  if @link.url.path.empty?
    add_failure(
      "#{@link.url.raw_attribute} contains no email address",
      line: @link.line,
      content: @link.content,
    ) unless ignore_empty_mailto?
  elsif !/#{URI::MailTo::EMAIL_REGEXP}/o.match?(@link.url.path)
    add_failure(
      "#{@link.url.raw_attribute} contains an invalid email address",
      line: @link.line,
      content: @link.content,
    )
  end
end

# Reports a tel: link in @link whose path is empty, i.e. one that carries
# no phone number. The failure is reported exactly once.
def handle_tel
  return unless @link.url.path.empty?

  add_failure(
    "#{@link.url.raw_attribute} contains no phone number",
    line: @link.line,
    content: @link.content,
  )
end

def ignore_empty_mailto?
Expand All @@ -113,13 +128,19 @@ def check_sri
return unless SRI_REL_TYPES.include?(@link.node["rel"])

if blank?(@link.node["integrity"]) && blank?(@link.node["crossorigin"])
add_failure("SRI and CORS not provided in: #{@link.url.raw_attribute}", line: @link.line,
content: @link.content)
add_failure(
"SRI and CORS not provided in: #{@link.url.raw_attribute}",
line: @link.line,
content: @link.content,
)
elsif blank?(@link.node["integrity"])
add_failure("Integrity is missing in: #{@link.url.raw_attribute}", line: @link.line, content: @link.content)
elsif blank?(@link.node["crossorigin"])
add_failure("CORS not provided for external resource in: #{@link.link.url.raw_attribute}", line: @link.line,
content: @link.content)
add_failure(
"CORS not provided for external resource in: #{@link.link.url.raw_attribute}",
line: @link.line,
content: @link.content,
)
end
end

Expand Down
14 changes: 10 additions & 4 deletions lib/html_proofer/check/open_graph.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,19 @@ def run
elsif !@open_graph.url.valid?
add_failure("#{@open_graph.src} is an invalid URL", line: @open_graph.line)
elsif @open_graph.url.protocol_relative?
add_failure("open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
line: @open_graph.line, content: @open_graph.content)
add_failure(
"open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
line: @open_graph.line,
content: @open_graph.content,
)
elsif @open_graph.url.remote?
add_to_external_urls(@open_graph.url, @open_graph.line)
else
add_failure("internal open graph #{@open_graph.url.raw_attribute} does not exist", line: @open_graph.line,
content: @open_graph.content) unless @open_graph.url.exists?
add_failure(
"internal open graph #{@open_graph.url.raw_attribute} does not exist",
line: @open_graph.line,
content: @open_graph.content,
) unless @open_graph.url.exists?
end
end

Expand Down
35 changes: 25 additions & 10 deletions lib/html_proofer/check/scripts.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,20 @@ def run
if missing_src?
add_failure("script is empty and has no src attribute", line: @script.line, content: @script.content)
elsif @script.url.protocol_relative?
add_failure("script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
line: @script.line, content: @script.content)
add_failure(
"script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
line: @script.line,
content: @script.content,
)
elsif @script.url.remote?
add_to_external_urls(@script.url, @script.line)
check_sri if @runner.check_sri?
elsif !@script.url.exists?
add_failure("internal script reference #{@script.src} does not exist", line: @script.line,
content: @script.content)
add_failure(
"internal script reference #{@script.src} does not exist",
line: @script.line,
content: @script.content,
)
end
end

Expand All @@ -34,14 +40,23 @@ def missing_src?

# Checks the `integrity` and `crossorigin` attributes on the node of
# @script and reports whichever is blank:
#
# - both blank            -> "SRI and CORS not provided in: ..."
# - only integrity blank  -> "Integrity is missing in: ..."
# - only crossorigin blank -> "CORS not provided for external resource in: ..."
#
# At most one failure is reported per call.
def check_sri
  if blank?(@script.node["integrity"]) && blank?(@script.node["crossorigin"])
    add_failure(
      "SRI and CORS not provided in: #{@script.url.raw_attribute}",
      line: @script.line,
      content: @script.content,
    )
  elsif blank?(@script.node["integrity"])
    add_failure(
      "Integrity is missing in: #{@script.url.raw_attribute}",
      line: @script.line,
      content: @script.content,
    )
  elsif blank?(@script.node["crossorigin"])
    add_failure(
      "CORS not provided for external resource in: #{@script.url.raw_attribute}",
      line: @script.line,
      content: @script.content,
    )
  end
end
end
Expand Down

0 comments on commit 2e03f16

Please sign in to comment.