Skip to content

Commit

Permalink
Merge branch 'master' into html5
Browse files Browse the repository at this point in the history
  • Loading branch information
gjtorikian committed Dec 10, 2019
2 parents 4f94fed + c120b7c commit ac93f46
Show file tree
Hide file tree
Showing 33 changed files with 213 additions and 137 deletions.
12 changes: 12 additions & 0 deletions .github/FUNDING.yml
@@ -0,0 +1,12 @@
# These are supported funding model platforms

github: gjtorikian
patreon: gjtorikian
open_collective: garen-torikian
#ko_fi: # Replace with a single Ko-fi username
#tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
#community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
#liberapay: # Replace with a single Liberapay username
issuehunt: gjtorikian
#otechie: # Replace with a single Otechie username
#custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
3 changes: 3 additions & 0 deletions .rubocop.yml
Expand Up @@ -11,3 +11,6 @@ RequireParentheses:

Naming/FileName:
Enabled: false

Style/Documentation:
Enabled: false
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -297,6 +297,7 @@ The `HTMLProofer` constructor takes an optional hash of additional options:
| `internal_domains`| An array of Strings containing domains that will be treated as internal urls. | `[]` |
| `log_level` | Sets the logging level, as determined by [Yell](https://github.com/rudionrails/yell). One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. | `:info`
| `only_4xx` | Only reports errors for links that fall within the 4xx status code range. | `false` |
| `root_dir` | The absolute path to the directory serving your html-files. Used when running html-proofer on a file, rather than a directory. | "" |
| `typhoeus_config` | A JSON-formatted string. Parsed using `JSON.parse` and mapped on top of the default configuration values so that they can be overridden. | `{}` |
| `url_ignore` | An array of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored. | `[]` |
| `url_swap` | A hash containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. | `{}` |
Expand Down
2 changes: 1 addition & 1 deletion Rakefile
Expand Up @@ -11,7 +11,7 @@ require 'rubocop/rake_task'

RuboCop::RakeTask.new(:rubocop)

task default: %i(spec proof_readme)
task default: %i[spec proof_readme]

task :proof_readme do
require 'html-proofer'
Expand Down
9 changes: 3 additions & 6 deletions bin/htmlproofer
Expand Up @@ -48,6 +48,7 @@ Mercenary.program(:htmlproofer) do |p|
p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
p.option 'root_dir', '--root-folder PATH', String, 'The absolute path to the directory serving your html-files. Used when running html-proofer on a file, rather than a directory.'

p.action do |args, opts|
args = ['.'] if args.empty?
Expand All @@ -57,9 +58,7 @@ Mercenary.program(:htmlproofer) do |p|

# prepare everything to go to proofer
p.options.reject { |o| opts[o.config_key].nil? }.each do |option|
if opts[option.config_key].is_a?(Array)
opts[option.config_key] = opts[option.config_key].map { |i| HTMLProofer::Configuration.to_regex?(i) }
end
opts[option.config_key] = opts[option.config_key].map { |i| HTMLProofer::Configuration.to_regex?(i) } if opts[option.config_key].is_a?(Array)
options[option.config_key.to_sym] = opts[option.config_key]
end

Expand All @@ -84,9 +83,7 @@ Mercenary.program(:htmlproofer) do |p|
options[:validation][:report_invalid_tags] = opts['report_invalid_tags'] unless opts['report_invalid_tags'].nil?
options[:validation][:report_missing_doctype] = opts['report_missing_doctype'] unless opts['report_missing_doctype'].nil?

unless opts['typhoeus_config'].nil?
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config'])
end
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?

unless opts['timeframe'].nil?
options[:cache] ||= {}
Expand Down
25 changes: 12 additions & 13 deletions html-proofer.gemspec
@@ -1,6 +1,6 @@
# frozen_string_literal: true

$LOAD_PATH.push File.expand_path('../lib', __FILE__)
$LOAD_PATH.push File.expand_path('lib', __dir__)
require 'html-proofer/version'

Gem::Specification.new do |gem|
Expand All @@ -12,31 +12,30 @@ Gem::Specification.new do |gem|
gem.summary = %(A set of tests to validate your HTML output. These tests check if your image references are legitimate, if they have alt tags, if your internal links are working, and so on. It's intended to be an all-in-one checker for your documentation output.)
gem.homepage = 'https://github.com/gjtorikian/html-proofer'
gem.license = 'MIT'
gem.executables = ['htmlproofer']
all_files = `git ls-files -z`.split("\x0")
gem.files = all_files.grep(%r{^(bin|lib)/})
gem.executables = all_files.grep(%r{^bin/}) { |f| File.basename(f) }
gem.test_files = gem.files.grep(%r{^(spec)/})
gem.require_paths = ['lib']

gem.add_dependency 'mercenary', '~> 0.3.2'
gem.add_dependency 'nokogumbo', '>= 2.0.0.alpha', '< 3'
gem.add_dependency 'addressable', '~> 2.3'
gem.add_dependency 'mercenary', '~> 0.3'
gem.add_dependency 'nokogumbo', '~> 2.0'
gem.add_dependency 'colorize', '~> 0.8'
gem.add_dependency 'parallel', '~> 1.3'
gem.add_dependency 'rainbow', '~> 3.0'
gem.add_dependency 'typhoeus', '~> 1.3'
gem.add_dependency 'yell', '~> 2.0'
gem.add_dependency 'parallel', '~> 1.3'
gem.add_dependency 'addressable', '~> 2.3'

gem.add_development_dependency 'awesome_print'
gem.add_development_dependency 'codecov'
gem.add_development_dependency 'pry-byebug'
gem.add_development_dependency 'rake'
gem.add_development_dependency 'redcarpet'
gem.add_development_dependency 'rspec', '~> 3.1'
gem.add_development_dependency 'rubocop'
gem.add_development_dependency 'rubocop-standard'
gem.add_development_dependency 'rubocop-performance'
gem.add_development_dependency 'codecov'
gem.add_development_dependency 'rspec', '~> 3.1'
gem.add_development_dependency 'rake'
gem.add_development_dependency 'pry-byebug'
gem.add_development_dependency 'awesome_print'
gem.add_development_dependency 'vcr', '~> 2.9'
gem.add_development_dependency 'rubocop-standard'
gem.add_development_dependency 'timecop', '~> 0.8'
gem.add_development_dependency 'vcr', '~> 2.9'
end
18 changes: 9 additions & 9 deletions lib/html-proofer.rb
Expand Up @@ -17,38 +17,38 @@ def require_all(path)
begin
require 'awesome_print'
require 'pry-byebug'
rescue LoadError; end
rescue LoadError; end # rubocop:disable Lint/HandleExceptions
module HTMLProofer
def check_file(file, options = {})
def self.check_file(file, options = {})
raise ArgumentError unless file.is_a?(String)
raise ArgumentError, "#{file} does not exist" unless File.exist?(file)

options[:type] = :file
HTMLProofer::Runner.new(file, options)
end
module_function :check_file

def check_directory(directory, options = {})
def self.check_directory(directory, options = {})
raise ArgumentError unless directory.is_a?(String)
raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)

options[:type] = :directory
HTMLProofer::Runner.new([directory], options)
end
module_function :check_directory

def check_directories(directories, options = {})
def self.check_directories(directories, options = {})
raise ArgumentError unless directories.is_a?(Array)

options[:type] = :directory
directories.each do |directory|
raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
end
HTMLProofer::Runner.new(directories, options)
end
module_function :check_directories

def check_links(links, options = {})
def self.check_links(links, options = {})
raise ArgumentError unless links.is_a?(Array)

options[:type] = :links
HTMLProofer::Runner.new(links, options)
end
module_function :check_links
end
20 changes: 6 additions & 14 deletions lib/html-proofer/cache.rb
Expand Up @@ -9,7 +9,7 @@ class Cache
include HTMLProofer::Utils

DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
DEFAULT_CACHE_FILE_NAME = 'cache.log'.freeze
DEFAULT_CACHE_FILE_NAME = 'cache.log'

attr_reader :exists, :cache_log, :storage_dir, :cache_file

Expand Down Expand Up @@ -120,9 +120,8 @@ def retrieve_urls(external_urls)
@cache_log.each_pair do |url, cache|
if within_timeframe?(cache['time'])
next if cache['message'].empty? # these were successes to skip
urls_to_check[url] = cache['filenames'] # these are failures to retry
else
urls_to_check[url] = cache['filenames'] # pass or fail, recheck expired links
urls_to_check[url] = cache['filenames'] # recheck expired links
end
end
urls_to_check
Expand All @@ -142,23 +141,16 @@ def clean_url(url)
end

def setup_cache!(options)
@storage_dir = if options[:storage_dir]
options[:storage_dir]
else
DEFAULT_STORAGE_DIR
end
@storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR

FileUtils.mkdir_p(storage_dir) unless Dir.exist?(storage_dir)

cache_file_name = if options[:cache_file]
options[:cache_file]
else
DEFAULT_CACHE_FILE_NAME
end
cache_file_name = options[:cache_file] || DEFAULT_CACHE_FILE_NAME

@cache_file = File.join(storage_dir, cache_file_name)

return unless File.exist?(cache_file)

contents = File.read(cache_file)
@cache_log = contents.empty? ? {} : JSON.parse(contents)
end
Expand All @@ -174,7 +166,7 @@ def time_ago(measurement, unit)
when :days
@cache_datetime - measurement
when :hours
@cache_datetime - Rational(measurement/24.0)
@cache_datetime - Rational(measurement / 24.0)
end.to_time
end
end
Expand Down
2 changes: 2 additions & 0 deletions lib/html-proofer/check.rb
Expand Up @@ -29,6 +29,7 @@ def add_issue(desc, line: nil, status: -1, content: nil)

def add_to_external_urls(url)
return if @external_urls[url]

add_path_for_url(url)
end

Expand All @@ -45,6 +46,7 @@ def self.subchecks

ObjectSpace.each_object(Class) do |c|
next unless c.superclass == self

classes << c
end

Expand Down
12 changes: 7 additions & 5 deletions lib/html-proofer/check/favicon.rb
Expand Up @@ -6,22 +6,24 @@ def run
@html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |node|
favicon = create_element(node)
next if favicon.ignore?

found = true if favicon.rel.split(' ').last.eql? 'icon'
break if found
end

return if found

return if is_immediate_redirect?
return if immediate_redirect?

add_issue('no favicon specified')
end

private

def is_immediate_redirect?
# allow any instant-redirect meta tag
@html.xpath("//meta[@http-equiv='refresh']").attribute('content').value.start_with? '0;' rescue false
# allow any instant-redirect meta tag
def immediate_redirect?
@html.xpath("//meta[@http-equiv='refresh']").attribute('content').value.start_with? '0;'
rescue StandardError
false
end

end
12 changes: 6 additions & 6 deletions lib/html-proofer/check/html.rb
Expand Up @@ -2,16 +2,16 @@

class HtmlCheck < ::HTMLProofer::Check
# tags embedded in scripts are used in templating languages: http://git.io/vOovv
SCRIPT_EMBEDS_MSG = /Element script embeds close tag/
INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/
INVALID_PREFIX = /Namespace prefix/
PARSE_ENTITY_REF = /htmlParseEntityRef: no name/
DOCTYPE_MSG = /The doctype must be the first token in the document/
SCRIPT_EMBEDS_MSG = /Element script embeds close tag/.freeze
INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze
INVALID_PREFIX = /Namespace prefix/.freeze
PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze
DOCTYPE_MSG = /The doctype must be the first token in the document/.freeze

def run
@html.errors.each do |error|
add_issue(error.message, line: error.line) if report?(error.message)
end
end
end

def report?(message)
Expand Down
10 changes: 3 additions & 7 deletions lib/html-proofer/check/images.rb
@@ -1,7 +1,7 @@
# frozen_string_literal: true

class ImageCheck < ::HTMLProofer::Check
SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/
SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze

def empty_alt_tag?
@img.alt.nil? || @img.alt.strip.empty?
Expand Down Expand Up @@ -38,13 +38,9 @@ def run
add_issue("internal image #{@img.url} does not exist", line: line, content: content)
end

if empty_alt_tag? && !@img.ignore_empty_alt? && !@img.ignore_alt?
add_issue("image #{@img.url} does not have an alt attribute", line: line, content: content)
end
add_issue("image #{@img.url} does not have an alt attribute", line: line, content: content) if empty_alt_tag? && !@img.ignore_empty_alt? && !@img.ignore_alt?

if @img.check_img_http? && @img.scheme == 'http'
add_issue("image #{@img.url} uses the http scheme", line: line, content: content)
end
add_issue("image #{@img.url} uses the http scheme", line: line, content: content) if @img.check_img_http? && @img.scheme == 'http'
end

external_urls
Expand Down
10 changes: 6 additions & 4 deletions lib/html-proofer/check/links.rb
Expand Up @@ -35,6 +35,7 @@ def run
next if @link.allow_missing_href?
# HTML5 allows dropping the href: http://git.io/vBX0z
next if @html.internal_subset.nil? || (@html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?)

add_issue('anchor has no href attribute', line: line, content: content)
next
end
Expand All @@ -47,9 +48,10 @@ def run
# we need to skip these for now; although the domain main be valid,
# curl/Typheous inaccurately return 404s for some links. cc https://git.io/vyCFx
next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'

add_to_external_urls(@link.href)
next
elsif !@link.internal? && !@link.exists?
elsif @link.internal? && !@link.exists?
add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
end

Expand All @@ -74,6 +76,7 @@ def check_schemes(link, line, content)
handle_tel(link, line, content)
when 'http'
return unless @options[:enforce_https]

add_issue("#{link.href} is not an HTTPS link", line: line, content: content)
end
end
Expand Down Expand Up @@ -103,9 +106,7 @@ def external_link_check(link, line, content)
add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
else
target_html = create_nokogiri link.absolute_path
unless hash_check target_html, link.hash
add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content)
end
add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_check target_html, link.hash
end
end

Expand Down Expand Up @@ -135,6 +136,7 @@ def find_fragments(html, fragment_ids)

def check_sri(line, content)
return unless SRI_REL_TYPES.include?(@link.rel)

if !defined?(@link.integrity) && !defined?(@link.crossorigin)
add_issue("SRI and CORS not provided in: #{@link.src}", line: line, content: content)
elsif !defined?(@link.integrity)
Expand Down
1 change: 0 additions & 1 deletion lib/html-proofer/check/opengraph.rb
@@ -1,4 +1,3 @@
# encoding: utf-8
# frozen_string_literal: true

class OpenGraphElement < ::HTMLProofer::Element
Expand Down
10 changes: 5 additions & 5 deletions lib/html-proofer/configuration.rb
Expand Up @@ -66,19 +66,19 @@ def self.to_regex?(item)
end

def self.parse_json_option(option_name, config)
raise ArgumentError.new('Must provide an option name in string format.') unless option_name.is_a?(String)
raise ArgumentError.new('Must provide an option name in string format.') unless !option_name.strip.empty?
raise ArgumentError, 'Must provide an option name in string format.' unless option_name.is_a?(String)
raise ArgumentError, 'Must provide an option name in string format.' if option_name.strip.empty?

return {} if config.nil?

raise ArgumentError.new('Must provide a JSON configuration in string format.') unless config.is_a?(String)
raise ArgumentError, 'Must provide a JSON configuration in string format.' unless config.is_a?(String)

return {} if config.strip.empty?

begin
JSON.parse(config)
rescue
raise ArgumentError.new("Option '" + option_name + "' did not contain valid JSON.")
rescue StandardError
raise ArgumentError, "Option '" + option_name + "' did not contain valid JSON."
end
end
end
Expand Down

0 comments on commit ac93f46

Please sign in to comment.