-
-
Notifications
You must be signed in to change notification settings - Fork 192
/
links.rb
154 lines (125 loc) · 5.14 KB
/
links.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# frozen_string_literal: true
class LinkCheck < ::HTMLProofer::Check
include HTMLProofer::Utils
# Reports whether the current link (@link) carries nothing usable to link
# with or to: its href, name and id are all blank according to +blank?+.
def missing_href?
  %i[href name id].all? { |attribute| blank?(@link.public_send(attribute)) }
end
# Reports whether the current link (@link) is a pure placeholder: it has a
# non-blank id or name, and its href is nil (not merely blank).
def placeholder?
  return false unless @link.href.nil?

  !(blank?(@link.id) && blank?(@link.name))
end
# Inspects every <a> and <link> node of the parsed document (@html) and
# records an issue (via add_issue) for each problem found: invalid URLs,
# scheme-specific problems, missing hrefs, broken internal targets,
# directories linked without a trailing slash and unknown hash fragments.
# Remote links are not fetched here; they are handed to add_to_external_urls.
#
# Returns the value of +external_urls+.
def run
  @html.css('a, link').each do |node|
    @link = create_element(node)
    line = node.line
    content = node.to_s

    next if @link.ignore?
    next if placeholder?
    next if @link.allow_hash_href? && @link.href == '#'

    # is it even a valid URL?
    unless @link.valid?
      add_issue("#{@link.href} is an invalid URL", line: line, content: content)
      next
    end

    check_schemes(@link, line, content)

    # is there even an href?
    if missing_href?
      next if @link.allow_missing_href?

      # HTML5 allows dropping the href: http://git.io/vBX0z
      # internal_subset can be nil (e.g. a document without any DOCTYPE);
      # calling #name on it used to raise NoMethodError. Such documents are
      # skipped here rather than reported.
      doctype = @html.internal_subset
      next if doctype.nil? || (doctype.name == 'html' && doctype.external_id.nil?)

      add_issue('anchor has no href attribute', line: line, content: content)
      next
    end

    # intentionally here because we still want valid? & missing_href? to execute
    next if @link.non_http_remote?

    if !@link.internal? && @link.remote?
      check_sri(line, content) if @link.check_sri? && node.name == 'link'
      # we need to skip these for now; although the domain may be valid,
      # curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
      next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'

      add_to_external_urls(@link.href)
      next
    elsif @link.internal? && !@link.exists?
      # no `next` here: the remaining checks still run for this link
      add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
    end

    # does the local directory have a trailing slash?
    if @link.unslashed_directory? @link.absolute_path
      add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line, content: content)
      next
    end

    # verify the target hash
    handle_hash(@link, line, content) if @link.hash
  end

  external_urls
end
# Dispatches scheme-specific checks for +link+:
# * mailto -> handle_mailto
# * tel    -> handle_tel
# * http   -> reports an issue, but only when @options[:enforce_https] is set
# Any other scheme is left alone.
def check_schemes(link, line, content)
  case link.scheme
  when 'mailto' then handle_mailto(link, line, content)
  when 'tel' then handle_tel(link, line, content)
  when 'http'
    add_issue("#{link.href} is not an HTTPS link", line: line, content: content) if @options[:enforce_https]
  end
end
# Validates a mailto: link. An empty path is reported as having no email
# address; a non-empty path without an '@' is reported as an invalid address.
def handle_mailto(link, line, content)
  address = link.path
  return add_issue("#{link.href} contains no email address", line: line, content: content) if address.empty?
  return if address.include?('@')

  add_issue("#{link.href} contains an invalid email address", line: line, content: content)
end
# Validates a tel: link: an empty path is reported as having no phone number.
def handle_tel(link, line, content)
  return unless link.path.empty?

  add_issue("#{link.href} contains no phone number", line: line, content: content)
end
# Verifies the hash fragment of +link+.
# An internal link whose fragment cannot be found (see hash_check) is
# reported. Otherwise, if the link answers true to external?, the check is
# delegated to external_link_check.
def handle_hash(link, line, content)
  if link.internal?
    found = hash_check(link.html, link.hash)
    return add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content) unless found
  end

  external_link_check(link, line, content) if link.external?
end
# Checks the hash fragment of a link that points at another file.
# Reports an issue when the target (link.absolute_path) does not exist;
# otherwise parses it with create_nokogiri and reports an issue when the
# fragment cannot be found in it.
def external_link_check(link, line, content)
  unless link.exists?
    return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
  end

  target_html = create_nokogiri(link.absolute_path)
  return if hash_check(target_html, link.hash)

  add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content)
end
# Reports whether the fragment +href_hash+ resolves inside +html+.
# Both the raw fragment and its URI-unescaped form are tried. The fragment
# "top" always resolves, without looking at the document.
def hash_check(html, href_hash)
  candidates = [href_hash, Addressable::URI.unescape(href_hash)]

  # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
  return true if candidates.include?('top')

  !find_fragments(html, candidates).empty?
end
# Searches +html+ for elements whose id or name equals one of +fragment_ids+.
#
# For every candidate two XPath expressions are built (one for @id, one for
# @name). They use the custom case_sensitive_equals function, which is
# supplied by passing an XpathFunctions instance as the last argument to
# html.xpath. Returns whatever html.xpath returns.
def find_fragments(html, fragment_ids)
  xpaths = fragment_ids.flat_map do |frag_id|
    # XPath 1.0 string literals cannot escape quotes, so the id is cut at
    # every apostrophe and reassembled with concat(). The -1 limit keeps
    # trailing empty pieces; without it an id ending in ' lost that quote
    # and matched the wrong element.
    escaped_frag_id = "'#{frag_id.split("'", -1).join("', \"'\", '")}', ''"
    [
      "//*[case_sensitive_equals(@id, concat(#{escaped_frag_id}))]",
      "//*[case_sensitive_equals(@name, concat(#{escaped_frag_id}))]"
    ]
  end
  xpaths << XpathFunctions.new

  html.xpath(*xpaths)
end
# Whitelist for affected elements from Subresource Integrity specification
# https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
#
# This must be an Array: the previous %(stylesheet) literal was a String, so
# include? did substring matching (rel="" or rel="style" passed) and raised
# TypeError for a nil rel.
SRI_REL_TYPES = %w[stylesheet].freeze

# Reports missing Subresource Integrity / CORS attributes on the current
# link (@link) when its rel is listed in SRI_REL_TYPES.
#
# defined?(@link.integrity) is used as the "attribute is present" test;
# presumably the element only exposes readers for attributes that exist on
# the node -- verify against create_element.
# NOTE(review): the messages interpolate @link.src although run only calls
# this for <link> nodes, which normally carry href -- confirm src is intended.
def check_sri(line, content)
  # rel may not be exposed at all (run guards it the same way)
  return unless @link.respond_to?(:rel) && SRI_REL_TYPES.include?(@link.rel)

  has_integrity = defined?(@link.integrity)
  has_crossorigin = defined?(@link.crossorigin)

  if !has_integrity && !has_crossorigin
    add_issue("SRI and CORS not provided in: #{@link.src}", line: line, content: content)
  elsif !has_integrity
    add_issue("Integrity is missing in: #{@link.src}", line: line, content: content)
  elsif !has_crossorigin
    add_issue("CORS not provided for external resource in: #{@link.src}", line: line, content: content)
  end
end
# Custom XPath function handler passed to the xpath call in find_fragments.
class XpathFunctions
  # Keeps the members of +node_set+ whose string form is exactly
  # (case-sensitively) equal to the string form of +str_to_match+.
  def case_sensitive_equals(node_set, str_to_match)
    wanted = str_to_match.to_s
    node_set.select { |node| node.to_s == wanted }
  end
end
end