/
uris.rb
139 lines (121 loc) · 5.64 KB
/
uris.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
require 'berkeley_library/logging'
require 'berkeley_library/util/uris/appender'
require 'berkeley_library/util/uris/requester'
require 'berkeley_library/util/uris/validator'
module BerkeleyLibrary
module Util
module URIs
# Mixed in for `logger`, which #safe_parse_uri uses to report unparseable URLs.
# NOTE(review): assumes BerkeleyLibrary::Logging is what defines `logger` — confirm.
include BerkeleyLibrary::Logging
# Shorthand for the UTF-8 encoding that #path_escape transcodes its input to.
UTF_8 = Encoding::UTF_8
# Mix this module into its own singleton class so that every method defined
# below can also be called directly on the module, e.g. `URIs.append(...)`,
# without the caller having to include URIs.
singleton_class.include(URIs)
# Appends the specified paths to the path of the specified URI, removing any extraneous slashes
# and merging additional query parameters, and returns a new URI with that path and the same scheme,
# host, query, fragment, etc. as the original.
#
# Delegates to {Appender}: builds one from the arguments and returns its `to_uri` result.
#
# @param uri [URI, String] the original URI
# @param elements [Array<String, Symbol>] the URI elements to join.
# @return [URI] a new URI appending the joined path elements.
# @raise [URI::InvalidComponentError] if appending the specified elements would create an invalid URI
def append(uri, *elements)
Appender.new(uri, *elements).to_uri
end
# Performs a GET request and returns the response body as a string.
# Delegates to `Requester.get`, passing all arguments through unchanged.
#
# @param uri [URI, String] the URI to GET
# @param params [Hash] the query parameters to add to the URI. (Note that the URI may already include query parameters.)
# @param headers [Hash] the request headers.
# @param log [Boolean] whether to log each request URL and response code
# @return [String] the body as a string.
# @raise [RestClient::Exception] in the event of an unsuccessful request.
def get(uri, params: {}, headers: {}, log: true)
Requester.get(uri, params: params, headers: headers, log: log)
end
# Performs a HEAD request and returns the response status as an integer.
# Note that unlike {#get}, this does not raise an error in the
# event of an unsuccessful request.
# Delegates to `Requester.head`, passing all arguments through unchanged.
#
# @param uri [URI, String] the URI to HEAD
# @param params [Hash] the query parameters to add to the URI. (Note that the URI may already include query parameters.)
# @param headers [Hash] the request headers.
# @param log [Boolean] whether to log each request URL and response code
# @return [Integer] the response code as an integer.
def head(uri, params: {}, headers: {}, log: true)
Requester.head(uri, params: params, headers: headers, log: log)
end
# Performs a GET request and returns the response, even in the event of
# a failed request. (Compare {#get}, which returns only the body and is
# documented to raise on an unsuccessful request.)
# Delegates to `Requester.get_response`, passing all arguments through unchanged.
#
# @param uri [URI, String] the URI to GET
# @param params [Hash] the query parameters to add to the URI. (Note that the URI may already include query parameters.)
# @param headers [Hash] the request headers.
# @param log [Boolean] whether to log each request URL and response code
# @return [RestClient::Response] the response
def get_response(uri, params: {}, headers: {}, log: true)
Requester.get_response(uri, params: params, headers: headers, log: log)
end
# Performs a HEAD request and returns the response, even in the event of
# a failed request. (Compare {#head}, which returns only the status code.)
# Delegates to `Requester.head_response`, passing all arguments through unchanged.
#
# @param uri [URI, String] the URI to HEAD
# @param params [Hash] the query parameters to add to the URI. (Note that the URI may already include query parameters.)
# @param headers [Hash] the request headers.
# @param log [Boolean] whether to log each request URL and response code
# @return [RestClient::Response] the response
def head_response(uri, params: {}, headers: {}, log: true)
Requester.head_response(uri, params: params, headers: headers, log: log)
end
# Returns the specified URL as a URI, or `nil` if the URL is `nil`.
# Delegates to `Validator.uri_or_nil`. See {#safe_parse_uri} for a variant
# that returns `nil` instead of raising when the URL cannot be parsed.
#
# @param url [String, URI, nil] the URL.
# @return [URI, nil] the URI, or `nil`.
# @raise [URI::InvalidURIError] if `url` is not `nil` and cannot be
#   parsed as a URI.
def uri_or_nil(url)
Validator.uri_or_nil(url)
end
# Escapes the specified string so that it can be used as a URL path segment,
# replacing disallowed characters (including /) with percent-encodings as needed.
#
# The string is transcoded to UTF-8 first and then escaped byte by byte, so a
# non-ASCII character becomes one `%XX` escape per UTF-8 byte.
#
# @param s [String] the string to escape.
# @return [String] the escaped string.
# @raise [ArgumentError] if `s` is not string-like, i.e. does not respond to
#   both `#encoding` and `#encode`.
# @raise [EncodingError] if `s` cannot be transcoded to UTF-8.
def path_escape(s)
  # Symbol and Regexp respond to #encoding but cannot be transcoded or iterated
  # by byte, so require #encode as well; otherwise they would fail further down
  # with a NoMethodError instead of this ArgumentError.
  string_like = s.respond_to?(:encoding) && s.respond_to?(:encode)
  raise ArgumentError, "Can't escape #{s.inspect}: not a string" unless string_like

  utf8 = s.encoding == UTF_8 ? s : s.encode(UTF_8)

  # `+''` guarantees an unfrozen buffer, so appending stays safe when string
  # literals are frozen (magic comment or --enable-frozen-string-literal).
  utf8.each_byte.each_with_object(+'') do |byte, escaped|
    # Only ASCII bytes are ever left unescaped, so `byte.chr` is always a
    # single ASCII character here.
    escaped << (should_escape?(byte, :path_segment) ? format('%%%02X', byte) : byte.chr)
  end
end
# Returns the specified URL as a URI, or `nil` if the URL cannot
# be parsed. This is the lenient counterpart of {#uri_or_nil}: a
# `URI::InvalidURIError` is logged as a warning instead of being raised.
# Any other exception raised by {#uri_or_nil} still propagates.
#
# @param url [Object, nil] the URL.
# @return [URI, nil] the URI, or `nil`.
def safe_parse_uri(url)
# noinspection RubyMismatchedArgumentType
uri_or_nil(url)
rescue URI::InvalidURIError => e
# NOTE(review): the exception is passed as a second argument, which the stdlib
# Logger#warn does not accept — presumably the mixed-in logger does; confirm.
logger.warn("Error parsing URL #{url.inspect}", e)
nil
end
private
# TODO: extend to cover other modes - host, zone, path, password, query, fragment
#       cf. https://github.com/golang/go/blob/master/src/net/url/url.go
#
# Bytes that may appear unescaped in each escaping mode, in addition to the
# unreserved characters recognised by #unreserved?. Keyed by mode symbol.
# Both the hash and the per-mode arrays are frozen so the table cannot be
# altered at runtime.
ALLOWED_BYTES_BY_MODE = {
  path_segment: [0x24, 0x26, 0x2b, 0x3a, 0x3d, 0x40].freeze # $ & + : = @
}.freeze
# Decides whether a byte has to be percent-encoded in the given mode.
#
# @param b [Integer] the byte value.
# @param mode [Symbol] a key of ALLOWED_BYTES_BY_MODE.
# @return [Boolean] `false` if the byte is unreserved or explicitly allowed
#   for `mode`, `true` otherwise.
def should_escape?(b, mode)
  # The allow-list is consulted only when the byte is not unreserved.
  !(unreserved?(b) || ALLOWED_BYTES_BY_MODE[mode].include?(b))
end
# Tells whether a byte is one of the characters that never need escaping:
# ASCII letters, ASCII digits, and `-`, `.`, `_`, `~`.
#
# @param byte [Integer] the byte value.
# @return [Boolean] `true` if the byte is unreserved, `false` otherwise.
def unreserved?(byte)
  alphanumeric =
    byte.between?(0x41, 0x5a) || # A-Z
    byte.between?(0x61, 0x7a) || # a-z
    byte.between?(0x30, 0x39)    # 0-9

  alphanumeric || [0x2d, 0x2e, 0x5f, 0x7e].include?(byte) # - . _ ~
end
end
end
end