/
uri_parser.cr
247 lines (226 loc) · 5.05 KB
/
uri_parser.cr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
class URI
# :nodoc:
struct Parser
# Parser is based on https://url.spec.whatwg.org/.
# Step names and variables are roughly the same as in that document.
#
# Notable deviations from the spec:
# - does not parse Windows-style slashes
# - does not validate that the port is <= 2**16 - 1
# - does not validate that IPv4 or IPv6 hosts are well formed
# - ports greater than 2**16 - 1 are not errors
property uri : URI
@input : UInt8*
# Creates a parser for *input*.
#
# The input is stripped of surrounding whitespace and then accessed through
# a raw byte pointer; the cursor starts at offset 0 and results are
# collected into a fresh `URI`.
# NOTE(review): the parsing steps treat a `'\0'` byte as end of input, so
# this presumably relies on the stripped string being NUL-terminated.
def initialize(input)
  @ptr = 0
  @input = input.strip.to_unsafe
  @uri = URI.new
end
# Returns the byte currently under the cursor (`@ptr`).
def c
  (@input + @ptr).value
end
# Runs the parser from the scheme-start step and returns the parser itself,
# so the populated `uri` can be read from the result.
def run
  parse_scheme_start
  return self
end
# Entry step: a leading ASCII letter may start a scheme; anything else is
# handled as input without a scheme.
private def parse_scheme_start
  return parse_scheme if alpha?
  parse_no_scheme
end
# Scans a candidate scheme (letters, digits, `-`, `.`, `+`).
#
# If the run is terminated by `:`, the lower-cased scheme is stored and
# parsing continues with either the path-or-authority step (when a `/`
# follows the colon) or the path step. Otherwise the cursor is rewound to
# offset 0 and the input is re-parsed as having no scheme.
private def parse_scheme
  scheme_start = @ptr

  # Consume every byte that is allowed inside a scheme.
  while alpha? || numeric? || c === '-' || c === '.' || c === '+'
    @ptr += 1
  end

  # Not terminated by a colon: this was not a scheme after all.
  unless c === ':'
    @ptr = 0
    return parse_no_scheme
  end

  @uri.scheme = from_input(scheme_start).downcase

  if @input[@ptr + 1] === '/'
    # Skip the ":/" pair; the next step inspects what follows it.
    @ptr += 2
    parse_path_or_authority
  else
    # Skip only the colon; the remainder is the path.
    @ptr += 1
    parse_path
  end
end
# Called with the cursor just past "scheme:/".
#
# A second `/` introduces an authority (the host is initialised to an empty
# string first). Otherwise the cursor steps back onto the single `/` so it
# becomes part of the path.
private def parse_path_or_authority
  unless c === '/'
    @ptr -= 1
    return parse_path
  end

  @uri.host = ""
  parse_authority
end
# Handles input without a scheme: a leading `#` is a bare fragment,
# everything else is treated as a relative reference.
private def parse_no_scheme
  if c === '#'
    parse_fragment
  else
    parse_relative
  end
end
# Steps over the current byte, then looks ahead through the authority to
# decide how to parse it.
#
# If an `@` occurs before the end of the host, the authority carries
# userinfo; otherwise it starts directly with the host. In both cases the
# cursor is reset to the start of the authority before dispatching.
private def parse_authority
  @ptr += 1
  authority_start = @ptr

  # Advance to the first '@' or to the end of the host, whichever is first.
  until c === '@' || end_of_host?
    @ptr += 1
  end

  found_userinfo = (c === '@')
  @ptr = authority_start

  if found_userinfo
    parse_userinfo
  else
    parse_host
  end
end
# Parses the `user[:password]@` part of the authority and then continues
# with the host.
#
# Only the first `:` separates the user from the password; any further
# colons belong to the password (RFC 3986, section 3.2.1). Previously every
# colon restarted the split, so "user:pa:ss@host" overwrote the user with
# "pa" and set the password to "ss".
#
# Both components are decoded with `URI.decode_www_form`.
#
# There is no end-of-input check in this loop: it relies on
# `parse_authority` dispatching here only after it has seen an `@` ahead
# of the cursor.
private def parse_userinfo
  start = @ptr
  password_flag = false
  loop do
    if c === '@'
      # End of userinfo: the pending segment is the password if a colon
      # was seen, otherwise it is the user.
      if password_flag
        @uri.password = URI.decode_www_form(from_input(start))
      else
        @uri.user = URI.decode_www_form(from_input(start))
      end
      @ptr += 1
      return parse_host
    elsif c === ':' && !password_flag
      # First colon: everything before it is the user name.
      @uri.user = URI.decode_www_form(from_input(start))
      password_flag = true
      @ptr += 1
      start = @ptr
    else
      @ptr += 1
    end
  end
end
# Parses the host component.
#
# An immediate `/` means there is no host text and parsing moves straight
# to the path. Otherwise bytes are consumed up to the end of the host or up
# to a `:` that is not inside square brackets (so colons inside a bracketed
# host such as an IPv6 literal are kept). The host is stored after
# `URI.decode`; a terminating `:` leads to the port step, anything else to
# the path step.
private def parse_host
  return parse_path if c === '/'

  host_start = @ptr
  in_brackets = false

  until end_of_host? || (!in_brackets && c === ':')
    if c === '['
      in_brackets = true
    elsif c === ']'
      in_brackets = false
    end
    @ptr += 1
  end

  @uri.host = URI.decode(from_input(host_start))

  if c === ':'
    # Skip the colon that introduces the port.
    @ptr += 1
    parse_port
  else
    parse_path
  end
end
# Parses the decimal port that follows the `:` after the host, then
# continues with the path.
#
# An empty port (no digits before the end of the host) leaves `uri.port`
# untouched. Ports above 2**16 - 1 are deliberately accepted, but the value
# is accumulated in an Int64 and rejected with `URI::Error` once it no
# longer fits in an Int32, instead of overflowing the accumulator.
#
# Raises `URI::Error` if a non-digit appears before the end of the host or
# if the port does not fit in an Int32.
private def parse_port
  start = @ptr
  loop do
    if numeric?
      @ptr += 1
    elsif end_of_host?
      unless start == @ptr
        port = 0_i64
        (start...@ptr).each do |i|
          port = port * 10 + (@input[i] - '0'.ord)
          # Checking after every digit keeps the Int64 accumulator far from
          # its own limit, however long the digit run is.
          if port > Int32::MAX
            raise URI::Error.new("Invalid URI: port out of range at character #{start}")
          end
        end
        @uri.port = port.to_i32
      end
      return parse_path
    else
      raise URI::Error.new("Invalid URI: bad port at character #{@ptr}")
    end
  end
end
# Dispatches a relative reference on its first byte: end of input stops
# parsing, `/` may start an authority or a path, `?` starts a query, `#`
# starts a fragment, and anything else is a path.
private def parse_relative
  return if c === '\0'
  return parse_relative_slash if c === '/'
  return parse_query if c === '?'
  return parse_fragment if c === '#'

  parse_path
end
# Handles a relative reference that begins with `/`.
#
# A second `/` makes it a scheme-less authority ("//host/..."): the cursor
# moves onto that second slash, the host defaults to an empty string if it
# is not set yet, and the authority step takes over. A single `/` is simply
# the start of a path.
private def parse_relative_slash
  return parse_path unless @input[@ptr + 1] === '/'

  @ptr += 1
  @uri.host ||= ""
  parse_authority
end
# Consumes the path: everything from the cursor up to (not including) the
# first `?`, `#` or end of input. Stores it in `uri.path` and continues with
# the query or fragment step when one follows; returns nil at end of input.
private def parse_path
  path_start = @ptr

  until c === '\0' || c === '?' || c === '#'
    @ptr += 1
  end

  @uri.path = from_input(path_start)

  if c === '?'
    parse_query
  elsif c === '#'
    parse_fragment
  else
    nil
  end
end
# Consumes the query. The cursor is expected to be on the introducing `?`,
# which is skipped; the query then runs up to the first `#` or end of input.
# Continues with the fragment step when a `#` follows, otherwise returns nil.
private def parse_query
  @ptr += 1
  query_start = @ptr

  until c === '\0' || c === '#'
    @ptr += 1
  end

  @uri.query = from_input(query_start)

  if c === '#'
    parse_fragment
  else
    nil
  end
end
# Consumes the fragment. The cursor is expected to be on the introducing
# `#`, which is skipped; everything up to the end of input becomes
# `uri.fragment`. Always returns nil, as this is the final parsing step.
private def parse_fragment
  @ptr += 1
  fragment_start = @ptr

  until c === '\0'
    @ptr += 1
  end

  @uri.fragment = from_input(fragment_start)
  nil
end
# Builds a String from the input bytes in the half-open range
# `start...@ptr` (from *start* up to, but not including, the cursor).
private def from_input(start)
  length = @ptr - start
  String.new(@input + start, length)
end
# Returns true when the current byte is an ASCII letter (a-z or A-Z).
private def alpha?
  byte = c
  ('a'.ord <= byte <= 'z'.ord) || ('A'.ord <= byte <= 'Z'.ord)
end
# Returns true when the current byte is an ASCII digit (0-9).
private def numeric?
  byte = c
  '0'.ord <= byte <= '9'.ord
end
# Returns true when the current byte terminates the authority: end of
# input, or the start of a path (`/`), query (`?`) or fragment (`#`).
private def end_of_host?
  case c
  when '\0', '/', '?', '#'
    true
  else
    false
  end
end
end
end