/
parser.cr
129 lines (112 loc) · 3.84 KB
/
parser.cr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
module MIME::Multipart
# Parses multipart MIME messages.
#
# ### Example
#
# ```
# require "mime/multipart"
#
# multipart = "--aA40\r\nContent-Type: text/plain\r\n\r\nbody\r\n--aA40--"
# parser = MIME::Multipart::Parser.new(IO::Memory.new(multipart), "aA40")
#
# while parser.has_next?
# parser.next do |headers, io|
# headers["Content-Type"] # => "text/plain"
# io.gets_to_end # => "body"
# end
# end
# ```
#
# Please note that the IO object yielded by `#next` is only valid until the
# block returns.
class Parser
# Creates a new `Multipart::Parser` which parses *io* with multipart
# boundary *boundary*.
#
# *boundary* is the bare boundary token, without the leading `--`; the
# dashes are prepended here. Nothing is read from *io* by the constructor.
def initialize(@io : IO, @boundary : String)
# Parsing begins in the preamble state; the first `#next` call skips the
# preamble before reading a body part.
@state = State::PREAMBLE
# The boundary with its leading dashes, used to find the end of the preamble.
@dash_boundary = "--#{@boundary}"
# The delimiter that terminates a body part: the CRLF in front of the
# dash-boundary belongs to the delimiter, not to the body.
@delimiter = "\r\n#{@dash_boundary}"
end
# Reads the next body part and yields its headers (as `HTTP::Headers`) and
# an `IO` over its body to the block.
#
# The values are yielded rather than returned because the body `IO` is only
# usable while the block is running: as soon as the block returns, the
# remainder of the body is skipped and the `IO` is closed. To keep the body
# around for longer, read it into memory inside the block.
#
# Raises `Multipart::Error` if the parser has already finished or is in an
# errored state. Any exception raised inside this method, including one
# raised by the block, puts the parser into the errored state before it is
# re-raised.
#
# ```
# require "mime/multipart"
#
# multipart = "--aA40\r\nContent-Type: text/plain\r\n\r\nbody\r\n--aA40--"
# parser = MIME::Multipart::Parser.new(IO::Memory.new(multipart), "aA40")
# parser.next do |headers, io|
#   headers["Content-Type"] # => "text/plain"
#   io.gets_to_end          # => "body"
# end
# ```
def next(&)
  if @state.finished?
    raise Multipart::Error.new "Multipart parser already finished parsing"
  end
  if @state.errored?
    raise Multipart::Error.new "Multipart parser is in an errored state"
  end

  if @state.preamble?
    # Drop everything in front of the first dash-boundary.
    IO::Delimited.new(@io, read_delimiter: @dash_boundary).skip_to_end

    # A close delimiter directly after the preamble means there are no parts.
    if close_delimiter?
      fail("no parts")
    end
    @state = State::BODY_PART
  end

  if @state.body_part?
    part_io = IO::Delimited.new(@io, read_delimiter: @delimiter)
    part_headers = parse_headers(part_io)

    begin
      yield part_headers, part_io
    ensure
      # Consume whatever the block left unread so that the underlying IO is
      # positioned just after the delimiter, then invalidate the part IO.
      part_io.skip_to_end
      part_io.close

      if close_delimiter?
        @state = State::FINISHED
      end
    end
  end
rescue error
  @state = State::ERRORED
  raise error
end
# Returns `true` while `#next` may legally be called, i.e. the parser has
# neither finished nor entered the errored state.
def has_next? : Bool
  !(@state.finished? || @state.errored?)
end
# Reads header lines from *io* until the empty line (a bare CRLF) that ends
# the header section, or until *io* is exhausted, and returns them as
# `HTTP::Headers`.
private def parse_headers(io)
  parsed = HTTP::Headers.new
  while raw_line = io.gets(chomp: false)
    # A line consisting only of CRLF separates the headers from the body.
    break if raw_line == "\r\n"

    field, content = HTTP.parse_header(raw_line)
    parsed.add(field, content)
  end
  parsed
end
# Called directly after a boundary has been read, to decide whether that
# boundary was a close delimiter.
#
# Returns `true` when the boundary is followed by `--`. Otherwise it consumes
# the transport padding and the CRLF that follow a regular delimiter and
# returns `false`. Fails if the input ends prematurely or the padding
# contains anything other than spaces and tabs.
private def close_delimiter?
  rest = @io.gets("\r\n")
  fail("EOF reading delimiter") unless rest

  # No padding at all: an ordinary delimiter line.
  return false if rest == "\r\n"
  return true if rest.starts_with?("--")
  fail("EOF reading delimiter padding") unless rest.ends_with?("\r\n")

  # Everything except the trailing "\r\n" (2 bytes) is transport padding.
  padding = rest.to_slice[0, rest.bytesize - 2]
  unless padding.all? { |byte| byte.in?(' '.ord, '\t'.ord) }
    fail("padding contained non-whitespace character")
  end

  false
end
# Raises a `Multipart::Error` whose message is *msg* prefixed with a common
# "failed to parse" text. Never returns.
private def fail(msg)
  raise Multipart::Error.new("Failed to parse multipart message: #{msg}")
end
end
end