Permalink
Browse files

Revise uses of encoding APIs.

When this code was originally implemented, we still supported Ruby 1.8,
which necessitated checking for encoding methods and using a regex to
validate UTF-8. These checks are now gone.

We previously tagged many strings as binary when this was not strictly
necessary, either because we were just going to iterate their bytes or
because we were going to hand them off to the caller, which should just
write them directly to a socket. That tagging has been removed. Strings
used as buffers to accumulate streaming input are still tagged as binary
to avoid encoding collision/conversion.

The places where we do need to tag as UTF-8 (i.e. just before emitting
to the application) remain, but they now copy the string first if it is
frozen. This allows us to work with frozen strings.

Finally, strings passed in via the Driver#text method should be
*transcoded* to UTF-8 if necessary, not merely tagged. The Ruby
String#encode method produces a new string so this should also be safe
with frozen strings.
  • Loading branch information...
1 parent 115d82b commit 9ce857b3d4c48f8c086fac1bc19b4e4d59160c13 @jcoglan jcoglan committed May 19, 2016
@@ -33,13 +33,12 @@ def Mask.mask(payload, mask)
end
end
- unless String.instance_methods.include?(:force_encoding)
- require root + '/utf8_match'
- end
-
MAX_LENGTH = 0x3ffffff
STATES = [:connecting, :open, :closing, :closed]
+ BINARY = 'ASCII-8BIT'
+ UNICODE = 'UTF-8'
+
ConnectEvent = Struct.new(nil)
OpenEvent = Struct.new(nil)
MessageEvent = Struct.new(:data)
@@ -94,13 +93,14 @@ def start
return false unless @ready_state == 0
response = handshake_response
return false unless response
- @socket.write(Driver.encode(response, :binary))
+ @socket.write(response)
open unless @stage == -1
true
end
def text(message)
- frame(message)
+ message = message.encode(UNICODE) unless message.encoding.name == UNICODE
+ frame(message, :text)
end
def binary(message)
@@ -159,14 +159,15 @@ def self.encode(string, encoding = nil)
case string
when Array then
string = string.pack('C*')
- encoding ||= :binary
+ encoding ||= BINARY
when String then
- encoding ||= :utf8
+ encoding ||= UNICODE
+ end
+ unless string.encoding.name == encoding
+ string = string.dup if string.frozen?
+ string.force_encoding(encoding)
end
- encodings = {:utf8 => 'UTF-8', :binary => 'ASCII-8BIT'}
- string.force_encoding(encodings[encoding]) if string.respond_to?(:force_encoding)
- return nil if encoding == :utf8 and not valid_utf8?(string)
- string
+ string.valid_encoding? ? string : nil
end
def self.validate_options(options, valid_keys)
@@ -177,14 +178,6 @@ def self.validate_options(options, valid_keys)
end
end
- def self.valid_utf8?(string)
- if defined?(UTF8_MATCH)
- UTF8_MATCH =~ string ? true : false
- else
- string.valid_encoding?
- end
- end
-
def self.websocket?(env)
connection = env['HTTP_CONNECTION'] || ''
upgrade = env['HTTP_UPGRADE'] || ''
@@ -53,7 +53,7 @@ def proxy(origin, options = {})
def start
return false unless @ready_state == -1
- @socket.write(Driver.encode(handshake_request, :binary))
+ @socket.write(handshake_request)
@ready_state = 0
true
end
@@ -56,7 +56,7 @@ def parse(chunk)
when 2 then
if octet == 0xFF
@stage = 0
- emit(:message, MessageEvent.new(Driver.encode(@buffer, :utf8)))
+ emit(:message, MessageEvent.new(Driver.encode(@buffer, UNICODE)))
else
if @length
@skipped += 1
@@ -8,7 +8,7 @@ def initialize(socket, options = {})
super
input = @socket.env['rack.input']
@stage = -1
- @body = Driver.encode(input ? input.read : '', :binary)
+ @body = (input ? input.read : String.new('')).force_encoding(BINARY)
@headers.clear
@headers['Upgrade'] = 'WebSocket'
@@ -70,7 +70,7 @@ def handshake_signature
def send_handshake_body
return unless signature = handshake_signature
- @socket.write(Driver.encode(signature, :binary))
+ @socket.write(signature)
@stage = 0
open
parse(@body[BODY_SIZE..-1]) if @body.bytesize > BODY_SIZE
@@ -129,10 +129,6 @@ def parse(chunk)
end
end
- def text(message)
- frame(message, :text)
- end
-
def binary(message)
frame(message, :binary)
end
@@ -356,7 +352,7 @@ def emit_frame(buffer)
when OPCODES[:close] then
code = (bytesize >= 2) ? payload.unpack(PACK_FORMATS[2]).first : nil
- reason = (bytesize > 2) ? Driver.encode(bytes[2..-1] || [], :utf8) : nil
+ reason = (bytesize > 2) ? Driver.encode(bytes[2..-1] || [], UNICODE) : nil
unless (bytesize == 0) or
(code && code >= MIN_RESERVED_ERROR && code <= MAX_RESERVED_ERROR) or
@@ -374,7 +370,7 @@ def emit_frame(buffer)
frame(payload, :pong)
when OPCODES[:pong] then
- message = Driver.encode(payload, :utf8)
+ message = Driver.encode(payload, UNICODE)
callback = @ping_callbacks[message]
@ping_callbacks.delete(message)
callback.call if callback
@@ -391,7 +387,7 @@ def emit_message
case message.opcode
when OPCODES[:text] then
- payload = Driver.encode(payload, :utf8)
+ payload = Driver.encode(payload, UNICODE)
when OPCODES[:binary]
payload = payload.bytes.to_a
end
@@ -14,7 +14,7 @@ def initialize
@rsv2 = false
@rsv3 = false
@opcode = nil
- @data = Driver.encode('', :binary)
+ @data = String.new('').force_encoding(BINARY)
end
def <<(frame)
@@ -44,7 +44,7 @@ def start
start = "CONNECT #{@origin.host}:#{port} HTTP/1.1"
headers = [start, @headers.to_s, '']
- @socket.write(Driver.encode(headers.join("\r\n"), :binary))
+ @socket.write(headers.join("\r\n"))
true
end
@@ -58,7 +58,7 @@ def parse(chunk)
end
def write(buffer)
- @socket.write(Driver.encode(buffer, :binary))
+ @socket.write(buffer)
end
private
@@ -6,13 +6,13 @@ class StreamReader
MINIMUM_AUTOMATIC_PRUNE_OFFSET = 128
def initialize
- @buffer = Driver.encode('', :binary)
+ @buffer = String.new('').force_encoding(BINARY)
@offset = 0
end
def put(chunk)
return unless chunk and chunk.bytesize > 0
- @buffer << Driver.encode(chunk, :binary)
+ @buffer << chunk.force_encoding(BINARY)
end
# Read bytes from the data:
@@ -42,7 +42,7 @@ def prune
buffer_size = @buffer.bytesize
if @offset > buffer_size
- @buffer = Driver.encode('', :binary)
+ @buffer = String.new('').force_encoding(BINARY)
else
@buffer = @buffer.byteslice(@offset, buffer_size - @offset)
end
@@ -1,6 +0,0 @@
-module WebSocket
- class Driver
- # http://www.w3.org/International/questions/qa-forms-utf-8.en.php
- UTF8_MATCH = /^([\x00-\x7F]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$/
- end
-end
@@ -90,7 +90,7 @@
"WebSocket-Origin: http://www.example.com\r\n" +
"WebSocket-Location: ws://www.example.com/socket\r\n" +
"\r\n")
- expect(socket).to receive(:write).with(WebSocket::Driver.encode "\x00Hi\xFF", :binary)
+ expect(socket).to receive(:write).with(WebSocket::Driver.encode "\x00Hi\xFF", WebSocket::Driver::BINARY)
driver.frame("Hi")
driver.start
@@ -137,7 +137,7 @@
"Sec-WebSocket-Location: ws://www.example.com/socket\r\n" +
"\r\n")
expect(socket).to receive(:write).with(response)
- expect(socket).to receive(:write).with(WebSocket::Driver.encode "\x00Hi\xFF", :binary)
+ expect(socket).to receive(:write).with(WebSocket::Driver.encode "\x00Hi\xFF", WebSocket::Driver::BINARY)
driver.frame("Hi")
driver.start
@@ -191,7 +191,7 @@
it "sends any frames queued before the handshake was complete" do
expect(socket).to receive(:write).with(response)
- expect(socket).to receive(:write).with(WebSocket::Driver.encode "\x00hello\xFF", :binary)
+ expect(socket).to receive(:write).with(WebSocket::Driver.encode "\x00hello\xFF", WebSocket::Driver::BINARY)
driver.frame("hello")
driver.parse(body)
expect(@bytes).to eq [0, 104, 101, 108, 108, 111, 255]

0 comments on commit 9ce857b

Please sign in to comment.