Permalink
Browse files

Fix UTF-16 LE and BE handling.

Thanks to John Lenton for the test cases.
  • Loading branch information...
1 parent 53feefa commit bd61a856f807e525beaee41959452c88c83d46cf @niemeyer niemeyer committed Dec 1, 2015
Showing with 17 additions and 2 deletions.
  1. +12 −0 decode_test.go
  2. +5 −2 readerc.go
View
@@ -559,6 +559,18 @@ var unmarshalTests = []struct {
"a: []",
&struct{ A []int }{[]int{}},
},
+
+ // UTF-16-LE
+ {
+ "\xff\xfe\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00\n\x00",
+ M{"ñoño":"very yes"},
+ },
+
+ // UTF-16-BE
+ {
+ "\xfe\xff\x00\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00\n",
+ M{"ñoño":"very yes"},
+ },
}
// M is a map type with arbitrary keys and values. Entries in the
// unmarshalTests table use it as the value paired with a YAML input,
// e.g. M{"ñoño": "very yes"}.
type M map[interface{}]interface{}
View
@@ -247,7 +247,7 @@ func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool {
if parser.encoding == yaml_UTF16LE_ENCODING {
low, high = 0, 1
} else {
- high, low = 1, 0
+ low, high = 1, 0
}
// The UTF-16 encoding is not as simple as one might
@@ -357,23 +357,26 @@ func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool {
if value <= 0x7F {
// 0000 0000-0000 007F . 0xxxxxxx
parser.buffer[buffer_len+0] = byte(value)
+ buffer_len += 1
} else if value <= 0x7FF {
// 0000 0080-0000 07FF . 110xxxxx 10xxxxxx
parser.buffer[buffer_len+0] = byte(0xC0 + (value >> 6))
parser.buffer[buffer_len+1] = byte(0x80 + (value & 0x3F))
+ buffer_len += 2
} else if value <= 0xFFFF {
// 0000 0800-0000 FFFF . 1110xxxx 10xxxxxx 10xxxxxx
parser.buffer[buffer_len+0] = byte(0xE0 + (value >> 12))
parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 6) & 0x3F))
parser.buffer[buffer_len+2] = byte(0x80 + (value & 0x3F))
+ buffer_len += 3
} else {
// 0001 0000-0010 FFFF . 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
parser.buffer[buffer_len+0] = byte(0xF0 + (value >> 18))
parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 12) & 0x3F))
parser.buffer[buffer_len+2] = byte(0x80 + ((value >> 6) & 0x3F))
parser.buffer[buffer_len+3] = byte(0x80 + (value & 0x3F))
+ buffer_len += 4
}
- buffer_len += width
parser.unread++
}

0 comments on commit bd61a85

Please sign in to comment.