Skip to content

Commit

Permalink
Fix UTF-16 LE and BE handling.
Browse files Browse the repository at this point in the history
Thanks to John Lenton for the test cases.
  • Loading branch information
niemeyer committed Dec 1, 2015
1 parent 53feefa commit bd61a85
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 2 deletions.
12 changes: 12 additions & 0 deletions decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,18 @@ var unmarshalTests = []struct {
"a: []",
&struct{ A []int }{[]int{}},
},

// UTF-16-LE
{
"\xff\xfe\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00\n\x00",
M{"ñoño":"very yes"},
},

// UTF-16-BE
{
"\xfe\xff\x00\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00\n",
M{"ñoño":"very yes"},
},
}

type M map[interface{}]interface{}
Expand Down
7 changes: 5 additions & 2 deletions readerc.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool {
if parser.encoding == yaml_UTF16LE_ENCODING {
low, high = 0, 1
} else {
high, low = 1, 0
low, high = 1, 0
}

// The UTF-16 encoding is not as simple as one might
Expand Down Expand Up @@ -357,23 +357,26 @@ func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool {
if value <= 0x7F {
// 0000 0000-0000 007F . 0xxxxxxx
parser.buffer[buffer_len+0] = byte(value)
buffer_len += 1
} else if value <= 0x7FF {
// 0000 0080-0000 07FF . 110xxxxx 10xxxxxx
parser.buffer[buffer_len+0] = byte(0xC0 + (value >> 6))
parser.buffer[buffer_len+1] = byte(0x80 + (value & 0x3F))
buffer_len += 2
} else if value <= 0xFFFF {
// 0000 0800-0000 FFFF . 1110xxxx 10xxxxxx 10xxxxxx
parser.buffer[buffer_len+0] = byte(0xE0 + (value >> 12))
parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 6) & 0x3F))
parser.buffer[buffer_len+2] = byte(0x80 + (value & 0x3F))
buffer_len += 3
} else {
// 0001 0000-0010 FFFF . 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
parser.buffer[buffer_len+0] = byte(0xF0 + (value >> 18))
parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 12) & 0x3F))
parser.buffer[buffer_len+2] = byte(0x80 + ((value >> 6) & 0x3F))
parser.buffer[buffer_len+3] = byte(0x80 + (value & 0x3F))
buffer_len += 4
}
buffer_len += width

parser.unread++
}
Expand Down

0 comments on commit bd61a85

Please sign in to comment.