Skip to content

Commit 6f13b6d

Browse files
committed
When parsing UTF-16, generate invalid codepoint for lonely low surrogate
Test passes now.
1 parent 24b2ba9 commit 6f13b6d

File tree

1 file changed

+28
-14
lines changed

1 file changed

+28
-14
lines changed

src/hb-utf-private.hh

Lines changed: 28 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -121,20 +121,27 @@ hb_utf_next (const uint16_t *text,
121121
{
122122
hb_codepoint_t c = *text++;
123123

124-
if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdbff)))
124+
if (likely (!hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdfff)))
125125
{
126-
/* high surrogate */
126+
*unicode = c;
127+
return text;
128+
}
129+
130+
if (likely (hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdbff)))
131+
{
132+
/* High-surrogate in c */
127133
hb_codepoint_t l;
128134
if (text < end && ((l = *text), likely (hb_in_range<hb_codepoint_t> (l, 0xdc00, 0xdfff))))
129135
{
130-
/* low surrogate */
136+
/* Low-surrogate in l */
131137
*unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00);
132138
text++;
133-
} else
134-
*unicode = -1;
135-
} else
136-
*unicode = c;
139+
return text;
140+
}
141+
}
137142

143+
/* Lonely / out-of-order surrogate. */
144+
*unicode = -1;
138145
return text;
139146
}
140147

@@ -145,20 +152,27 @@ hb_utf_prev (const uint16_t *text,
145152
{
146153
hb_codepoint_t c = *--text;
147154

148-
if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xdc00, 0xdfff)))
155+
if (likely (!hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdfff)))
149156
{
150-
/* low surrogate */
157+
*unicode = c;
158+
return text;
159+
}
160+
161+
if (likely (hb_in_range<hb_codepoint_t> (c, 0xdc00, 0xdfff)))
162+
{
163+
/* Low-surrogate in c */
151164
hb_codepoint_t h;
152165
if (start < text && ((h = *(text - 1)), likely (hb_in_range<hb_codepoint_t> (h, 0xd800, 0xdbff))))
153166
{
154-
/* high surrogate */
167+
/* High-surrogate in h */
155168
*unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00);
156169
text--;
157-
} else
158-
*unicode = -1;
159-
} else
160-
*unicode = c;
170+
return text;
171+
}
172+
}
161173

174+
/* Lonely / out-of-order surrogate. */
175+
*unicode = -1;
162176
return text;
163177
}
164178

0 commit comments

Comments
 (0)