Skip to content

Commit

Permalink
lib: json-parser - Insert properly escaped unicode
Browse files Browse the repository at this point in the history
  • Loading branch information
cmouse authored and Timo Sirainen committed Nov 7, 2017
1 parent 2ede3b4 commit 32bd32d
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 10 deletions.
35 changes: 29 additions & 6 deletions src/lib/json-parser.c
Expand Up @@ -755,29 +755,52 @@ static void json_append_escaped_char(string_t *dest, unsigned char src)
str_append(dest, "\\\\");
break;
default:
if (src < 32)
if (src < 0x20 || src >= 0x80)
str_printfa(dest, "\\u%04x", src);
else
str_append_c(dest, src);
break;
}
}

/* Append one code point to an already opened JSON string in dest.
   Values below 0x80 are handed to json_append_escaped_char(); every other
   value is written as lowercase-hex \uXXXX escapes, using two escapes when
   the value has to be split with uni_split_surrogate(). */
void json_append_escaped_ucs4(string_t *dest, unichar_t chr)
{
	unichar_t lead, trail;

	if (chr < 0x80) {
		/* 7-bit range: reuse the single-byte escaping rules */
		json_append_escaped_char(dest, (unsigned char)chr);
		return;
	}
	if (chr < UTF16_SURROGATE_BASE) {
		/* representable with a single \uXXXX escape */
		str_printfa(dest, "\\u%04x", chr);
		return;
	}
	/* NOTE(review): UTF16_SURROGATE_BASE is presumably 0x10000, i.e. the
	   first code point needing a surrogate pair - confirm in unichar.h */
	uni_split_surrogate(chr, &lead, &trail);
	str_printfa(dest, "\\u%04x\\u%04x", lead, trail);
}

/* Append the single byte src to dest with JSON escaping. This is a plain
   forwarder: both arguments are passed unchanged to
   json_append_escaped_char(), which does the actual escaping.
   NOTE(review): the name suggests it exists to match an ostream escaping
   callback signature - confirm against the callers. */
void ostream_escaped_json_format(string_t *dest, unsigned char src)
{
json_append_escaped_char(dest, src);
}

/* Append the NUL-terminated string src to dest with JSON escaping.
   NOTE(review): this text looks like a diff rendered without +/- markers.
   The per-byte loop is probably the removed implementation and the
   json_append_escaped_data() call its replacement; read literally, the loop
   leaves src at the terminating NUL, so the following call would receive a
   zero-length input. Confirm against the real file. */
void json_append_escaped(string_t *dest, const char *src)
{
for (; *src != '\0'; src++)
json_append_escaped_char(dest, *src);
json_append_escaped_data(dest, (const unsigned char*)src, strlen(src));
}

/* Append size bytes from src (not necessarily NUL-terminated) to dest with
   JSON escaping.
   NOTE(review): this text looks like a diff rendered without +/- markers.
   The first loop (one json_append_escaped_char() call per byte) is probably
   the removed implementation and the second loop its replacement; read
   literally, the input would be appended twice. Confirm against the real
   file. */
void json_append_escaped_data(string_t *dest, const unsigned char *src, size_t size)
{
size_t i;

for (i = 0; i < size; i++)
json_append_escaped_char(dest, src[i]);
int bytes = 0;
unichar_t chr;

/* i is advanced inside the body: by the length of the decoded sequence
   when decoding succeeds, otherwise by a single byte */
for (i = 0; i < size;) {
bytes = uni_utf8_get_char_n(src+i, size-i, &chr);
/* if it was valid unichar, encode + move forward by bytes */
if (bytes > 0) {
json_append_escaped_ucs4(dest, chr);
i += bytes;
/* encode as byte data */
} else {
json_append_escaped_char(dest, src[i++]);
}
}
}
4 changes: 4 additions & 0 deletions src/lib/json-parser.h
@@ -1,6 +1,8 @@
#ifndef JSON_PARSER_H
#define JSON_PARSER_H

#include "unichar.h"

enum json_type {
/* { key: */
JSON_TYPE_OBJECT_KEY,
Expand Down Expand Up @@ -46,6 +48,8 @@ void json_parse_skip_next(struct json_parser *parser);
int json_parse_next_stream(struct json_parser *parser,
struct istream **input_r);

/* Append UCS4 to already opened JSON string. */
void json_append_escaped_ucs4(string_t *dest, unichar_t chr);
/* Append data to already opened JSON string. src should be valid UTF-8 data. */
void json_append_escaped(string_t *dest, const char *src);
/* Same as json_append_escaped(), but append non-\0 terminated input. */
Expand Down
8 changes: 4 additions & 4 deletions src/lib/test-json-parser.c
Expand Up @@ -266,20 +266,20 @@ static void test_json_append_escaped(void)
string_t *str = t_str_new(32);

test_begin("json_append_escaped()");
json_append_escaped(str, "\b\f\r\n\t\"\\\001\002-\xC3\xA4");
test_assert(strcmp(str_c(str), "\\b\\f\\r\\n\\t\\\"\\\\\\u0001\\u0002-\xC3\xA4") == 0);
json_append_escaped(str, "\b\f\r\n\t\"\\\001\002-\xC3\xA4\xf0\x90\x90\xb7");
test_assert(strcmp(str_c(str), "\\b\\f\\r\\n\\t\\\"\\\\\\u0001\\u0002-\\u00e4\\ud801\\udc37") == 0);
test_end();
}

/* Check json_append_escaped_data() on a buffer that contains an embedded
   NUL byte, which is why the length is passed explicitly as
   sizeof(test_input)-1 instead of relying on string termination.
   NOTE(review): this text looks like a diff rendered without +/- markers;
   the two test_input initializer lines and the two test_assert() lines are
   probably the old and new versions of one line each. The newer pair
   expects \xC3\xA4 to come out as \u00e4 and \xf0\x90\x90\xb7 as the
   escape pair \ud801\udc37.
   NOTE(review): the test_begin() label names json_append_escaped() although
   json_append_escaped_data() is the function exercised here - possibly a
   copy-paste leftover. */
static void test_json_append_escaped_data(void)
{
static const unsigned char test_input[] =
"\b\f\r\n\t\"\\\000\001\002-\xC3\xA4";
"\b\f\r\n\t\"\\\000\001\002-\xC3\xA4\xf0\x90\x90\xb7";
string_t *str = t_str_new(32);

test_begin("json_append_escaped()");
json_append_escaped_data(str, test_input, sizeof(test_input)-1);
test_assert(strcmp(str_c(str), "\\b\\f\\r\\n\\t\\\"\\\\\\u0000\\u0001\\u0002-\xC3\xA4") == 0);
test_assert(strcmp(str_c(str), "\\b\\f\\r\\n\\t\\\"\\\\\\u0000\\u0001\\u0002-\\u00e4\\ud801\\udc37") == 0);
test_end();
}

Expand Down

0 comments on commit 32bd32d

Please sign in to comment.