Skip to content

Commit

Permalink
encoding.c: Support the ISO-10646-UCS-2 encoding.
Browse files Browse the repository at this point in the history
  • Loading branch information
rhdunn committed Mar 30, 2017
1 parent fa5d31a commit b74f756
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/include/espeak-ng/espeak_ng.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ typedef enum
ESPEAKNG_ENCODING_KOI8_R,
ESPEAKNG_ENCODING_ISCII,
ESPEAKNG_ENCODING_UTF_8,
ESPEAKNG_ENCODING_ISO_10646_UCS_2,
} espeak_ng_ENCODING;

ESPEAK_NG_API espeak_ng_ENCODING
Expand Down
18 changes: 17 additions & 1 deletion src/libespeak-ng/encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ MNEM_TAB mnem_encoding[] = {
{ "ISO_8859-16", ESPEAKNG_ENCODING_ISO_8859_16 },
{ "ISO_8859-16:2001", ESPEAKNG_ENCODING_ISO_8859_16 },
{ "ISO646-US", ESPEAKNG_ENCODING_US_ASCII },
{ "ISO-10646-UCS-2", ESPEAKNG_ENCODING_ISO_10646_UCS_2 },
{ "ISO-8859-1", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "ISO-8859-2", ESPEAKNG_ENCODING_ISO_8859_2 },
{ "ISO-8859-3", ESPEAKNG_ENCODING_ISO_8859_3 },
Expand Down Expand Up @@ -106,6 +107,7 @@ MNEM_TAB mnem_encoding[] = {
{ "csKOI8R", ESPEAKNG_ENCODING_KOI8_R },
{ "csTIS620", ESPEAKNG_ENCODING_ISO_8859_11 },
{ "csUTF8", ESPEAKNG_ENCODING_UTF_8 },
{ "csUnicode", ESPEAKNG_ENCODING_ISO_10646_UCS_2 },
{ "arabic", ESPEAKNG_ENCODING_ISO_8859_6 },
{ "cyrillic", ESPEAKNG_ENCODING_ISO_8859_5 },
{ "greek", ESPEAKNG_ENCODING_ISO_8859_7 },
Expand Down Expand Up @@ -566,6 +568,19 @@ string_decoder_getc_utf_8(espeak_ng_TEXT_DECODER *decoder)
return 0xFFFD;
}

/* Decode one ISO-10646-UCS-2 code unit from the decoder's input.
 *
 * Two bytes are consumed and combined low byte first, i.e. the first byte
 * supplies bits 0-7 and the second byte supplies bits 8-15.
 *
 * If fewer than two bytes remain, the rest of the input is discarded by
 * moving `current` to `end`, and U+FFFD (REPLACEMENT CHARACTER) is returned.
 */
static uint32_t
string_decoder_getc_iso_10646_ucs_2(espeak_ng_TEXT_DECODER *decoder)
{
	/* Test the number of remaining bytes instead of forming `current + 1`:
	 * that pointer would be out of range when `current` already equals
	 * `end` and `end` is one past the end of the buffer. */
	if (decoder->end - decoder->current < 2) {
		decoder->current = decoder->end;
		return 0xFFFD;
	}

	uint8_t c1 = *decoder->current++ & 0xFF;
	uint8_t c2 = *decoder->current++ & 0xFF;
	return c1 + (c2 << 8);
}

typedef struct
{
uint32_t (*get)(espeak_ng_TEXT_DECODER *decoder);
Expand Down Expand Up @@ -594,6 +609,7 @@ static const encoding_t string_decoders[] = {
{ string_decoder_getc_codepage, KOI8_R },
{ string_decoder_getc_codepage, ISCII },
{ string_decoder_getc_utf_8, NULL },
{ string_decoder_getc_iso_10646_ucs_2, NULL },
};

espeak_ng_TEXT_DECODER *
Expand Down Expand Up @@ -621,7 +637,7 @@ text_decoder_decode_string(espeak_ng_TEXT_DECODER *decoder,
int length,
espeak_ng_ENCODING encoding)
{
if (encoding > ESPEAKNG_ENCODING_UTF_8)
if (encoding > ESPEAKNG_ENCODING_ISO_10646_UCS_2)
return ENS_UNKNOWN_TEXT_ENCODING;

const encoding_t *enc = string_decoders + encoding;
Expand Down
27 changes: 27 additions & 0 deletions tests/encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,32 @@ test_utf_8_encoding()
destroy_text_decoder(decoder);
}

/* Exercise the ISO-10646-UCS-2 decoder.
 *
 * Checks that both registered names map to the UCS-2 encoding, then decodes
 * a 9-byte buffer: four complete two-byte code units followed by a single
 * trailing byte. The trailing byte cannot form a code unit, so the final
 * read is expected to yield U+FFFD and leave the decoder at end of input.
 */
void
test_iso_10646_ucs_2_encoding()
{
	/* The literal holds ten bytes; only the first nine are handed to the
	 * decoder so that the last code unit is incomplete. */
	static const char input[] = "a\00G\00\xA0\00\x22\x21\x23\x21";
	static const unsigned long expected[] = { 'a', 'G', 0xA0, 0x2122, 0xFFFD };
	const unsigned int count = sizeof(expected) / sizeof(expected[0]);
	unsigned int i;

	printf("testing ISO-10646-UCS-2 encoding\n");

	assert(espeak_ng_EncodingFromName("ISO-10646-UCS-2") == ESPEAKNG_ENCODING_ISO_10646_UCS_2);
	assert(espeak_ng_EncodingFromName("csUnicode") == ESPEAKNG_ENCODING_ISO_10646_UCS_2);

	espeak_ng_TEXT_DECODER *decoder = create_text_decoder();

	assert(text_decoder_decode_string(decoder, input, 9, ESPEAKNG_ENCODING_ISO_10646_UCS_2) == ENS_OK);

	/* Before every read the decoder must still report pending input. */
	for (i = 0; i < count; i++) {
		assert(text_decoder_eof(decoder) == 0);
		assert(text_decoder_getc(decoder) == expected[i]);
	}
	assert(text_decoder_eof(decoder) == 1);

	destroy_text_decoder(decoder);
}

int
main(int argc, char **argv)
{
Expand Down Expand Up @@ -730,6 +756,7 @@ main(int argc, char **argv)
test_iso_8859_16_encoding();

test_utf_8_encoding();
test_iso_10646_ucs_2_encoding();

printf("done\n");

Expand Down

0 comments on commit b74f756

Please sign in to comment.