Skip to content

Commit

Permalink
encoding.c: Support the ISO 8859-5 encoding.
Browse files Browse the repository at this point in the history
  • Loading branch information
rhdunn committed Mar 28, 2017
1 parent b5589fc commit 51295d9
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/include/espeak-ng/espeak_ng.h
Expand Up @@ -194,6 +194,7 @@ typedef enum
ESPEAKNG_ENCODING_ISO_8859_2,
ESPEAKNG_ENCODING_ISO_8859_3,
ESPEAKNG_ENCODING_ISO_8859_4,
ESPEAKNG_ENCODING_ISO_8859_5,
} espeak_ng_ENCODING;

ESPEAK_NG_API espeak_ng_ENCODING
Expand Down
31 changes: 31 additions & 0 deletions src/libespeak-ng/encoding.c
Expand Up @@ -36,6 +36,7 @@ MNEM_TAB mnem_encoding[] = {
{ "ISO_8859-2", ESPEAKNG_ENCODING_ISO_8859_2 },
{ "ISO_8859-3", ESPEAKNG_ENCODING_ISO_8859_3 },
{ "ISO_8859-4", ESPEAKNG_ENCODING_ISO_8859_4 },
{ "ISO_8859-5", ESPEAKNG_ENCODING_ISO_8859_5 },
{ "ISO646-US", ESPEAKNG_ENCODING_US_ASCII },
{ "ISO-8859-1", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "ISO-8859-1:1987", ESPEAKNG_ENCODING_ISO_8859_1 },
Expand All @@ -45,6 +46,8 @@ MNEM_TAB mnem_encoding[] = {
{ "ISO-8859-3:1988", ESPEAKNG_ENCODING_ISO_8859_3 },
{ "ISO-8859-4", ESPEAKNG_ENCODING_ISO_8859_4 },
{ "ISO-8859-4:1988", ESPEAKNG_ENCODING_ISO_8859_4 },
{ "ISO-8859-5", ESPEAKNG_ENCODING_ISO_8859_5 },
{ "ISO-8859-5:1988", ESPEAKNG_ENCODING_ISO_8859_5 },
{ "US-ASCII", ESPEAKNG_ENCODING_US_ASCII },
{ "cp367", ESPEAKNG_ENCODING_US_ASCII },
{ "cp819", ESPEAKNG_ENCODING_ISO_8859_1 },
Expand All @@ -53,11 +56,14 @@ MNEM_TAB mnem_encoding[] = {
{ "csISOLatin2", ESPEAKNG_ENCODING_ISO_8859_2 },
{ "csISOLatin3", ESPEAKNG_ENCODING_ISO_8859_3 },
{ "csISOLatin4", ESPEAKNG_ENCODING_ISO_8859_4 },
{ "csISOLatinCyrillic",ESPEAKNG_ENCODING_ISO_8859_5 },
{ "cyrillic", ESPEAKNG_ENCODING_ISO_8859_5 },
{ "iso-ir-6", ESPEAKNG_ENCODING_US_ASCII },
{ "iso-ir-100", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "iso-ir-101", ESPEAKNG_ENCODING_ISO_8859_2 },
{ "iso-ir-109", ESPEAKNG_ENCODING_ISO_8859_3 },
{ "iso-ir-110", ESPEAKNG_ENCODING_ISO_8859_4 },
{ "iso-ir-144", ESPEAKNG_ENCODING_ISO_8859_5 },
{ "latin1", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "latin2", ESPEAKNG_ENCODING_ISO_8859_2 },
{ "latin3", ESPEAKNG_ENCODING_ISO_8859_3 },
Expand Down Expand Up @@ -152,6 +158,27 @@ static const uint32_t ISO_8859_4[0x80] = {
0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, // f8
};

// ISO/IEC 8859-5 (Latin/Cyrillic): Unicode code points for the upper half of
// the single-byte code page.
//
// The table has 0x80 entries; entry i is the code point for byte 0x80 + i.
// The comment at the end of each row gives the byte value of that row's
// first entry. Bytes below 0x80 are not covered by this table (presumably
// they are treated as ASCII by the code-page decoder -- confirm there).
//
// Reference: http://www.iana.org/go/rfc1345
// Reference: http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT
static const uint32_t ISO_8859_5[0x80] = {
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, // 80 (0x80-0x9f map to the same-valued code points)
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, // 88
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, // 90
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, // 98
0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, // a0 (0xa0 stays U+00A0; the rest are U+0401..)
0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, // a8 (0xad stays U+00AD, breaking the U+04xx run)
0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, // b0 (0xb0-0xef map contiguously to U+0410-U+044F)
0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, // b8
0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, // c0
0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, // c8
0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, // d0
0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, // d8
0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, // e0
0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, // e8
0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, // f0 (0xf0 is U+2116, not a U+04xx letter)
0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, // f8 (0xfd is U+00A7, breaking the U+04xx run)
};

static uint32_t
text_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder)
{
Expand Down Expand Up @@ -219,6 +246,10 @@ initialize_encoding(espeak_ng_TEXT_DECODER *decoder,
decoder->get = text_decoder_getc_codepage;
decoder->codepage = ISO_8859_4;
break;
case ESPEAKNG_ENCODING_ISO_8859_5:
decoder->get = text_decoder_getc_codepage;
decoder->codepage = ISO_8859_5;
break;
default:
return 0;
}
Expand Down
31 changes: 31 additions & 0 deletions tests/encoding.c
Expand Up @@ -217,6 +217,36 @@ test_iso_8859_4_encoding()
destroy_text_decoder(decoder);
}

// Exercises ISO-8859-5 support: every registered name must resolve to
// ESPEAKNG_ENCODING_ISO_8859_5, and a five-byte sample must decode to the
// expected code points with end-of-input reported only after the last one.
void
test_iso_8859_5_encoding()
{
	// Names looked up, in the order they are checked.
	static const char *const aliases[] = {
		"ISO-8859-5:1988",
		"ISO-8859-5",
		"ISO_8859-5",
		"iso-ir-144",
		"cyrillic",
		"csISOLatinCyrillic",
	};
	// Code points expected from the sample bytes 'a', 'G', 0x92, 0xA0, 0xDE:
	// the first four keep their value, 0xDE becomes U+043E.
	static const unsigned int expected[] = { 'a', 'G', 0x92, 0xA0, 0x043E };
	unsigned int idx;

	printf("testing ISO-8859-5 encoding\n");

	for (idx = 0; idx < sizeof(aliases) / sizeof(aliases[0]); ++idx) {
		assert(espeak_ng_EncodingFromName(aliases[idx]) == ESPEAKNG_ENCODING_ISO_8859_5);
	}

	espeak_ng_TEXT_DECODER *decoder = create_text_decoder();

	assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_5) == ENS_OK);

	// Before each character the decoder must not report end-of-input.
	for (idx = 0; idx < sizeof(expected) / sizeof(expected[0]); ++idx) {
		assert(text_decoder_eof(decoder) == 0);
		assert(text_decoder_getc(decoder) == expected[idx]);
	}

	// All five bytes consumed: now at end-of-input.
	assert(text_decoder_eof(decoder) == 1);

	destroy_text_decoder(decoder);
}

int
main(int argc, char **argv)
{
Expand All @@ -227,6 +257,7 @@ main(int argc, char **argv)
test_iso_8859_2_encoding();
test_iso_8859_3_encoding();
test_iso_8859_4_encoding();
test_iso_8859_5_encoding();
printf("done\n");

return EXIT_SUCCESS;
Expand Down

0 comments on commit 51295d9

Please sign in to comment.