Skip to content

Commit

Permalink
Replace the *2 wchar/wctype APIs with their standard versions. These …
Browse files Browse the repository at this point in the history
…will be fixed using compatibility headers.
  • Loading branch information
rhdunn committed Feb 13, 2017
1 parent d2a919b commit 5975f07
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 114 deletions.
3 changes: 2 additions & 1 deletion src/libespeak-ng/compiledict.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>

#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>
Expand Down Expand Up @@ -585,7 +586,7 @@ static int compile_line(char *linebuf, char *dict_line, int *hash)
ix = utf8_in(&c2, p);
if (c2 == 0)
break;
if (iswupper2(c2))
if (iswupper(c2))
utf8_out(towlower2(c2), p);
else
all_upper_case = 0;
Expand Down
11 changes: 6 additions & 5 deletions src/libespeak-ng/dictionary.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>
#include <wchar.h>

#include <espeak-ng/espeak_ng.h>
Expand Down Expand Up @@ -614,7 +615,7 @@ const char *GetTranslatedPhonemeString(int phoneme_mode)
p += utf8_in(&c, p);
if (use_tie != 0) {
// look for non-initial alphabetic character, but not diacritic, superscript etc.
if ((count > 0) && !(flags & (1 << (count-1))) && ((c < 0x2b0) || (c > 0x36f)) && iswalpha2(c))
if ((count > 0) && !(flags & (1 << (count-1))) && ((c < 0x2b0) || (c > 0x36f)) && iswalpha(c))
buf += utf8_out(use_tie, buf);
}
buf += utf8_out(c, buf);
Expand Down Expand Up @@ -832,7 +833,7 @@ int Unpronouncable(Translator *tr, char *word, int posn)
break;
}

if ((c != '\'') && !iswalpha2(c))
if ((c != '\'') && !iswalpha(c))
return 0;
}

Expand Down Expand Up @@ -1787,7 +1788,7 @@ static void MatchRule(Translator *tr, char *word[], char *word_start, int group_
failed = 1;
break;
case RULE_NONALPHA:
if (!iswalpha2(letter_w)) {
if (!iswalpha(letter_w)) {
add_points = (21-distance_right);
post_ptr += letter_xbytes;
} else
Expand Down Expand Up @@ -1996,7 +1997,7 @@ static void MatchRule(Translator *tr, char *word[], char *word_start, int group_
failed = 1;
break;
case RULE_NONALPHA:
if (!iswalpha2(letter_w)) {
if (!iswalpha(letter_w)) {
add_points = (21-distance_right);
pre_ptr -= letter_xbytes;
} else
Expand Down Expand Up @@ -2300,7 +2301,7 @@ int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, c

if (tr->letter_bits_offset > 0) {
// not a Latin alphabet, switch to the default Latin alphabet language
if ((letter <= 0x241) && iswalpha2(letter)) {
if ((letter <= 0x241) && iswalpha(letter)) {
sprintf(phonemes, "%c%s", phonSWITCH, tr->langopts.ascii_language);
return 0;
}
Expand Down
6 changes: 3 additions & 3 deletions src/libespeak-ng/numbers.c
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,7 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)

if (control & 2) {
// include CAPITAL information
if (iswupper2(letter))
if (iswupper(letter))
Lookup(tr, "_cap", capital);
}
letter = towlower2(letter);
Expand Down Expand Up @@ -845,7 +845,7 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
if (ph_buf[0] == 0) {
speak_letter_number = 1;
if (!(al_flags & AL_NO_SYMBOL)) {
if (iswalpha2(letter))
if (iswalpha(letter))
Lookup(translator, "_?A", ph_buf);

if ((ph_buf[0] == 0) && !iswspace(letter))
Expand Down Expand Up @@ -2055,7 +2055,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
if ((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
utf8_in(&next_char, p);

if (!iswalpha2(next_char) && (thousands_exact == 0))
if (!iswalpha(next_char) && (thousands_exact == 0))
strcat(ph_out, str_pause); // don't add pause for 100s, 6th, etc.
}

Expand Down
97 changes: 11 additions & 86 deletions src/libespeak-ng/readclause.c
Original file line number Diff line number Diff line change
Expand Up @@ -289,88 +289,13 @@ static const short wchar_toupper[] = {
0, 0
};

// Test whether a character code is alphabetic.
// Uses internal data (walpha_tab) for codes from U+0080 up to MAX_WALPHA,
// because iswalpha() on Windows is unreliable (U+AA, U+BA).
// Returns non-zero if c is treated as a letter, 0 otherwise.
int iswalpha2(int c)
{
	if (c < 0)
		return 0; // not a character code; isalpha() is undefined for negative values other than EOF
	if (c < 0x80)
		return isalpha(c);
	if ((c > 0x3040) && (c <= 0xa700))
		return 1; // japanese, chinese characters
	if (c > MAX_WALPHA)
		return iswalpha(c);
	return walpha_tab[c-0x80]; // a non-zero table entry marks a letter
}

// Test whether a character code is a lower-case letter.
// ASCII is handled by islower(), codes above MAX_WALPHA by iswlower();
// in between, a walpha_tab entry of 0xff marks a lower-case letter.
// Returns non-zero for lower case, 0 otherwise.
int iswlower2(int c)
{
	if (c < 0)
		return 0; // not a character code; islower() is undefined for negative values other than EOF
	if (c < 0x80)
		return islower(c);
	if (c > MAX_WALPHA)
		return iswlower(c);
	return walpha_tab[c-0x80] == 0xff;
}

// Test whether a character code is an upper-case letter.
// ASCII is handled by isupper(), codes above MAX_WALPHA by iswupper();
// in between, a walpha_tab entry greater than 0 and below 0xfe marks an
// upper-case letter.
// Returns non-zero for upper case, 0 otherwise.
int iswupper2(int c)
{
	int entry;

	if (c < 0)
		return 0; // not a character code; isupper() is undefined for negative values other than EOF
	if (c < 0x80)
		return isupper(c);
	if (c > MAX_WALPHA)
		return iswupper(c);

	entry = walpha_tab[c-0x80];
	return (entry > 0) && (entry < 0xfe);
}

// Convert a character code to lower case.
// Applies the dotless-i option (translator->langopts.dotless_i) to 'I',
// uses tolower() for codes below 0x80, towlower() for codes above MAX_WALPHA,
// and the internal walpha_tab / wchar_tolower tables for everything in between.
// NOTE(review): the 'I' handling appears twice in this body; it looks like two
// versions of the function have been interleaved - confirm which is intended.
int towlower2(unsigned int c)
{
int x;
int ix;

// check for non-standard upper to lower case conversions
if (c == 'I') {
if (translator->langopts.dotless_i)
c = 0x131; // I -> ı
}

if (c < 0x80)
return tolower(c);
// NOTE(review): this test can never be true here: any c == 'I' has either been
// changed to 0x131 above or has already returned through the c < 0x80 branch.
if (c == 'I' && translator->langopts.dotless_i)
return 0x131; // I -> ı

if (c > MAX_WALPHA)
return towlower(c);

if ((x = walpha_tab[c-0x80]) >= 0xfe)
return c; // this is not an upper case letter

if (x == 0xfd) {
// special cases, lookup translation table
// wchar_tolower is scanned as (from, to) pairs up to a 0 entry
for (ix = 0; wchar_tolower[ix] != 0; ix += 2) {
if (wchar_tolower[ix] == (int)c)
return wchar_tolower[ix+1];
}
// no pair matched: falls through to the c + x return below
}
return c + x; // convert to lower case
}

// Convert a character code to upper case.
// Codes above MAX_WALPHA are passed to towupper(). Otherwise the function
// tries c-32 and c-1 as candidates whose towlower2() result is c, then
// searches the wchar_toupper pair table, and returns c unchanged if nothing matches.
int towupper2(unsigned int c)
{
int ix;
if (c > MAX_WALPHA)
return towupper(c);

// check whether a previous character code is the upper-case equivalent of this character
// NOTE(review): c is unsigned, so c-32 and c-1 wrap around for small c - confirm this is harmless
if (towlower2(c-32) == (int)c)
return c-32; // yes, use it
if (towlower2(c-1) == (int)c)
return c-1;
// wchar_toupper is scanned as (from, to) pairs up to a 0 entry
for (ix = 0; wchar_toupper[ix] != 0; ix += 2) {
if (wchar_toupper[ix] == (int)c)
return wchar_toupper[ix+1];
}
return c; // no
// NOTE(review): unreachable - the return above always exits first; this line
// looks like it belongs to a different version of the file - confirm and remove.
return towlower(c);
}

static int IsRomanU(unsigned int c)
Expand Down Expand Up @@ -2015,7 +1940,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
while (!Eof() && (c1 != '>'))
c1 = GetC();
c2 = ' ';
} else if ((c2 == '/') || iswalpha2(c2)) {
} else if ((c2 == '/') || iswalpha(c2)) {
// check for space in the output buffer for embedded commands produced by the SSML tag
if (ix > (n_buf - 20)) {
// Perhaps not enough room, end the clause before the SSML tag
Expand Down Expand Up @@ -2167,9 +2092,9 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
}
}

if (iswupper2(c1)) {
if (iswupper(c1)) {
tr->clause_upper_count++;
if ((option_capitals == 2) && (sayas_mode == 0) && !iswupper2(cprev)) {
if ((option_capitals == 2) && (sayas_mode == 0) && !iswupper(cprev)) {
char text_buf[40];
char text_buf2[30];
if (LookupSpecial(tr, "_cap", text_buf2) != NULL) {
Expand All @@ -2181,7 +2106,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
}
}
}
} else if (iswalpha2(c1))
} else if (iswalpha(c1))
tr->clause_lower_count++;

if (option_phoneme_input) {
Expand Down Expand Up @@ -2238,7 +2163,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
// i.e. is dot followed by an upper-case letter?

if (!iswspace(c1)) {
if (!IsAlpha(c1) || !iswlower2(c1)) {
if (!IsAlpha(c1) || !iswlower(c1)) {
UngetC(c2);
ungot_char2 = c1;
buf[end_clause_index] = ' '; // delete the end-clause punctuation
Expand Down Expand Up @@ -2320,7 +2245,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
punct_data |= CLAUSE_DOT;

if (nl_count == 0) {
if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower2(c_next)))) {
if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower(c_next)))) {
// lang=hu, fix for ordinal numbers, eg: "december 2., szerda", ignore ',' after ordinal number
c1 = CHAR_COMMA_BREAK;
is_end_clause = 0;
Expand All @@ -2332,11 +2257,11 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
// dot after a number indicates an ordinal number
if (!iswdigit(cprev))
is_end_clause = 0; // Roman number followed by dot
else if (iswlower2(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
is_end_clause = 0; // only if followed by lower-case, (or if there is a XML tag)
} else if (c_next == '\'')
is_end_clause = 0; // eg. u.s.a.'s
if (iswlower2(c_next)) {
if (iswlower(c_next)) {
// next word has no capital letter, this dot is probably from an abbreviation
is_end_clause = 0;
}
Expand Down
28 changes: 14 additions & 14 deletions src/libespeak-ng/translate.c
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ int IsAlpha(unsigned int c)
0
};

if (iswalpha2(c))
if (iswalpha(c))
return 1;

if (c < 0x300)
Expand Down Expand Up @@ -610,7 +610,7 @@ int IsAllUpper(const char *word)
int c;
while ((*word != 0) && !isspace2(*word)) {
word += utf8_in(&c, word);
if (!iswupper2(c))
if (!iswupper(c))
return 0;
}
return 1;
Expand Down Expand Up @@ -904,7 +904,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o
}
}

if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha2(first_char)) {
if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha(first_char)) {
if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) {
// emphasize words which are in capitals
emphasize_allcaps = FLAG_EMPHASIZED;
Expand Down Expand Up @@ -1331,7 +1331,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o
tr->expect_past--;
}

if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha2(first_char) && (first_char != 'i')) {
if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha(first_char) && (first_char != 'i')) {
// English Specific !!!!
// any single letter before a dot is an abbreviation, except 'I'
dictionary_flags[0] |= FLAG_ALLOW_DOT;
Expand Down Expand Up @@ -1587,7 +1587,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
while (*p2 != ' ') p2++;

utf8_in(&c_word2, p2+1); // first character of the next word;
if (!iswalpha2(c_word2))
if (!iswalpha(c_word2))
ok = 0;

if (ok != 0) {
Expand Down Expand Up @@ -1957,7 +1957,7 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in,

// there is a list of character codes to be substituted with alternative codes

if (iswupper2(c_lower = c)) {
if (iswupper(c_lower = c)) {
c_lower = towlower2(c);
upper_case = 1;
}
Expand All @@ -1984,14 +1984,14 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in,
// there is a second character to be inserted
// don't convert the case of the second character unless the next letter is also upper case
c2 = new_c >> 16;
if (upper_case && iswupper2(next_in))
c2 = towupper2(c2);
if (upper_case && iswupper(next_in))
c2 = towupper(c2);
*insert = c2;
new_c &= 0xffff;
}

if (upper_case)
new_c = towupper2(new_c);
new_c = towupper(new_c);

*wordflags |= FLAG_CHAR_REPLACED;
return new_c;
Expand Down Expand Up @@ -2046,7 +2046,7 @@ static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c,
case L('n', 'l'):
// look for 'n and replace by a special character (unicode: schwa)

if (!iswalpha2(prev_in)) {
if (!iswalpha(prev_in)) {
utf8_in(&next2, &ptr[1]);

if ((c == '\'') && IsSpace(next2)) {
Expand Down Expand Up @@ -2400,7 +2400,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t
if (!IsBracket(prev_out)) // ?? perhaps only set FLAG_NOSPACE for . - / (hyphenated words, URLs, etc)
next_word_flags |= FLAG_NOSPACE;
} else {
if (iswupper2(c))
if (iswupper(c))
word_flags |= FLAG_FIRST_UPPER;

if ((prev_out == ' ') && iswdigit(sbuf[ix-2]) && !iswdigit(prev_in)) {
Expand Down Expand Up @@ -2430,7 +2430,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t
}
}

if (iswupper2(c)) {
if (iswupper(c)) {
c = towlower2(c);

if ((j = tr->langopts.param[LOPT_CAPS_IN_WORD]) > 0) {
Expand All @@ -2440,7 +2440,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t
syllable_marked = 1;
}
} else {
if (iswlower2(prev_in)) {
if (iswlower(prev_in)) {
// lower case followed by upper case in a word
if (UpperCaseInWord(tr, &sbuf[ix], c) == 1) {
// convert to lower case and continue
Expand All @@ -2450,7 +2450,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t
space_inserted = 1;
prev_in_save = c;
}
} else if ((c != ' ') && iswupper2(prev_in) && iswlower2(next_in)) {
} else if ((c != ' ') && iswupper(prev_in) && iswlower(next_in)) {
int next2_in;
utf8_in(&next2_in, &source[source_index + next_in_nbytes]);

Expand Down
6 changes: 1 addition & 5 deletions src/libespeak-ng/translate.h
Original file line number Diff line number Diff line change
Expand Up @@ -724,12 +724,8 @@ int IsDigit09(unsigned int c);
int IsAlpha(unsigned int c);
int IsVowel(Translator *tr, int c);
int IsSuperscript(int letter);
int iswalpha2(int c);
int isspace2(unsigned int c);
int iswlower2(int c);
int iswupper2(int c);
int towlower2(unsigned int c);
int towupper2(unsigned int c);
int towlower2(unsigned int c); // Supports Turkish I
const char *GetTranslatedPhonemeString(int phoneme_mode);
const char *WordToString2(unsigned int word);
ALPHABET *AlphabetFromChar(int c);
Expand Down

0 comments on commit 5975f07

Please sign in to comment.