Replace the *2 wchar/wctype APIs with their standard versions. These will be fixed using compatibility headers.
espeak-ng · Feb 13, 2017 · 5975f07 · 5975f07
1 parent d2a919b
commit 5975f07
Show file tree

Hide file tree

Showing 6 changed files with 37 additions and 114 deletions.
diff --git a/src/libespeak-ng/compiledict.c b/src/libespeak-ng/compiledict.c
@@ -26,6 +26,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <wctype.h>
 
 #include <espeak-ng/espeak_ng.h>
 #include <espeak-ng/speak_lib.h>
@@ -585,7 +586,7 @@ static int compile_line(char *linebuf, char *dict_line, int *hash)
 			ix = utf8_in(&c2, p);
 			if (c2 == 0)
 				break;
-			if (iswupper2(c2))
+			if (iswupper(c2))
 				utf8_out(towlower2(c2), p);
 			else
 				all_upper_case = 0;

diff --git a/src/libespeak-ng/dictionary.c b/src/libespeak-ng/dictionary.c
@@ -24,6 +24,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <wctype.h>
 #include <wchar.h>
 
 #include <espeak-ng/espeak_ng.h>
@@ -614,7 +615,7 @@ const char *GetTranslatedPhonemeString(int phoneme_mode)
 			p += utf8_in(&c, p);
 			if (use_tie != 0) {
 				// look for non-inital alphabetic character, but not diacritic, superscript etc.
-				if ((count > 0) && !(flags & (1 << (count-1))) && ((c < 0x2b0) || (c > 0x36f)) && iswalpha2(c))
+				if ((count > 0) && !(flags & (1 << (count-1))) && ((c < 0x2b0) || (c > 0x36f)) && iswalpha(c))
 					buf += utf8_out(use_tie, buf);
 			}
 			buf += utf8_out(c, buf);
@@ -832,7 +833,7 @@ int Unpronouncable(Translator *tr, char *word, int posn)
 			break;
 		}
 
-		if ((c != '\'') && !iswalpha2(c))
+		if ((c != '\'') && !iswalpha(c))
 			return 0;
 	}
 
@@ -1787,7 +1788,7 @@ static void MatchRule(Translator *tr, char *word[], char *word_start, int group_
 						failed = 1;
 					break;
 				case RULE_NONALPHA:
-					if (!iswalpha2(letter_w)) {
+					if (!iswalpha(letter_w)) {
 						add_points = (21-distance_right);
 						post_ptr += letter_xbytes;
 					} else
@@ -1996,7 +1997,7 @@ static void MatchRule(Translator *tr, char *word[], char *word_start, int group_
 						failed = 1;
 					break;
 				case RULE_NONALPHA:
-					if (!iswalpha2(letter_w)) {
+					if (!iswalpha(letter_w)) {
 						add_points = (21-distance_right);
 						pre_ptr -= letter_xbytes;
 					} else
@@ -2300,7 +2301,7 @@ int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, c
 
 						if (tr->letter_bits_offset > 0) {
 							// not a Latin alphabet, switch to the default Latin alphabet language
-							if ((letter <= 0x241) && iswalpha2(letter)) {
+							if ((letter <= 0x241) && iswalpha(letter)) {
 								sprintf(phonemes, "%c%s", phonSWITCH, tr->langopts.ascii_language);
 								return 0;
 							}

diff --git a/src/libespeak-ng/numbers.c b/src/libespeak-ng/numbers.c
@@ -701,7 +701,7 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
 
 	if (control & 2) {
 		// include CAPITAL information
-		if (iswupper2(letter))
+		if (iswupper(letter))
 			Lookup(tr, "_cap", capital);
 	}
 	letter = towlower2(letter);
@@ -845,7 +845,7 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
 		if (ph_buf[0] == 0) {
 			speak_letter_number = 1;
 			if (!(al_flags & AL_NO_SYMBOL)) {
-				if (iswalpha2(letter))
+				if (iswalpha(letter))
 					Lookup(translator, "_?A", ph_buf);
 
 				if ((ph_buf[0] == 0) && !iswspace(letter))
@@ -2055,7 +2055,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
 		if ((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
 			utf8_in(&next_char, p);
 
-		if (!iswalpha2(next_char) && (thousands_exact == 0))
+		if (!iswalpha(next_char) && (thousands_exact == 0))
 			strcat(ph_out, str_pause); // don't add pause for 100s,  6th, etc.
 	}
 

diff --git a/src/libespeak-ng/readclause.c b/src/libespeak-ng/readclause.c
@@ -289,88 +289,13 @@ static const short wchar_toupper[] = {
 	0, 0
 };
 
-// use internal data for iswalpha up to U+024F
-// iswalpha() on Windows is unreliable  (U+AA, U+BA).
-int iswalpha2(int c)
-{
-	if (c < 0x80)
-		return isalpha(c);
-	if ((c > 0x3040) && (c <= 0xa700))
-		return 1; // japanese, chinese characters
-	if (c > MAX_WALPHA)
-		return iswalpha(c);
-	return walpha_tab[c-0x80];
-}
-
-int iswlower2(int c)
-{
-	if (c < 0x80)
-		return islower(c);
-	if (c > MAX_WALPHA)
-		return iswlower(c);
-	if (walpha_tab[c-0x80] == 0xff)
-		return 1;
-	return 0;
-}
-
-int iswupper2(int c)
-{
-	int x;
-	if (c < 0x80)
-		return isupper(c);
-	if (c > MAX_WALPHA)
-		return iswupper(c);
-	if (((x = walpha_tab[c-0x80]) > 0) && (x < 0xfe))
-		return 1;
-	return 0;
-}
-
 int towlower2(unsigned int c)
 {
-	int x;
-	int ix;
-
 	// check for non-standard upper to lower case conversions
-	if (c == 'I') {
-		if (translator->langopts.dotless_i)
-			c = 0x131; // I -> ı
-	}
-
-	if (c < 0x80)
-		return tolower(c);
+	if (c == 'I' && translator->langopts.dotless_i)
+		return 0x131; // I -> ı
 
-	if (c > MAX_WALPHA)
-		return towlower(c);
-
-	if ((x = walpha_tab[c-0x80]) >= 0xfe)
-		return c; // this is not an upper case letter
-
-	if (x == 0xfd) {
-		// special cases, lookup translation table
-		for (ix = 0; wchar_tolower[ix] != 0; ix += 2) {
-			if (wchar_tolower[ix] == (int)c)
-				return wchar_tolower[ix+1];
-		}
-	}
-	return c + x; // convert to lower case
-}
-
-int towupper2(unsigned int c)
-{
-	int ix;
-	if (c > MAX_WALPHA)
-		return towupper(c);
-
-	// check whether a previous character code is the upper-case equivalent of this character
-	if (towlower2(c-32) == (int)c)
-		return c-32; // yes, use it
-	if (towlower2(c-1) == (int)c)
-		return c-1;
-	for (ix = 0; wchar_toupper[ix] != 0; ix += 2) {
-		if (wchar_toupper[ix] == (int)c)
-			return wchar_toupper[ix+1];
-	}
-	return c; // no
+	return towlower(c);
 }
 
 static int IsRomanU(unsigned int c)
@@ -2015,7 +1940,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 					while (!Eof() && (c1 != '>'))
 						c1 = GetC();
 					c2 = ' ';
-				} else if ((c2 == '/') || iswalpha2(c2)) {
+				} else if ((c2 == '/') || iswalpha(c2)) {
 					// check for space in the output buffer for embedded commands produced by the SSML tag
 					if (ix > (n_buf - 20)) {
 						// Perhaps not enough room, end the clause before the SSML tag
@@ -2167,9 +2092,9 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 			}
 		}
 
-		if (iswupper2(c1)) {
+		if (iswupper(c1)) {
 			tr->clause_upper_count++;
-			if ((option_capitals == 2) && (sayas_mode == 0) && !iswupper2(cprev)) {
+			if ((option_capitals == 2) && (sayas_mode == 0) && !iswupper(cprev)) {
 				char text_buf[40];
 				char text_buf2[30];
 				if (LookupSpecial(tr, "_cap", text_buf2) != NULL) {
@@ -2181,7 +2106,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 					}
 				}
 			}
-		} else if (iswalpha2(c1))
+		} else if (iswalpha(c1))
 			tr->clause_lower_count++;
 
 		if (option_phoneme_input) {
@@ -2238,7 +2163,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 				// i.e. is dot followed by an upper-case letter?
 
 				if (!iswspace(c1)) {
-					if (!IsAlpha(c1) || !iswlower2(c1)) {
+					if (!IsAlpha(c1) || !iswlower(c1)) {
 						UngetC(c2);
 						ungot_char2 = c1;
 						buf[end_clause_index] = ' '; // delete the end-clause punctuation
@@ -2320,7 +2245,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 					punct_data |= CLAUSE_DOT;
 
 				if (nl_count == 0) {
-					if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower2(c_next)))) {
+					if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower(c_next)))) {
 						// lang=hu, fix for ordinal numbers, eg:  "december 2., szerda", ignore ',' after ordinal number
 						c1 = CHAR_COMMA_BREAK;
 						is_end_clause = 0;
@@ -2332,11 +2257,11 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 							// dot after a number indicates an ordinal number
 							if (!iswdigit(cprev))
 								is_end_clause = 0; // Roman number followed by dot
-							else if (iswlower2(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
+							else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
 								is_end_clause = 0; // only if followed by lower-case, (or if there is a XML tag)
 						} else if (c_next == '\'')
 							is_end_clause = 0;    // eg. u.s.a.'s
-						if (iswlower2(c_next)) {
+						if (iswlower(c_next)) {
 							// next word has no capital letter, this dot is probably from an abbreviation
 							is_end_clause = 0;
 						}

diff --git a/src/libespeak-ng/translate.c b/src/libespeak-ng/translate.c
@@ -363,7 +363,7 @@ int IsAlpha(unsigned int c)
 		0
 	};
 
-	if (iswalpha2(c))
+	if (iswalpha(c))
 		return 1;
 
 	if (c < 0x300)
@@ -610,7 +610,7 @@ int IsAllUpper(const char *word)
 	int c;
 	while ((*word != 0) && !isspace2(*word)) {
 		word += utf8_in(&c, word);
-		if (!iswupper2(c))
+		if (!iswupper(c))
 			return 0;
 	}
 	return 1;
@@ -904,7 +904,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o
 			}
 		}
 
-		if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha2(first_char)) {
+		if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha(first_char)) {
 			if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) {
 				// emphasize words which are in capitals
 				emphasize_allcaps = FLAG_EMPHASIZED;
@@ -1331,7 +1331,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o
 			tr->expect_past--;
 	}
 
-	if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha2(first_char) && (first_char != 'i')) {
+	if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha(first_char) && (first_char != 'i')) {
 		// English Specific !!!!
 		// any single letter before a dot is an abbreviation, except 'I'
 		dictionary_flags[0] |= FLAG_ALLOW_DOT;
@@ -1587,7 +1587,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
 			while (*p2 != ' ') p2++;
 
 			utf8_in(&c_word2, p2+1); // first character of the next word;
-			if (!iswalpha2(c_word2))
+			if (!iswalpha(c_word2))
 				ok = 0;
 
 			if (ok != 0) {
@@ -1957,7 +1957,7 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in,
 
 	// there is a list of character codes to be substituted with alternative codes
 
-	if (iswupper2(c_lower = c)) {
+	if (iswupper(c_lower = c)) {
 		c_lower = towlower2(c);
 		upper_case = 1;
 	}
@@ -1984,14 +1984,14 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in,
 		// there is a second character to be inserted
 		// don't convert the case of the second character unless the next letter is also upper case
 		c2 = new_c >> 16;
-		if (upper_case && iswupper2(next_in))
-			c2 = towupper2(c2);
+		if (upper_case && iswupper(next_in))
+			c2 = towupper(c2);
 		*insert = c2;
 		new_c &= 0xffff;
 	}
 
 	if (upper_case)
-		new_c = towupper2(new_c);
+		new_c = towupper(new_c);
 
 	*wordflags |= FLAG_CHAR_REPLACED;
 	return new_c;
@@ -2046,7 +2046,7 @@ static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c,
 	case L('n', 'l'):
 		// look for 'n  and replace by a special character (unicode: schwa)
 
-		if (!iswalpha2(prev_in)) {
+		if (!iswalpha(prev_in)) {
 			utf8_in(&next2, &ptr[1]);
 
 			if ((c == '\'') && IsSpace(next2)) {
@@ -2400,7 +2400,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t
 						if (!IsBracket(prev_out)) // ?? perhaps only set FLAG_NOSPACE for . - /  (hyphenated words, URLs, etc)
 							next_word_flags |= FLAG_NOSPACE;
 					} else {
-						if (iswupper2(c))
+						if (iswupper(c))
 							word_flags |= FLAG_FIRST_UPPER;
 
 						if ((prev_out == ' ') && iswdigit(sbuf[ix-2]) && !iswdigit(prev_in)) {
@@ -2430,7 +2430,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t
 					}
 				}
 
-				if (iswupper2(c)) {
+				if (iswupper(c)) {
 					c = towlower2(c);
 
 					if ((j = tr->langopts.param[LOPT_CAPS_IN_WORD]) > 0) {
@@ -2440,7 +2440,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t
 							syllable_marked = 1;
 						}
 					} else {
-						if (iswlower2(prev_in)) {
+						if (iswlower(prev_in)) {
 							// lower case followed by upper case in a word
 							if (UpperCaseInWord(tr, &sbuf[ix], c) == 1) {
 								// convert to lower case and continue
@@ -2450,7 +2450,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t
 								space_inserted = 1;
 								prev_in_save = c;
 							}
-						} else if ((c != ' ') && iswupper2(prev_in) && iswlower2(next_in)) {
+						} else if ((c != ' ') && iswupper(prev_in) && iswlower(next_in)) {
 							int next2_in;
 							utf8_in(&next2_in, &source[source_index + next_in_nbytes]);
 

diff --git a/src/libespeak-ng/translate.h b/src/libespeak-ng/translate.h
@@ -724,12 +724,8 @@ int IsDigit09(unsigned int c);
 int IsAlpha(unsigned int c);
 int IsVowel(Translator *tr, int c);
 int IsSuperscript(int letter);
-int iswalpha2(int c);
 int isspace2(unsigned int c);
-int iswlower2(int c);
-int iswupper2(int c);
-int towlower2(unsigned int c);
-int towupper2(unsigned int c);
+int towlower2(unsigned int c); // Supports Turkish I
 const char *GetTranslatedPhonemeString(int phoneme_mode);
 const char *WordToString2(unsigned int word);
 ALPHABET *AlphabetFromChar(int c);