From 8a9094609463f32fddbfece87fe1915b761b3a61 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 21 May 2019 02:00:45 +0200 Subject: [PATCH] - allow certain replacements to be valid for CanPrint The Romanian letters with comma below, the second Greek Sigma variant and a few special Cyrillic letters have replacements that should not result in rejection. --- src/gamedata/fonts/font.cpp | 2 +- src/gamedata/fonts/fontinternals.h | 2 ++ src/gamedata/fonts/v_font.cpp | 55 ++++++++++++++++++++++-------- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/gamedata/fonts/font.cpp b/src/gamedata/fonts/font.cpp index 4e9cef0344a..da8fd25ee73 100644 --- a/src/gamedata/fonts/font.cpp +++ b/src/gamedata/fonts/font.cpp @@ -1044,7 +1044,7 @@ bool FFont::CanPrint(const uint8_t *string) const else if (chr != '\n') { int cc = GetCharCode(chr, true); - if (chr != cc && iswalpha(chr)) + if (chr != cc && iswalpha(chr) && cc != getAlternative(chr)) { return false; } diff --git a/src/gamedata/fonts/fontinternals.h b/src/gamedata/fonts/fontinternals.h index 63a69733b2d..6d68c6aa868 100644 --- a/src/gamedata/fonts/fontinternals.h +++ b/src/gamedata/fonts/fontinternals.h @@ -41,4 +41,6 @@ class FImageSource; void RecordTextureColors (FImageSource *pic, uint32_t *usedcolors); bool myislower(int code); +bool myisupper(int code); int stripaccent(int code); +int getAlternative(int code); diff --git a/src/gamedata/fonts/v_font.cpp b/src/gamedata/fonts/v_font.cpp index 9328c0b4bb8..71dd5a43355 100644 --- a/src/gamedata/fonts/v_font.cpp +++ b/src/gamedata/fonts/v_font.cpp @@ -782,6 +782,9 @@ void InitLowerUpper() if (upperforlower[lower] == lower) upperforlower[lower] = upper; isuppermap[upper] = islowermap[lower] = true; } + // Special treatment for the two variants of the small sigma in Greek. 
+ islowermap[0x3c2] = true; + upperforlower[0x3c2] = 0x3a3; } @@ -791,7 +794,14 @@ bool myislower(int code) return false; } -// Returns a character without an accent mark (or one with a similar looking accent in some cases where direct support is unlikely. +bool myisupper(int code) +{ + if (code >= 0 && code < 65536) return isuppermap[code]; + return false; +} + + +// Returns a character without an accent mark (or one with a similar looking accent in some cases where direct support is unlikely.) int stripaccent(int code) { @@ -842,7 +852,7 @@ int stripaccent(int code) else if (code >= 0x100 && code < 0x180) { // For the double-accented Hungarian letters it makes more sense to first map them to the very similar looking Umlauts. - // (And screw the crappy specs here that do not allow UTF-8 multibyte characters here.) + // (And screw the crappy specs that do not allow UTF-8 multibyte character literals here.) if (code == 0x150) code = 0xd6; else if (code == 0x151) code = 0xf6; else if (code == 0x170) code = 0xdc; @@ -853,22 +863,38 @@ int stripaccent(int code) return accentless[code - 0x100]; } } - else if (code >= 0x200 && code < 0x21c) + else if (code >= 0x200 && code < 0x218) { - // 0x200-0x217 are probably irrelevant but easy to map to other characters more likely to exist. 0x218-0x21b are relevant for Romanian but also have a fallback within ranges that are more likely to be supported. - static const uint16_t u200map[] = {0xc4, 0xe4, 0xc2, 0xe2, 0xcb, 0xeb, 0xca, 0xea, 0xcf, 0xef, 0xce, 0xee, 0xd6, 0xf6, 0xd4, 0xe4, 'R', 'r', 'R', 'r', 0xdc, 0xfc, 0xdb, 0xfb, 0x15e, 0x15f, 0x162, 0x163}; + // 0x200-0x217 are irrelevant but easy to map to other characters more likely to exist. 
+ static const uint16_t u200map[] = {0xc4, 0xe4, 0xc2, 0xe2, 0xcb, 0xeb, 0xca, 0xea, 0xcf, 0xef, 0xce, 0xee, 0xd6, 0xf6, 0xd4, 0xf4, 'R', 'r', 'R', 'r', 0xdc, 0xfc, 0xdb, 0xfb}; return u200map[code - 0x200]; } - else switch (code) + return getAlternative(code); +} + +int getAlternative(int code) +{ + // This is for determining replacements that do not make CanPrint fail. + switch (code) { - case 0x2014: - return '-'; // long hyphen + default: + return code; - case 0x201c: - case 0x201d: - case 0x201e: - return '"'; // typographic quotation marks + case 0x17f: // The 'long s' can be safely remapped to the regular variant, not that this gets used in any real text... + return 's'; + case 0x218: // Romanian S with comma below may get remapped to S with cedilla. + return 0x15e; + + case 0x219: + return 0x15f; + + case 0x21a: // Romanian T with comma below may get remapped to T with cedilla. + return 0x162; + + case 0x21b: + return 0x163; + case 0x3c2: return 0x3c3; // Lowercase Sigma character in Greek, which changes depending on its positioning in a word; if the font is uppercase only or features a smallcaps style, the second variant of the letter will remain unused @@ -890,7 +916,7 @@ int stripaccent(int code) case 0x408: return 'J'; - + case 0x450: return 0xe8; @@ -908,7 +934,7 @@ int stripaccent(int code) case 0x458: return 'j'; - + } // skip the rest of Latin characters because none of them are relevant for modern languages, except Vietnamese which cannot be represented with the tiny bitmap fonts anyway. @@ -916,6 +942,7 @@ int stripaccent(int code) return code; } + FFont *V_GetFont(const char *name, const char *fontlumpname) { if (!stricmp(name, "DBIGFONT")) name = "BigFont";