Skip to content

Commit

Permalink
introduce encoding_is_utf8() internal
Browse files Browse the repository at this point in the history
Apparently glibc's _nl_normalize_charset() removes
everything but alphanumerics, and converts to lowercase.
Replace strcmp() against "UTF-8" with encoding_is_utf8(),
which matches this behavior, and also supports the optional
"cs" ("character set") prefix.

Closes #2780
  • Loading branch information
dankamongmen committed Jun 5, 2024
1 parent bc4dc23 commit 9845b5b
Showing 1 changed file with 28 additions and 4 deletions.
32 changes: 28 additions & 4 deletions src/lib/notcurses.c
Original file line number Diff line number Diff line change
Expand Up @@ -1095,6 +1095,30 @@ int ncplane_destroy_family(ncplane *ncp){
return ret;
}

// glibc's _nl_normalize_charset() converts to lowercase, removing everything
// but alnums. furthermore, "cs" is a valid prefix meaning "character set".
//
// returns true iff |enc|, after skipping an optional leading "cs" (any case)
// and ignoring every non-alphanumeric character, spells exactly "utf8"
// (case-insensitively). thus "UTF-8", "utf8", and "csUTF8" all match, while
// "UTF-16", "utf", and "" do not. a NULL |enc| is treated as "not UTF-8".
static bool
encoding_is_utf8(const char *enc){
  if(enc == NULL){
    return false;
  }
  // the <ctype.h> functions are only defined for values representable as
  // unsigned char (or EOF); walk the string as unsigned char so that bytes
  // >= 0x80 don't become negative arguments where plain char is signed.
  const unsigned char* cur = (const unsigned char*)enc;
  // strncasecmp() isn't ansi/iso, so check the two prefix bytes by hand. the
  // && short-circuits, hence cur[1] is never read past a terminating NUL.
  if(tolower(cur[0]) == 'c' && tolower(cur[1]) == 's'){
    cur += 2; // skip initial "cs" if present.
  }
  static const char utfstr[] = "utf8"; // already normalized (lowercase alnums)
  const char* match = utfstr;
  for( ; *cur ; ++cur){
    if(!isalnum(*cur)){ // we only care about alnums
      continue;
    }
    // if the pattern is exhausted, *match is NUL, which no alnum equals; the
    // candidate therefore has trailing alnums and cannot be "utf8".
    if(tolower(*cur) != *match){
      return false;
    }
    ++match;
  }
  // success only if the whole pattern was consumed
  return *match == '\0';
}

// it's critical that we're using UTF-8 encoding if at all possible. since the
// client might not have called setlocale(2) (if they weren't reading the
// directions...), go ahead and try calling setlocale(LC_ALL, "") and then
Expand All @@ -1112,7 +1136,7 @@ void init_lang(void){
}
#endif
const char* encoding = nl_langinfo(CODESET);
if(encoding && !strcmp(encoding, "UTF-8")){
if(encoding && encoding_is_utf8(encoding)){
return; // already utf-8, great!
}
const char* lang = getenv("LANG");
Expand All @@ -1127,13 +1151,13 @@ void init_lang(void){
}
#endif
encoding = nl_langinfo(CODESET);
if(encoding && !strcmp(encoding, "UTF-8")){
if(encoding && encoding_is_utf8(encoding)){
loginfo("set locale from LANG; client should call setlocale(2)!");
return;
}
setlocale(LC_CTYPE, "C.UTF-8");
encoding = nl_langinfo(CODESET);
if(encoding && !strcmp(encoding, "UTF-8")){
if(encoding && encoding_is_utf8(encoding)){
loginfo("forced UTF-8 encoding; client should call setlocale(2)!");
return;
}
Expand Down Expand Up @@ -1214,7 +1238,7 @@ notcurses_early_init(const struct notcurses_options* opts, FILE* fp, unsigned* u
}
//fprintf(stderr, "getenv LC_ALL: %s LC_CTYPE: %s\n", getenv("LC_ALL"), getenv("LC_CTYPE"));
const char* encoding = nl_langinfo(CODESET);
if(encoding && !strcmp(encoding, "UTF-8")){
if(encoding && encoding_is_utf8(encoding)){
*utf8 = true;
}else{
*utf8 = false;
Expand Down

0 comments on commit 9845b5b

Please sign in to comment.