Skip to content
This repository was archived by the owner on Nov 8, 2023. It is now read-only.

Commit 53de874

Browse files
committed
The default locale "" should be a UTF-8 locale.

"ls -q" (or "adb shell -tt ls") was mangling non-ASCII because mbrtowc was returning multibyte characters as their individual bytes. This was because toybox asks for "" rather than "C.UTF-8", and for some reason we were interpreting that as "C" rather than "C.UTF-8".

Test: bionic tests, ls
Change-Id: Ic60e3b90cd5fe689e5489fad0d5d91062b9594ed
1 parent bd839d1 commit 53de874

File tree

3 files changed

+18
-12
lines changed

3 files changed

+18
-12
lines changed

libc/bionic/locale.cpp

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,8 @@
3737

3838
#include "private/bionic_macros.h"
3939

40-
// We currently support a single locale, the "C" locale (also known as "POSIX").
40+
// We only support two locales, the "C" locale (also known as "POSIX"),
41+
// and the "C.UTF-8" locale (also known as "en_US.UTF-8").
4142

4243
static bool __bionic_current_locale_is_utf8 = true;
4344

@@ -100,12 +101,16 @@ static void __locale_init() {
100101
g_locale.int_n_sign_posn = CHAR_MAX;
101102
}
102103

103-
static bool __is_supported_locale(const char* locale) {
104-
return (strcmp(locale, "") == 0 ||
105-
strcmp(locale, "C") == 0 ||
106-
strcmp(locale, "C.UTF-8") == 0 ||
107-
strcmp(locale, "en_US.UTF-8") == 0 ||
108-
strcmp(locale, "POSIX") == 0);
104+
static bool __is_supported_locale(const char* locale_name) {
105+
return (strcmp(locale_name, "") == 0 ||
106+
strcmp(locale_name, "C") == 0 ||
107+
strcmp(locale_name, "C.UTF-8") == 0 ||
108+
strcmp(locale_name, "en_US.UTF-8") == 0 ||
109+
strcmp(locale_name, "POSIX") == 0);
110+
}
111+
112+
static bool __is_utf8_locale(const char* locale_name) {
113+
return (*locale_name == '\0' || strstr(locale_name, "UTF-8"));
109114
}
110115

111116
lconv* localeconv() {
@@ -133,7 +138,7 @@ locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/
133138
return NULL;
134139
}
135140

136-
return new __locale_t(strstr(locale_name, "UTF-8") != NULL ? 4 : 1);
141+
return new __locale_t(__is_utf8_locale(locale_name) ? 4 : 1);
137142
}
138143

139144
char* setlocale(int category, const char* locale_name) {
@@ -150,7 +155,7 @@ char* setlocale(int category, const char* locale_name) {
150155
errno = ENOENT;
151156
return NULL;
152157
}
153-
__bionic_current_locale_is_utf8 = (strstr(locale_name, "UTF-8") != NULL);
158+
__bionic_current_locale_is_utf8 = __is_utf8_locale(locale_name);
154159
}
155160

156161
return const_cast<char*>(__bionic_current_locale_is_utf8 ? "C.UTF-8" : "C");

libc/bionic/wchar.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ size_t mbrtowc(wchar_t* pwc, const char* s, size_t n, mbstate_t* ps) {
6161
static mbstate_t __private_state;
6262
mbstate_t* state = (ps == NULL) ? &__private_state : ps;
6363

64-
// Our wchar_t is UTF-32
64+
// Our wchar_t is UTF-32.
6565
return mbrtoc32(reinterpret_cast<char32_t*>(pwc), s, n, state);
6666
}
6767

tests/locale_test.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,10 @@ TEST(locale, setlocale) {
5959
EXPECT_EQ(EINVAL, errno);
6060

6161
#if defined(__BIONIC__)
62-
// The "" locale is implementation-defined. For bionic, it's the C locale.
62+
// The "" locale is implementation-defined. For bionic, it's the C.UTF-8 locale, which is
63+
// pretty much all we support anyway.
6364
// glibc will give us something like "en_US.UTF-8", depending on the user's configuration.
64-
EXPECT_STREQ("C", setlocale(LC_ALL, ""));
65+
EXPECT_STREQ("C.UTF-8", setlocale(LC_ALL, ""));
6566
#endif
6667
EXPECT_STREQ("C", setlocale(LC_ALL, "C"));
6768
EXPECT_STREQ("C", setlocale(LC_ALL, "POSIX"));

0 commit comments

Comments
 (0)