Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various encodings conversion fixes #3716

Merged
merged 11 commits on
Apr 21, 2024
16 changes: 6 additions & 10 deletions src/document.c
Original file line number Diff line number Diff line change
Expand Up @@ -998,19 +998,15 @@ static gboolean load_text_file(const gchar *locale_filename, const gchar *displa
}

if (! encodings_convert_to_utf8_auto(&filedata->data, &filedata->len, forced_enc,
&filedata->enc, &filedata->bom, &filedata->readonly))
&filedata->enc, &filedata->bom, &filedata->readonly, &err))
{
if (forced_enc)
{
ui_set_statusbar(TRUE, _("The file \"%s\" is not valid %s."),
display_filename, forced_enc);
}
ui_set_statusbar(TRUE, _("Failed to load file \"%s\" as %s: %s."),
display_filename, forced_enc, err->message);
else
{
ui_set_statusbar(TRUE,
_("The file \"%s\" does not look like a text file or the file encoding is not supported."),
display_filename);
}
ui_set_statusbar(TRUE, _("Failed to load file \"%s\": %s."),
display_filename, err->message);
g_error_free(err);
g_free(filedata->data);
return FALSE;
}
Expand Down
61 changes: 29 additions & 32 deletions src/encodings.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,37 +195,27 @@ static gboolean encodings_charset_equals(const gchar *a, const gchar *b)

/* Looks up the index of the entry in the encodings table whose charset matches
 * @charset, as decided by encodings_charset_equals().
 *
 * Returns GEANY_ENCODING_UTF_8 when @charset is NULL, and also when no entry
 * in the range [0, GEANY_ENCODINGS_MAX) matches, so callers cannot tell
 * "not found" apart from a genuine UTF-8 match.
 *
 * NOTE(review): this span appears to be a diff hunk whose +/- markers were
 * stripped: the `gint i;` / `i = 0;` / `while` / `++i;` lines look like the
 * removed loop and the `for` line like its replacement -- confirm against the
 * actual source before compiling. */
GeanyEncodingIndex encodings_get_idx_from_charset(const gchar *charset)
{
gint i;

if (charset == NULL)
return GEANY_ENCODING_UTF_8;

i = 0;
while (i < GEANY_ENCODINGS_MAX)
for (gint i = 0; i < GEANY_ENCODINGS_MAX; i++)
{
if (encodings_charset_equals(charset, encodings[i].charset))
return i;

++i;
}
return GEANY_ENCODING_UTF_8;
}


const GeanyEncoding *encodings_get_from_charset(const gchar *charset)
{
gint i;

if (charset == NULL)
return &encodings[GEANY_ENCODING_UTF_8];

i = 0;
while (i < GEANY_ENCODINGS_MAX)
for (gint i = 0; i < GEANY_ENCODINGS_MAX; i++)
{
if (encodings_charset_equals(charset, encodings[i].charset))
return &encodings[i];

++i;
}

return NULL;
Expand Down Expand Up @@ -300,12 +290,10 @@ void encodings_select_radio_item(const gchar *charset)

g_return_if_fail(charset != NULL);

i = 0;
while (i < GEANY_ENCODINGS_MAX)
for (i = 0; i < GEANY_ENCODINGS_MAX; i++)
{
if (utils_str_equal(charset, encodings[i].charset))
break;
i++;
}
if (i == GEANY_ENCODINGS_MAX)
i = GEANY_ENCODING_UTF_8; /* fallback to UTF-8 */
Expand Down Expand Up @@ -621,7 +609,7 @@ void encodings_encoding_store_cell_data_func(GtkCellLayout *cell_layout,

static gchar *convert_to_utf8_from_charset(const gchar *buffer, gssize size,
const gchar *charset, gboolean fast,
gsize *utf8_size)
gsize *utf8_size, GError **error)
{
gchar *utf8_content = NULL;
GError *conv_error = NULL;
Expand All @@ -637,18 +625,22 @@ static gchar *convert_to_utf8_from_charset(const gchar *buffer, gssize size,
if (fast)
{
utf8_content = converted_contents;
if (conv_error != NULL) g_error_free(conv_error);
if (conv_error != NULL) g_propagate_error(error, conv_error);
}
else if (conv_error != NULL || ! g_utf8_validate(converted_contents, bytes_written, NULL))
{
if (conv_error != NULL)
{
geany_debug("Couldn't convert from %s to UTF-8 (%s).", charset, conv_error->message);
g_error_free(conv_error);
g_propagate_error(error, conv_error);
conv_error = NULL;
}
else
{
geany_debug("Couldn't convert from %s to UTF-8.", charset);
g_set_error(error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
_("Data contains NULs"));
b4n marked this conversation as resolved.
Show resolved Hide resolved
}

utf8_content = NULL;
g_free(converted_contents);
Expand Down Expand Up @@ -684,7 +676,7 @@ gchar *encodings_convert_to_utf8_from_charset(const gchar *buffer, gssize size,
{
/* If fast=FALSE, we can safely ignore the size as the output cannot contain NULs.
* Otherwise, the caller already agrees on partial data anyway. */
return convert_to_utf8_from_charset(buffer, size, charset, fast, NULL);
return convert_to_utf8_from_charset(buffer, size, charset, fast, NULL, NULL);
}


Expand All @@ -704,7 +696,7 @@ static gchar *encodings_check_regexes(const gchar *buffer, gsize size)


static gchar *encodings_convert_to_utf8_with_suggestion(const gchar *buffer, gssize size,
const gchar *suggested_charset, gchar **used_encoding, gsize *utf8_size)
const gchar *suggested_charset, gchar **used_encoding, gsize *utf8_size, GError **error)
{
const gchar *locale_charset = NULL;
const gchar *charset;
Expand Down Expand Up @@ -771,7 +763,7 @@ static gchar *encodings_convert_to_utf8_with_suggestion(const gchar *buffer, gss

geany_debug("Trying to convert %" G_GSIZE_FORMAT " bytes of data from %s into UTF-8.",
size, charset);
utf8_content = convert_to_utf8_from_charset(buffer, size, charset, FALSE, utf8_size);
utf8_content = convert_to_utf8_from_charset(buffer, size, charset, FALSE, utf8_size, NULL);

if (G_LIKELY(utf8_content != NULL))
{
Expand All @@ -788,6 +780,9 @@ static gchar *encodings_convert_to_utf8_with_suggestion(const gchar *buffer, gss
}
}

g_set_error(error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
_("Data contains NULs or the encoding is not supported"));

return NULL;
}

Expand All @@ -812,7 +807,7 @@ gchar *encodings_convert_to_utf8(const gchar *buffer, gssize size, gchar **used_
/* first try to read the encoding from the file content */
regex_charset = encodings_check_regexes(buffer, size);
/* we know this cannot succeed if there are NULs in the output, so ignoring the size is OK */
utf8 = encodings_convert_to_utf8_with_suggestion(buffer, size, regex_charset, used_encoding, NULL);
utf8 = encodings_convert_to_utf8_with_suggestion(buffer, size, regex_charset, used_encoding, NULL, NULL);
g_free(regex_charset);

return utf8;
Expand Down Expand Up @@ -898,21 +893,23 @@ typedef struct

/* convert data with the specified encoding */
static gboolean
handle_forced_encoding(BufferData *buffer, const gchar *forced_enc)
handle_forced_encoding(BufferData *buffer, const gchar *forced_enc, GError **error)
{
GeanyEncodingIndex enc_idx;

if (utils_str_equal(forced_enc, "UTF-8"))
{
if (! g_utf8_validate(buffer->data, buffer->size, NULL))
{
g_set_error(error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
_("Data contains NULs or is not valid UTF-8"));
return FALSE;
}
}
else
{
gchar *converted_text = convert_to_utf8_from_charset(
buffer->data, buffer->size, forced_enc, FALSE, &buffer->size);
buffer->data, buffer->size, forced_enc, FALSE, &buffer->size, error);
if (converted_text == NULL)
{
return FALSE;
Expand All @@ -931,7 +928,7 @@ handle_forced_encoding(BufferData *buffer, const gchar *forced_enc)

/* detect encoding and convert to UTF-8 if necessary */
static gboolean
handle_encoding(BufferData *buffer, GeanyEncodingIndex enc_idx)
handle_encoding(BufferData *buffer, GeanyEncodingIndex enc_idx, GError **error)
{
g_return_val_if_fail(buffer->enc == NULL, FALSE);
g_return_val_if_fail(buffer->bom == FALSE, FALSE);
Expand Down Expand Up @@ -961,7 +958,7 @@ handle_encoding(BufferData *buffer, GeanyEncodingIndex enc_idx)
else /* the BOM indicated something else than UTF-8 */
{
gchar *converted_text = convert_to_utf8_from_charset(
buffer->data, buffer->size, buffer->enc, FALSE, &buffer->size);
buffer->data, buffer->size, buffer->enc, FALSE, &buffer->size, NULL);
if (converted_text != NULL)
{
SETPTR(buffer->data, converted_text);
Expand Down Expand Up @@ -990,7 +987,7 @@ handle_encoding(BufferData *buffer, GeanyEncodingIndex enc_idx)
{
/* detect the encoding */
gchar *converted_text = encodings_convert_to_utf8_with_suggestion(buffer->data,
buffer->size, regex_charset, &buffer->enc, &buffer->size);
buffer->size, regex_charset, &buffer->enc, &buffer->size, error);

if (converted_text == NULL)
{
Expand Down Expand Up @@ -1023,7 +1020,7 @@ handle_bom(BufferData *buffer)


/* loads textfile data, verifies it and converts it to UTF-8, either from forced_enc or from a detected encoding. Also handles BOM. */
static gboolean handle_buffer(BufferData *buffer, const gchar *forced_enc)
static gboolean handle_buffer(BufferData *buffer, const gchar *forced_enc, GError **error)
{
GeanyEncodingIndex tmp_enc_idx;

Expand All @@ -1040,12 +1037,12 @@ static gboolean handle_buffer(BufferData *buffer, const gchar *forced_enc)
buffer->bom = FALSE;
buffer->enc = g_strdup(encodings[GEANY_ENCODING_NONE].charset);
}
else if (! handle_forced_encoding(buffer, forced_enc))
else if (! handle_forced_encoding(buffer, forced_enc, error))
{
return FALSE;
}
}
else if (! handle_encoding(buffer, tmp_enc_idx))
else if (! handle_encoding(buffer, tmp_enc_idx, error))
{
return FALSE;
}
Expand Down Expand Up @@ -1073,7 +1070,7 @@ static gboolean handle_buffer(BufferData *buffer, const gchar *forced_enc)
*/
GEANY_EXPORT_SYMBOL
gboolean encodings_convert_to_utf8_auto(gchar **buf, gsize *size, const gchar *forced_enc,
gchar **used_encoding, gboolean *has_bom, gboolean *has_nuls)
gchar **used_encoding, gboolean *has_bom, gboolean *has_nuls, GError **error)
{
BufferData buffer;

Expand All @@ -1082,7 +1079,7 @@ gboolean encodings_convert_to_utf8_auto(gchar **buf, gsize *size, const gchar *f
buffer.enc = NULL;
buffer.bom = FALSE;

if (! handle_buffer(&buffer, forced_enc))
if (! handle_buffer(&buffer, forced_enc, error))
return FALSE;

*size = buffer.size;
Expand Down
3 changes: 2 additions & 1 deletion src/encodingsprivate.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ void encodings_encoding_store_cell_data_func(GtkCellLayout *cell_layout, GtkCell
gboolean encodings_is_unicode_charset(const gchar *string);

gboolean encodings_convert_to_utf8_auto(gchar **buf, gsize *size, const gchar *forced_enc,
gchar **used_encoding, gboolean *has_bom, gboolean *has_nuls);
gchar **used_encoding, gboolean *has_bom, gboolean *has_nuls,
GError **error);

GeanyEncodingIndex encodings_scan_unicode_bom(const gchar *string, gsize len, guint *bom_len);

Expand Down
7 changes: 5 additions & 2 deletions src/templates.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,18 @@ static gchar *read_file(const gchar *locale_fname)
gchar *contents;
gsize length;
GString *str;
GError *err = NULL;

if (! g_file_get_contents(locale_fname, &contents, &length, NULL))
return NULL;

if (! encodings_convert_to_utf8_auto(&contents, &length, NULL, NULL, NULL, NULL))
if (! encodings_convert_to_utf8_auto(&contents, &length, NULL, NULL, NULL, NULL, &err))
{
gchar *utf8_fname = utils_get_utf8_from_locale(locale_fname);

ui_set_statusbar(TRUE, _("Failed to convert template file \"%s\" to UTF-8"), utf8_fname);
ui_set_statusbar(TRUE, _("Failed to convert template file \"%s\" to UTF-8: %s"),
utf8_fname, err->message);
g_error_free(err);
g_free(utf8_fname);
g_free(contents);
return NULL;
Expand Down
10 changes: 8 additions & 2 deletions tests/test_encodings.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,17 @@ static gboolean assert_convert_to_utf8_auto_impl(
gboolean has_bom = FALSE;
gboolean partial = FALSE;
gboolean ret;
GError *err = NULL;

g_log(domain, G_LOG_LEVEL_INFO, "%s:%d:%s: converting %lu bytes", file, line, func, input_size);
ret = encodings_convert_to_utf8_auto(&buf, &size, forced_enc, &used_encoding, &has_bom, &partial);
ret = encodings_convert_to_utf8_auto(&buf, &size, forced_enc, &used_encoding, &has_bom, &partial, &err);
fflush(stdout);
if (ret)
if (! ret)
{
g_log(domain, G_LOG_LEVEL_INFO, "%s:%d:%s: conversion failed: %s", file, line, func, err->message);
g_error_free(err);
}
else
{
assert_cmpmem_eq_with_caller(buf, expected_output, MIN(size, expected_size),
domain, file, line, func);
Expand Down