Skip to content

Commit

Permalink
lib: Add str_truncate_utf8().
Browse files Browse the repository at this point in the history
It works similarly to str_truncate(), but it makes sure the truncated string
remains valid UTF-8.
  • Loading branch information
stephanbosch authored and villesavolainen committed Feb 6, 2019
1 parent 46e2d31 commit f9b58a2
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/lib/str.c
Expand Up @@ -3,6 +3,7 @@
#include "lib.h"
#include "buffer.h"
#include "printf-format-fix.h"
#include "unichar.h"
#include "str.h"

#include <stdio.h>
Expand Down Expand Up @@ -146,3 +147,12 @@ void str_vprintfa(string_t *str, const char *fmt, va_list args)
/* drop the unused data, including terminating NUL */
buffer_set_used_size(str, pos + ret);
}

/* Shorten str to at most len bytes. The cut position is chosen by
   uni_utf8_data_truncate(), so that it doesn't land in the middle of a UTF-8
   sequence. A string that is already len bytes or shorter is left as is. */
void str_truncate_utf8(string_t *str, size_t len)
{
	size_t cur_len = str_len(str);

	if (cur_len > len) {
		size_t new_len =
			uni_utf8_data_truncate(str_data(str), cur_len, len);

		str_truncate(str, new_len);
	}
}
6 changes: 6 additions & 0 deletions src/lib/str.h
Expand Up @@ -78,4 +78,10 @@ static inline void str_truncate(string_t *str, size_t len)
buffer_set_used_size(str, len);
}

/* Truncate the string to at most len bytes, but also make sure the truncation
   doesn't happen in the middle of a UTF-8 character sequence. In that case,
   the string will end up being a few bytes shorter than len. If the string is
   already len bytes or shorter to begin with, do nothing. */
void str_truncate_utf8(string_t *str, size_t len);

#endif
44 changes: 44 additions & 0 deletions src/lib/test-str.c
@@ -1,6 +1,7 @@
/* Copyright (c) 2012-2018 Dovecot authors, see the included COPYING file */

#include "test-lib.h"
#include "unichar.h"
#include "str.h"

static void test_str_append(void)
Expand Down Expand Up @@ -127,6 +128,48 @@ static void test_str_truncate(void)
test_end();
}

/* Exercises str_truncate_utf8() with every limit from 100 down to 0 on three
   kinds of data: plain ASCII, 3-byte UTF-8 sequences only, and 3-byte
   sequences interleaved with ASCII digits. Each data set is truncated all the
   way down to an empty string before the next one is appended. */
static void test_str_truncate_utf8(void)
{
	/* 6 single-byte characters */
	static const char ascii_only[] = "123456";
	/* 6 sequences of 3 bytes each = 18 bytes */
	static const char multibyte_only[] =
		"\xE4\xB8\x80\xE4\xBa\x8C\xE4\xB8\x89"
		"\xE5\x9b\x9b\xE4\xBa\x94\xE5\x85\xAD";
	/* 6 * (3-byte sequence + 1 digit) = 24 bytes */
	static const char mixed[] =
		"\xE4\xB8\x80""1""\xE4\xBa\x8C""2""\xE4\xB8\x89""3"
		"\xE5\x9b\x9b""4""\xE4\xBa\x94""5""\xE5\x85\xAD""6";
	string_t *str;
	int i;

	str = t_str_new(8);
	test_begin("str_truncate_utf8()");

	/* ASCII: limits at or above the length must not change anything */
	str_append(str, ascii_only);
	i = 100;
	while (i >= 6) {
		str_truncate_utf8(str, i);
		test_assert_idx(str_len(str) == 6, i);
		i--;
	}
	/* ASCII: below the length every limit is hit exactly */
	while (i >= 0) {
		str_truncate_utf8(str, i);
		test_assert_idx(str_len(str) == (unsigned int)i, i);
		i--;
	}

	/* 3-byte sequences only: untouched while the limit is large enough */
	str_append(str, multibyte_only);
	i = 100;
	while (i >= 18) {
		str_truncate_utf8(str, i);
		test_assert_idx(str_len(str) == 18, i);
		i--;
	}
	/* ... and afterwards the length is the limit rounded down to a
	   multiple of 3 */
	while (i >= 0) {
		str_truncate_utf8(str, i);
		test_assert_idx(str_len(str) % 3 == 0, i);
		test_assert_idx((str_len(str) / 3) == ((unsigned int)i / 3), i);
		i--;
	}

	/* mixed data: untouched while the limit is large enough */
	str_append(str, mixed);
	i = 100;
	while (i >= 24) {
		str_truncate_utf8(str, i);
		test_assert_idx(str_len(str) == 24, i);
		i--;
	}
	/* ... and afterwards whatever remains has to pass the UTF-8
	   validity check */
	while (i >= 0) {
		str_truncate_utf8(str, i);
		test_assert_idx(uni_utf8_data_is_valid(str_data(str),
						       str_len(str)), i);
		i--;
	}
	test_end();
}

void test_str(void)
{
test_str_append();
Expand All @@ -135,4 +178,5 @@ void test_str(void)
test_str_delete();
test_str_append_max();
test_str_truncate();
test_str_truncate_utf8();
}

0 comments on commit f9b58a2

Please sign in to comment.