Skip to content

Commit

Permalink
charset: don't split multi-octet UTF-8 words in Q-encoded MIME headers
Browse files: browse the repository at this point in the history
  • Loading branch information
rsto committed Mar 11, 2019
1 parent 0348bd7 commit 3ca31fe
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 17 deletions.
17 changes: 17 additions & 0 deletions cunit/charset.testc
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,14 @@ static void test_encode_mimeheader(void)
char *s = charset_encode_mimeheader(_in, 0, 0); \
CU_ASSERT_PTR_NOT_NULL(s); \
CU_ASSERT_STRING_EQUAL(s, _exp); \
const char *p, *lf; \
for (lf = s, p = s; *p != '\0'; p++) { \
if (*p == '\n') { \
CU_ASSERT(p - lf <= 76); \
lf = p; \
} \
} \
CU_ASSERT(p - lf <= 76); \
free(s); \
}

Expand All @@ -287,6 +295,15 @@ static void test_encode_mimeheader(void)
/* wrap */
TESTCASE("abc\r\n xyz", "=?UTF-8?Q?abc?=\r\n =?UTF-8?Q?xyz?=");

/* three-byte UTF-8 word barely fits line length limit */
TESTCASE("0123456789012345678901234567890123456789012345678901234\xe2\x82\xac",
"=?UTF-8?Q?0123456789012345678901234567890123456789012345678901234=E2=82=AC?=");

/* three-byte UTF-8 word must not be split */
TESTCASE("01234567890123456789012345678901234567890123456789012345\xe2\x82\xac",
"=?UTF-8?Q?01234567890123456789012345678901234567890123456789012345?="
"\r\n ""=?UTF-8?Q?=E2=82=AC?=");

#undef TESTCASE
}

Expand Down
45 changes: 28 additions & 17 deletions lib/charset.c
Original file line number Diff line number Diff line change
Expand Up @@ -3058,20 +3058,26 @@ static char *qp_encode(const char *data, size_t len, int isheader,
unsigned char this = data[n];
unsigned char next = (n < len - 1) ? data[n+1] : '\0';

if (cnt >= ENCODED_MAX_LINE_LEN) {
if (!isheader) {
/* add soft line break to body */
buf_appendcstr(&buf, "=\r\n");
cnt = 0;
}
else if (!ISUTF8CONTINUATION(this)) {
/* split encoded token with fold */
buf_appendcstr(&buf, "?=");
buf_appendcstr(&buf, "\r\n ");
buf_appendcstr(&buf, "=?UTF-8?Q?");
/* Insert line break before exceeding line length limits */
if (isheader) {
/* RFC2047 forbids splitting multi-octet characters */
int needbytes;
if (this < 0x80) needbytes = 0;
else if (this < 0xc0) needbytes = 0; // UTF-8 continuation
else if (this < 0xe0) needbytes = 3;
else if (this < 0xf0) needbytes = 6;
else if (this < 0xf8) needbytes = 9;
else needbytes = 0; // impossible UTF-8 encoding
if (cnt + needbytes >= ENCODED_MAX_LINE_LEN) {
buf_appendcstr(&buf, "?=\r\n =?UTF-8?Q?");
cnt = 11;
}
}
else if (cnt >= ENCODED_MAX_LINE_LEN) {
/* add soft line break to body */
buf_appendcstr(&buf, "=\r\n");
cnt = 0;
}

if ((QPSAFECHAR[this]
/* per RFC 2047: '?' and '_' in header aren't safe */
Expand Down Expand Up @@ -3171,12 +3177,17 @@ EXPORTED char *charset_encode_mimephrase(const char *data)
for (n = 0; data[n]; n++) {
unsigned char this = data[n];

if (cnt >= ENCODED_MAX_LINE_LEN) {
if (!ISUTF8CONTINUATION(this)) {
/* split encoded token with fold */
buf_appendcstr(&buf, "?=\r\n =?UTF-8?Q?");
cnt = 11;
}
/* RFC2047 forbids splitting multi-octet characters */
int needbytes;
if (this < 0x80) needbytes = 0;
else if (this < 0xc0) needbytes = 0; // UTF-8 continuation
else if (this < 0xe0) needbytes = 3;
else if (this < 0xf0) needbytes = 6;
else if (this < 0xf8) needbytes = 9;
else needbytes = 0; // impossible UTF-8 encoding
if (cnt + needbytes >= ENCODED_MAX_LINE_LEN) {
buf_appendcstr(&buf, "?=\r\n =?UTF-8?Q?");
cnt = 11;
}

if (QPMIMEPHRASESAFECHAR[this]) {
Expand Down

0 comments on commit 3ca31fe

Please sign in to comment.