Skip to content

Commit

Permalink
Make sure visual length is correctly calculated when "compressing" na…
Browse files Browse the repository at this point in the history
…mes. Fixes #17.
  • Loading branch information
acli committed Aug 6, 2020
1 parent 410b4cb commit 447ecd0
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 22 deletions.
5 changes: 5 additions & 0 deletions rt-page.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/* rt-page.c
* vi: set sw=4 ts=8 ai sm noet :
*/
/* This software is copyrighted as detailed in the LICENSE file. */

Expand All @@ -17,6 +18,7 @@
#include "env.h"
#include "util.h"
#include "util2.h"
#include "utf.h"
#include "opt.h"
#include "only.h"
#include "addng.h"
Expand Down Expand Up @@ -1815,6 +1817,9 @@ int sel;
int subj_width = tc_COLS - 8 - UseSelNum;
int from_width = tc_COLS / 5;
int date_width = tc_COLS / 5;
#ifdef USE_UTF_HACK
utf_init("utf-8", "utf-8"); /* FIXME */
#endif

maybe_eol();
if (subj_width < 32)
Expand Down
163 changes: 148 additions & 15 deletions rt-util.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@ int max;
register char* mid;
register char* d;
register int len, namelen, midlen;
#ifdef USE_UTF_HACK
int vis_len, vis_namelen, vis_midlen;
#else
#define vis_len len
#define vis_namelen namelen
#define vis_midlen midlen
#endif
int notlast;

try_again:
Expand All @@ -103,7 +110,12 @@ int max;
s = name + len - 1;
while (isspace(*s)) s--;
s[1] = '\0';
if (s - name + 1 <= max)
#ifdef USE_UTF_HACK
vis_len = visual_length_between(s, name) + 1;
#else
vis_len = s - name + 1;
#endif
if (vis_len <= max)
return name;

/* Look for characters that likely mean the end of the name
Expand All @@ -113,13 +125,23 @@ int max;
** "Ridge, Ross" and since "R HTMU" is worse than "Ridge" we do
** it anyways.
*/
for (d = name + 1; *d; d++) {
#ifdef USE_UTF_HACK
d = name + byte_length_at(name);
#else
d = name + 1;
#endif
for ( ; *d; ) {
if (*d == ',' || *d == ';' || *d == '(' || *d == '@'
|| (*d == '-' && (d[1] == '-' || d[1] == ' '))) {
*d-- = '\0';
s = d;
break;
}
#ifdef USE_UTF_HACK
d += byte_length_at(d);
#else
d++;
#endif
}

/* Find the last name */
Expand All @@ -128,14 +150,31 @@ int max;
while (isspace(*s)) s--;
s[1] = '\0';
len = s - name + 1;
if (len <= max)
#ifdef USE_UTF_HACK
vis_len = visual_length_between(s, name) + 1;
#endif
if (vis_len <= max)
return name;
/* If the last name is an abbreviation it's not the one we want. */
if (*s == '.')
notlast = 1;
while (!isspace(*s)) {
if (s == name) { /* only one name */
#ifdef USE_UTF_HACK
/* FIXME - need to move into some function */
int i;
int j;
for (i = j = 0; ; ) {
int w = byte_length_at(name + i);
int v = visual_width_at(name + i);
if (w == 0 || j + v > max) break;
i += w;
j += v;
}
name[i] = '\0';
#else
name[max] = '\0';
#endif
return name;
}
if (isdigit(*s)) /* probably a phone number */
Expand All @@ -152,31 +191,51 @@ int max;
s--;
}
mid = name;
while (!isspace(*mid)) mid++;
while (!isspace(*mid)) {
#ifdef USE_UTF_HACK
mid += byte_length_at(mid);
#else
mid++;
#endif
}
namelen = mid - name + 1;
#ifdef USE_UTF_HACK
vis_namelen = visual_length_between(mid, name) + 1;
#endif
if (mid == s+1) { /* no middle name */
mid = 0;
midlen = 0;
} else {
*mid++ = '\0';
while (isspace(*mid)) {
len--;
#ifdef USE_UTF_HACK
mid += byte_length_at(mid);
#else
mid++;
#endif
}
midlen = s - mid + 2;
#ifdef USE_UTF_HACK
vis_midlen = visual_length_between(s, mid) + 2;
#endif
/* If first name is an initial and middle isn't and it all fits
** without the first initial, drop it. */
if (len > max && mid != s) {
if (len - namelen <= max
if (vis_len > max && mid != s) {
if (vis_len - vis_namelen <= max
&& ((mid[1] != '.' && (!name[1] || (name[1] == '.' && !name[2])))
|| (*mid == '"' && *s == '"'))) {
len -= namelen;
name = mid;
namelen = midlen;
#ifdef USE_UTF_HACK
vis_len = vis_namelen;
vis_namelen = vis_midlen;
#endif
mid = 0;
}
else if (*mid == '"' && *s == '"') {
if (midlen > max) {
if (vis_midlen > max) {
name = mid+1;
*s = '\0';
goto try_again;
Expand All @@ -185,53 +244,107 @@ int max;
last = mid;
namelen = 0;
mid = 0;
#ifdef USE_UTF_HACK
vis_len = vis_midlen;
vis_namelen = 0;
#endif
}
}
}
s[1] = '\0';
if (mid && len > max) {
if (mid && vis_len > max) {
/* Turn middle names into intials */
len -= s - mid + 2;
#ifdef USE_UTF_HACK
vis_len -= visual_length_between(s, mid) + 2;
#endif
d = s = mid;
while (*s) {
#ifdef USE_UTF_HACK
int w;
int v;
#endif
if (isalpha(*s)) {
if (d != mid)
if (d != mid) {
#ifdef USE_UTF_HACK
int w = byte_length_at(s);
memset(d, ' ', w);
d += w;
#else
*d++ = ' ';
#endif
}
#ifdef USE_UTF_HACK
w = byte_length_at(s);
bcopy(s, d, w);
d += w;
s += w;
#else
*d++ = *s++;
#endif
}
while (*s && !isspace(*s)) {
#ifdef USE_UTF_HACK
s += byte_length_at(s);
#else
s++;
#endif
}
while (*s && !isspace(*s)) s++;
while (isspace(*s)) s++;
}
if (d != mid) {
*d = '\0';
midlen = d - mid + 1;
len += midlen;
#ifdef USE_UTF_HACK
vis_midlen = visual_length_between(d, mid) + 1;
vis_len += vis_midlen;
#endif
} else
mid = 0;
}
if (len > max) {
if (vis_len > max) {
/* If the first name fits without the middle initials, drop them */
if (mid && len - midlen <= max) {
if (mid && vis_len - vis_midlen <= max) {
len -= midlen;
#ifdef USE_UTF_HACK
vis_len -= vis_midlen;
#endif
mid = 0;
} else if (namelen > 0) {
/* Turn the first name into an initial */
#ifdef USE_UTF_HACK
int w = byte_length_at(name);
len -= namelen - (w + 1);
name[w] = '\0';
namelen = w + 1;
vis_namelen = visual_width_at(name) + 1;
#else
len -= namelen - 2;
name[1] = '\0';
namelen = 2;
if (len > max) {
#endif
if (vis_len > max) {
/* Dump the middle initials (if present) */
if (mid) {
len -= midlen;
#ifdef USE_UTF_HACK
vis_len -= vis_midlen;
#endif
mid = 0;
}
if (len > max) {
if (vis_len > max) {
/* Finally just truncate the last name */
/*FIXME*/
last[max - 2] = '\0';
}
}
} else
} else {
namelen = 0;
#ifdef USE_UTF_HACK
vis_namelen = 0;
#endif
}
}

/* Paste the names back together */
Expand All @@ -244,9 +357,29 @@ int max;
d += midlen;
d[-1] = ' ';
}
#ifdef USE_UTF_HACK
/* FIXME - need to move into some function */
do {
int i;
int j;
for (i = j = 0; j < max; ) {
int w = byte_length_at(last + i);
int v = visual_width_at(last + i);
if (j + v > max) break;
bcopy(last, d, w);
i += w;
j += v;
}
d[i] = '\0';
} while (0);
#else
safecpy(d, last, max); /* "max - (d-name)" would be overkill */
#endif
return name;
}
/* vis_len, vis_namelen and vis_midlen are macro aliases for len, namelen and
 * midlen only when USE_UTF_HACK is not defined (with it they are real local
 * variables and these directives are no-ops).  Remove the aliases here so
 * they cannot rewrite identifiers in the code that follows.
 * Note: a directive takes no trailing ';' - extra tokens after the macro
 * name draw a compiler diagnostic. */
#undef vis_len
#undef vis_namelen
#undef vis_midlen

/* Compress an email address, trying to keep as much of the local part of
** the addresses as possible. The order of precence is @ ! %, but
Expand Down
4 changes: 4 additions & 0 deletions tests/test_utf.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ static char *test_byte_length_at__ascii () {

/* Checks byte_length_at() on two accented Latin letters written as literals
 * in this source file; each is expected to occupy 2 bytes.
 * NOTE(review): despite the "iso8859_1" suffix, a 2-byte result implies the
 * literals are stored as UTF-8 in this file - confirm the file encoding.
 * Falls through to "return 0" after the checks; presumably mu_assert (a
 * macro defined elsewhere) returns its message early when a check fails -
 * confirm against the test harness. */
static char *test_byte_length_at__iso8859_1 () {
mu_assert("error, byte_length_at(\"á\") != 2", byte_length_at("á") == 2);
mu_assert("error, byte_length_at(\"î\") != 2", byte_length_at("î") == 2);
return 0;
}

Expand Down Expand Up @@ -213,6 +214,9 @@ static char *test_visual_length_of__ascii () {

/* Checks visual_length_of() on strings containing accented Latin letters.
 * The expected values count one column per visible letter:
 *   - precomposed accented letters count as one column each;
 *   - the octal escape \314\201 is the byte pair 0xCC 0x81, the UTF-8
 *     encoding of U+0301 (combining acute accent), which is expected to
 *     add no width, both after a base letter and on its own.
 * Falls through to "return 0" after the checks; presumably mu_assert (a
 * macro defined elsewhere) returns its message early when a check fails -
 * confirm against the test harness. */
static char *test_visual_length_of__iso_8859_1 () {
mu_assert("error, visual_length_of(\"liberté\") != 7", visual_length_of("liberté") == 7);
/* Precomposed form: each accented e is a single character. */
mu_assert("error, visual_length_of(\"bébé\") != 4", visual_length_of("bébé") == 4);
/* Decomposed form: plain 'e' followed by a combining acute accent. */
mu_assert("error, visual_length_of(\"be\314\201be\314\201\") /* combining acute */ != 4", visual_length_of("be\314\201be\314\201") == 4);
/* A lone combining mark is expected to have zero visual length. */
mu_assert("error, visual_length_of(\"\314\201\") /* combining acute */ != 0", visual_length_of("\314\201") == 0);
return 0;
}

Expand Down
35 changes: 28 additions & 7 deletions utf.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,18 +186,17 @@ const char *s;
if (!it) {
;
} else if (IS_UTF8(gs.in)) {
size_t n = strlen(s);
if (n > 0 && (*s & 0x80) == 0) {
if ((*s & 0x80) == 0) {
;
} else if (n > 1 && (*s & 0xE0) == 0xC0 && OK(s + 1)) {
} else if ((*s & 0xE0) == 0xC0 && OK(s + 1)) {
it = 2;
} else if (n > 2 && (*s & 0xF0) == 0xE0 && OK(s + 1) && OK(s + 2)) {
} else if ((*s & 0xF0) == 0xE0 && OK(s + 1) && OK(s + 2)) {
it = 3;
} else if (n > 3 && (*s & 0xF8) == 0xF0 && OK(s + 1) && OK(s + 2) && OK(s + 3)) {
} else if ((*s & 0xF8) == 0xF0 && OK(s + 1) && OK(s + 2) && OK(s + 3)) {
it = 4;
} else if (n > 4 && (*s & 0xFC) == 0xF8 && OK(s + 1) && OK(s + 2) && OK(s + 3) && OK(s + 4)) {
} else if ((*s & 0xFC) == 0xF8 && OK(s + 1) && OK(s + 2) && OK(s + 3) && OK(s + 4)) {
it = 5;
} else if (n > 5 && (*s & 0xFE) == 0xFC && OK(s + 1) && OK(s + 2) && OK(s + 3) && OK(s + 4) && OK(s + 5)) {
} else if ((*s & 0xFE) == 0xFC && OK(s + 1) && OK(s + 2) && OK(s + 3) && OK(s + 4) && OK(s + 5)) {
it = 6;
} else {
/* FIXME - invalid UTF-8 */
Expand Down Expand Up @@ -258,6 +257,28 @@ const char *s;
return it;
}

/* Add up the visual widths of the characters found between two pointers.
 *
 * The pointers may be given in either order; the walk always starts at the
 * lower address and continues while the current position is below the
 * higher address and a NUL byte has not been reached.  Each step advances
 * by byte_length_at() and adds visual_width_at() for that position, so a
 * character that begins before the upper pointer is counted in full even
 * if its bytes extend to or past it.
 *
 * Returns 0 if either pointer is null.
 */
int
visual_length_between(s1, s2)
const char *s1;
const char *s2;
{
    const char *cur;
    const char *stop;
    int total = 0;

    if (!s1 || !s2)
        return 0;

    /* Normalise the argument order so that cur <= stop. */
    if (s1 > s2) {
        cur = s2;
        stop = s1;
    } else {
        cur = s1;
        stop = s2;
    }

    while (*cur && cur < stop) {
        int step = byte_length_at(cur);
        total += visual_width_at(cur);
        cur += step;
    }
    return total;
}

CODE_POINT
code_point_at(s)
const char *s;
Expand Down
1 change: 1 addition & 0 deletions utf.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ bool at_norm_char(const char *);
/* Number of bytes used by the character starting at the pointer.  The part
 * of the definition visible in utf.c yields 2 to 6 for multi-byte UTF-8
 * sequences; other cases are decided in code not shown here. */
int byte_length_at(const char *);
/* NOTE(review): presumably the display width of the single character at the
 * pointer - utf.c sums it per character in visual_length_between(); confirm
 * against the definition. */
int visual_width_at(const char *);
/* NOTE(review): presumably the display width of a whole NUL-terminated
 * string; the unit tests expect one column per visible letter and zero for
 * a combining accent - confirm against the definition. */
int visual_length_of(const char *);
/* Sum of visual_width_at() over the characters between the two pointers,
 * which may be passed in either order; returns 0 if either is null. */
int visual_length_between(const char *, const char *);
int insert_unicode_at(char *, CODE_POINT);

#define INVALID_CODE_POINT ((CODE_POINT) ~0L)
Expand Down

0 comments on commit 447ecd0

Please sign in to comment.