Permalink
Browse files

Handle UTF-8 in cursor movement & backspace

It's a little bit ugly, because we're storing everything as UTF-8 internally.
 We should probably change it all to UCS-4, and convert to (and from) UTF-8 at
 the last (resp. first) moment.  However, that'd be a big overhaul, and it's
 not clear what we should do when the input is not well-formed UTF-8.
The present solution works for now, but it's one of those bits of code you
 *really* hope won't break and need maintenance.
  • Loading branch information...
1 parent 51f866c commit e1f1541938363a15de4dcc91304fe29c5086e4ed @ec429 committed Dec 18, 2012
Showing with 90 additions and 43 deletions.
  1. +0 −1 buffer.c
  2. +1 −0 buffer.h
  3. +78 −39 input.c
  4. +1 −0 input.h
  5. +0 −2 plans
  6. +8 −0 strbuf.c
  7. +2 −1 strbuf.h
View
1 buffer.c
@@ -12,7 +12,6 @@
#include "ctbuf.h"
ctchar *highlight(const char *src, size_t *len); // use colours to highlight \escapes. Returns a malloc-like pointer
-bool isutf8(const char *src, size_t *len); // determine if a string starts with a non-ASCII UTF8 character; if so, give its length (in bytes) in len. If this function returns false, the value of *len is undefined
int init_ring(ring *r)
{
View
1 buffer.h
@@ -118,3 +118,4 @@ void titlebar(void);
int findptab(int b, const char *src);
int makeptab(int b, const char *src);
void timestamp(char stamp[STAMP_LEN], time_t t);
+bool isutf8(const char *src, size_t *len); // determine if a string starts with a non-ASCII UTF8 character; if so, give its length (in bytes) in len. If this function returns false, the value of *len is undefined
View
117 input.c
@@ -9,6 +9,10 @@
#include "input.h"
#include "logging.h"
+size_t i_firstlen(ichar src);
+size_t i_lastlen(ichar src);
+void i_move(iline *inp, ssize_t bytes);
+
int inputchar(iline *inp, int *state)
{
int c=getchar();
@@ -55,13 +59,19 @@ int inputchar(iline *inp, int *state)
if(c!='\t')
ttab=false;
if(mod==KEY_BS) // backspace
- back_ichar(&inp->left);
+ {
+ size_t ll=i_lastlen(inp->left);
+ for(size_t i=0;i<ll;i++)
+ back_ichar(&inp->left);
+ }
else if((mod<0) && (c<32)) // this also stomps on the newline
{
back_ichar(&inp->left);
if(c==8) // C-h ~= backspace
{
- back_ichar(&inp->left);
+ size_t ll=i_lastlen(inp->left);
+ for(size_t i=0;i<ll;i++)
+ back_ichar(&inp->left);
}
if(c==1) // C-a ~= home
{
@@ -253,53 +263,29 @@ int inputchar(iline *inp, int *state)
}
}
else if(mod==KEY_RIGHT)
- {
- if(inp->right.data && *inp->right.data)
- {
- append_char(&inp->left.data, &inp->left.l, &inp->left.i, inp->right.data[0]);
- char *nr=strdup(inp->right.data+1);
- free(inp->right.data);
- inp->right.data=nr;
- inp->right.i--;
- inp->right.l=0;
- }
- }
+ i_move(inp, i_firstlen(inp->right));
else if(mod==KEY_LEFT)
- {
- if(inp->left.i)
- {
- unsigned char e=back_ichar(&inp->left);
- if(e)
- {
- char *nr=(char *)malloc(inp->right.i+2);
- *nr=e;
- if(inp->right.data)
- {
- strcpy(nr+1, inp->right.data);
- free(inp->right.data);
- }
- else
- {
- nr[1]=0;
- }
- inp->right.data=nr;
- inp->right.i++;
- inp->right.l=inp->right.i+1;
- }
- }
- }
+ i_move(inp, -i_lastlen(inp->left));
else if(mod==KEY_HOME)
i_home(inp);
else if(mod==KEY_END)
i_end(inp);
else if(mod==KEY_DELETE)
{
- if(inp->right.data && inp->right.i)
+ size_t fl=i_firstlen(inp->right);
+ if(inp->right.data&&(inp->right.i>fl))
{
- char *nr=strdup(inp->right.data+1);
+ char *nr=strdup(inp->right.data+fl);
free(inp->right.data);
inp->right.data=nr;
- inp->right.l=inp->right.i--;
+ inp->right.i-=fl;
+ inp->right.l=inp->right.i;
+ }
+ else
+ {
+ free(inp->right.data);
+ inp->right.data=NULL;
+ inp->right.l=inp->right.i=0;
}
}
else if(mod==KEY_CPGUP)
@@ -1825,6 +1811,59 @@ char back_ichar(ichar *buf)
return(c);
}
+char front_ichar(ichar *buf) // removes the first byte of buf and returns it; returns 0 (leaving buf untouched) when buf->i is 0
+{
+	if(!buf->i)
+		return(0);
+	char first=buf->data[0];
+	// slide data[1..i] down one place: the remaining bytes plus the byte just after them (presumably the terminating NUL; confirm against append_char)
+	memmove(buf->data, buf->data+1, buf->i);
+	buf->i--;
+	return(first);
+}
+
+size_t i_firstlen(ichar src) // length in bytes of the character at the start of src; 0 if src is empty
+{
+	size_t seqlen;
+	if(src.i==0)
+		return(0);
+	// isutf8() reports the byte length of a leading non-ASCII UTF-8 character; anything else counts as a single byte
+	return(isutf8(src.data, &seqlen)?seqlen:1);
+}
+
+size_t i_lastlen(ichar src) // length in bytes of the character at the end of src; 0 if src is empty
+{
+	size_t pos=max(src.i, 4)-4; // begin scanning at most 4 bytes before the end
+	size_t last=pos; // offset at which the most recently examined character began
+	size_t seqlen;
+	while(pos<src.i)
+	{
+		last=pos;
+		if(isutf8(src.data+pos, &seqlen))
+		{
+			pos+=seqlen; // step over a whole multi-byte character
+			continue;
+		}
+		pos++; // either plain ASCII or a high-bit byte that doesn't begin a character: step one byte
+		// after an ASCII byte, stop as soon as at most one byte remains
+		if(!(src.data[last]&0x80) && (pos+1>=src.i))
+			break;
+	}
+	return(pos-last); // size of the final step taken
+}
+
+void i_move(iline *inp, ssize_t bytes) // shifts bytes between inp->left and inp->right: positive moves right-to-left-buffer (cursor forward), otherwise left-to-right-buffer (cursor back)
+{
+	char c;
+	if(bytes>0)
+	{
+		// take bytes off the front of the right-hand buffer and append them to the left-hand one
+		for(size_t n=bytes;n>0;n--)
+			if((c=front_ichar(&inp->right)))
+				append_char(&inp->left.data, &inp->left.l, &inp->left.i, c);
+		return;
+	}
+	// negate by hand rather than via abs()/labs()/llabs(), since the width of ssize_t isn't known here
+	for(size_t n=-bytes;n>0;n--)
+		if((c=back_ichar(&inp->left)))
+			prepend_char(&inp->right.data, &inp->right.l, &inp->right.i, c);
+}
+
void ifree(iline *buf)
{
free(buf->left.data);
View
1 input.h
@@ -57,6 +57,7 @@ void initibuf(ibuffer *i);
void addtoibuf(ibuffer *i, char *data);
void freeibuf(ibuffer *i);
char back_ichar(ichar *buf); // returns the deleted char
+char front_ichar(ichar *buf); // returns the deleted char
void ifree(iline *buf);
void i_home(iline *inp);
View
2 plans
@@ -14,8 +14,6 @@ Remember channel keys for /rejoin. An argument to /rejoin overrides (but doesn'
Redesign irc_connect and friends. At present, we have #ifdef ASYNCH_NL crossing function boundaries in an unpleasantly gnarly way.
-Proper handling of Unicode in character-based things like cursor-movement, backspace.
-
Use unicode-charmap (somewhere in system i18n) to find out character widths, for wordline().
/connrest. Force the conn_rest to be called (to deal with eg. worldofspectrum - see above).
View
8 strbuf.c
@@ -7,6 +7,7 @@
*/
#include <stdlib.h>
+#include <string.h> // for memmove()
#include "strbuf.h"
void append_char(char **buf, size_t *l, size_t *i, char c)
@@ -38,6 +39,13 @@ void append_char(char **buf, size_t *l, size_t *i, char c)
}
}
+void prepend_char(char **buf, size_t *l, size_t *i, char c) // inserts c at the start of the string buffer
+{
+	// let append_char() do any growing by adding a placeholder byte at the end (assumes it bumps *i and keeps the buffer terminated; its body is not visible here)
+	append_char(buf, l, i, 0);
+	char *s=*buf; // fetch only after append_char(), which receives buf and may have replaced the pointer
+	memmove(s+1, s, *i); // shift the existing contents one place to the right
+	s[0]=c;
+}
+
void append_str(char **buf, size_t *l, size_t *i, const char *str)
{
while(str && *str) // not the most tremendously efficient implementation, but conceptually simple at least
View
3 strbuf.h
@@ -13,5 +13,6 @@
char *fgetl(FILE *); // gets a line of string data; returns a malloc-like pointer
char *slurp(FILE *); // gets an entire file of string data; returns a malloc-like pointer
void init_char(char **buf, size_t *l, size_t *i); // initialises a string buffer in heap. *buf becomes a malloc-like pointer
-void append_char(char **buf, size_t *l, size_t *i, char c); // adds a character to a string buffer in heap (and realloc()s if needed)
+void append_char(char **buf, size_t *l, size_t *i, char c); // adds a character to the end of a string buffer in heap (and realloc()s if needed)
+void prepend_char(char **buf, size_t *l, size_t *i, char c); // adds a character to the start of a string buffer in heap (and realloc()s if needed)
void append_str(char **buf, size_t *l, size_t *i, const char *str); // adds a string to a string buffer in heap (and realloc()s if needed)

0 comments on commit e1f1541

Please sign in to comment.