Skip to content

Commit

Permalink
Teach spawn_executable about Unicode
Browse files Browse the repository at this point in the history
Also corrected compressed files on Windows
  • Loading branch information
bufflig committed Nov 30, 2010
1 parent 9622ab2 commit fed20c7
Show file tree
Hide file tree
Showing 8 changed files with 424 additions and 148 deletions.
91 changes: 39 additions & 52 deletions erts/emulator/beam/erl_bif_port.c
Expand Up @@ -610,6 +610,7 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump)
int binary_io;
int soft_eof;
Sint linebuf;
Eterm edir = NIL;
byte dir[MAXPATHLEN];

/* These are the defaults */
Expand Down Expand Up @@ -686,19 +687,10 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump)

} else if (option == am_arg0) {
char *a0;
int n;
if (is_nil(*tp)) {
n = 0;
} else if( (n = is_string(*tp)) == 0) {

if ((a0 = erts_convert_filename_to_native(*tp, ERTS_ALC_T_TMP, 1)) == NULL) {
goto badarg;
}
a0 = (char *) erts_alloc(ERTS_ALC_T_TMP,
(n + 1) * sizeof(byte));
if (intlist_to_buf(*tp, a0, n) != n) {
erl_exit(1, "%s:%d: Internal error\n",
__FILE__, __LINE__);
}
a0[n] = '\0';
if (opts.argv == NULL) {
opts.argv = erts_alloc(ERTS_ALC_T_TMP,
2 * sizeof(char **));
Expand All @@ -711,22 +703,7 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump)
opts.argv[0] = a0;
}
} else if (option == am_cd) {
Eterm iolist;
DeclareTmpHeap(heap,4,p);
int r;

UseTmpHeap(4,p);
heap[0] = *tp;
heap[1] = make_list(heap+2);
heap[2] = make_small(0);
heap[3] = NIL;
iolist = make_list(heap);
r = io_list_to_buf(iolist, (char*) dir, MAXPATHLEN);
UnUseTmpHeap(4,p);
if (r < 0) {
goto badarg;
}
opts.wd = (char *) dir;
edir = *tp;
} else {
goto badarg;
}
Expand Down Expand Up @@ -838,19 +815,7 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump)
goto badarg;
}
name = tp[1];
if (is_atom(name)) {
name_buf = (char *) erts_alloc(ERTS_ALC_T_TMP,
atom_tab(atom_val(name))->len+1);
sys_memcpy((void *) name_buf,
(void *) atom_tab(atom_val(name))->name,
atom_tab(atom_val(name))->len);
name_buf[atom_tab(atom_val(name))->len] = '\0';
} else if ((i = is_string(name))) {
name_buf = (char *) erts_alloc(ERTS_ALC_T_TMP, i + 1);
if (intlist_to_buf(name, name_buf, i) != i)
erl_exit(1, "%s:%d: Internal error\n", __FILE__, __LINE__);
name_buf[i] = '\0';
} else {
if ((name_buf = erts_convert_filename_to_native(name,ERTS_ALC_T_TMP,0)) == NULL) {
goto badarg;
}
opts.spawn_type = ERTS_SPAWN_EXECUTABLE;
Expand Down Expand Up @@ -892,7 +857,33 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump)
/* Argument vector only if explicit spawn_executable */
goto badarg;
}


if (edir != NIL) {
/* A working directory is expressed differently if spawn_executable, i.e. Unicode is handles
for spawn_executable... */
if (opts.spawn_type != ERTS_SPAWN_EXECUTABLE) {
Eterm iolist;
DeclareTmpHeap(heap,4,p);
int r;

UseTmpHeap(4,p);
heap[0] = edir;
heap[1] = make_list(heap+2);
heap[2] = make_small(0);
heap[3] = NIL;
iolist = make_list(heap);
r = io_list_to_buf(iolist, (char*) dir, MAXPATHLEN);
UnUseTmpHeap(4,p);
if (r < 0) {
goto badarg;
}
opts.wd = (char *) dir;
} else {
if ((opts.wd = erts_convert_filename_to_native(edir,ERTS_ALC_T_TMP,0)) == NULL) {
goto badarg;
}
}
}

if (driver != &spawn_driver && opts.exit_status) {
goto badarg;
Expand Down Expand Up @@ -941,6 +932,9 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump)
if (opts.argv) {
free_args(opts.argv);
}
if (opts.wd && opts.wd != ((char *)dir)) {
erts_free(ERTS_ALC_T_TMP, (void *) opts.wd);
}
return port_num;

badarg:
Expand All @@ -950,6 +944,7 @@ open_port(Process* p, Eterm name, Eterm settings, int *err_nump)
#undef OPEN_PORT_ERROR
}

/* Arguments can be given i unicode and as raw binaries, convert filename is used to convert */
static char **convert_args(Eterm l)
{
char **pp;
Expand All @@ -966,22 +961,14 @@ static char **convert_args(Eterm l)
pp[i++] = erts_default_arg0;
while (is_list(l)) {
str = CAR(list_val(l));

if (is_nil(str)) {
n = 0;
} else if( (n = is_string(str)) == 0) {
/* Not a string... */
if ((b = erts_convert_filename_to_native(str,ERTS_ALC_T_TMP,1)) == NULL) {
int j;
for (j = 1; j < i; ++j)
erts_free(ERTS_ALC_T_TMP, pp[j]);
erts_free(ERTS_ALC_T_TMP, pp);
return NULL;
}
b = (char *) erts_alloc(ERTS_ALC_T_TMP, (n + 1) * sizeof(byte));
pp[i++] = (char *) b;
if (intlist_to_buf(str, b, n) != n)
erl_exit(1, "%s:%d: Internal error\n", __FILE__, __LINE__);
b[n] = '\0';
}
pp[i++] = b;
l = CDR(list_val(l));
}
pp[i] = NULL;
Expand Down
73 changes: 73 additions & 0 deletions erts/emulator/beam/erl_unicode.c
Expand Up @@ -2023,6 +2023,79 @@ BIF_RETTYPE binary_to_existing_atom_2(BIF_ALIST_2)
* Simpler non-interruptable routines for UTF-8 and
* Windowish UTF-16 (restricted)
**********************************************************/
/*
* This function is the heart of the Unicode support for
* open_port - spawn_executable. It converts both the name
* of the executable and the arguments according to the same rules
* as for filename conversion. That means as if your arguments are
* to be raw, you supply binaries, else unicode characters are allowed up to
* the encoding maximum (256 of the unicode max).
* Depending on the filename encoding standard, the vector is then
* converted to whatever is used, which might mean win_utf16 if on windows.
* Do not peek into the argument vector or filenam with ordinary
* string routines, that will certainly fail on some OS.
*/

char *erts_convert_filename_to_native(Eterm name, ErtsAlcType_t alloc_type, int allow_empty)
{
int encoding = erts_get_native_filename_encoding();
char* name_buf = NULL;

if (is_atom(name) || is_list(name) || (allow_empty && is_nil(name))) {
Sint need;
if ((need = erts_native_filename_need(name,encoding)) < 0) {
return NULL;
}
if (encoding == ERL_FILENAME_WIN_WCHAR) {
need += 2;
} else {
++need;
}
name_buf = (char *) erts_alloc(alloc_type, need);
erts_native_filename_put(name,encoding,(byte *)name_buf);
name_buf[need-1] = 0;
if (encoding == ERL_FILENAME_WIN_WCHAR) {
name_buf[need-2] = 0;
}
} else if (is_binary(name)) {
byte *temp_alloc = NULL;
byte *bytes;
byte *err_pos;
Uint size,num_chars;

size = binary_size(name);
bytes = erts_get_aligned_binary_bytes(name, &temp_alloc);
if (encoding != ERL_FILENAME_WIN_WCHAR) {
/*Add 0 termination only*/
name_buf = (char *) erts_alloc(alloc_type, size+1);
memcpy(name_buf,bytes,size);
name_buf[size]=0;
} else if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK ||
erts_get_user_requested_filename_encoding() == ERL_FILENAME_LATIN1) {
byte *p;
/* What to do now? Maybe latin1, so just take byte for byte instead */
name_buf = (char *) erts_alloc(alloc_type, (size+1)*2);
p = (byte *) name_buf;
while (size--) {
*p++ = *bytes++;
*p++ = 0;
}
*p++ = 0;
*p++ = 0;
} else { /* WIN_WCHAR and valid UTF8 */
name_buf = (char *) erts_alloc(alloc_type, (num_chars+1)*2);
erts_copy_utf8_to_utf16_little((byte *) name_buf, bytes, num_chars);
name_buf[num_chars*2] = 0;
name_buf[num_chars*2+1] = 0;
}
erts_free_aligned_binary_bytes(temp_alloc);
} else {
return NULL;
}
return name_buf;
}


Sint erts_native_filename_need(Eterm ioterm, int encoding)
{
Eterm *objp;
Expand Down
2 changes: 2 additions & 0 deletions erts/emulator/beam/global.h
Expand Up @@ -1602,6 +1602,8 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding);
void erts_copy_utf8_to_utf16_little(byte *target, byte *bytes, int num_chars);
int erts_analyze_utf8(byte *source, Uint size,
byte **err_pos, Uint *num_chars, int *left);
char *erts_convert_filename_to_native(Eterm name, ErtsAlcType_t alloc_type, int allow_empty);

#define ERTS_UTF8_OK 0
#define ERTS_UTF8_INCOMPLETE 1
#define ERTS_UTF8_ERROR 2
Expand Down
4 changes: 2 additions & 2 deletions erts/emulator/drivers/common/efile_drv.c
Expand Up @@ -1401,7 +1401,7 @@ static void invoke_readlink(void *data)
d->result_ok = efile_readlink(&d->errInfo, d->b, resbuf+1,
RESBUFSIZE-1);
if (d->result_ok != 0)
strcpy((char *) d->b + 1, resbuf+1);
FILENAME_COPY((char *) d->b + 1, resbuf+1);
}

static void invoke_altname(void *data)
Expand All @@ -1413,7 +1413,7 @@ static void invoke_altname(void *data)
d->result_ok = efile_altname(&d->errInfo, d->b, resbuf+1,
RESBUFSIZE-1);
if (d->result_ok != 0)
strcpy((char *) d->b + 1, resbuf+1);
FILENAME_COPY((char *) d->b + 1, resbuf+1);
}

static void invoke_pwritev(void *data) {
Expand Down
56 changes: 53 additions & 3 deletions erts/emulator/drivers/common/gzio.c
Expand Up @@ -28,6 +28,7 @@

#ifdef __WIN32__
#define HAVE_CONFLICTING_FREAD_DECLARATION
#define FILENAMES_16BIT 1
#endif

#ifdef STDC
Expand Down Expand Up @@ -102,6 +103,40 @@ local uLong getLong OF((gz_stream *s));
# define ERTS_GZREAD(File, Buf, Count) fread((Buf), 1, (Count), (File))
#endif

/*
* Ripped from efile_drv.c
*/

#ifdef FILENAMES_16BIT
# define FILENAME_BYTELEN(Str) filename_len_16bit(Str)
# define FILENAME_COPY(To,From) filename_cpy_16bit((To),(From))
# define FILENAME_CHARSIZE 2

static int filename_len_16bit(const char *str)
{
const char *p = str;
while(*p != '\0' || p[1] != '\0') {
p += 2;
}
return (p - str);
}

static void filename_cpy_16bit(char *to, const char *from)
{
while(*from != '\0' || from[1] != '\0') {
*to++ = *from++;
*to++ = *from++;
}
*to++ = *from++;
*to++ = *from++;
}

#else
# define FILENAME_BYTELEN(Str) strlen(Str)
# define FILENAME_COPY(To,From) strcpy(To,From)
# define FILENAME_CHARSIZE 1
#endif

/* ===========================================================================
Opens a gzip (.gz) file for reading or writing. The mode parameter
is as in fopen ("rb" or "wb"). The file is given either by file descriptor
Expand Down Expand Up @@ -144,11 +179,11 @@ local gzFile gz_open (path, mode)
s->position = 0;
s->destroy = destroy;

s->path = (char*)ALLOC(strlen(path)+1);
s->path = (char*)ALLOC(FILENAME_BYTELEN(path)+FILENAME_CHARSIZE);
if (s->path == NULL) {
return s->destroy(s), (gzFile)Z_NULL;
}
strcpy(s->path, path); /* do this early for debugging */
FILENAME_COPY(s->path, path); /* do this early for debugging */

s->mode = '\0';
do {
Expand Down Expand Up @@ -194,7 +229,22 @@ local gzFile gz_open (path, mode)
s->stream.avail_out = Z_BUFSIZE;

errno = 0;
#ifdef UNIX
#if defined(FILENAMES_16BIT)
{
char wfmode[160];
int i=0,j;
for(j=0;fmode[j] != '\0';++j) {
wfmode[i++]=fmode[j];
wfmode[i++]='\0';
}
wfmode[i++] = '\0';
wfmode[i++] = '\0';
s->file = F_OPEN(path, wfmode);
if (s->file == NULL) {
return s->destroy(s), (gzFile)Z_NULL;
}
}
#elif defined(UNIX)
if (s->mode == 'r') {
s->file = open(path, O_RDONLY);
} else {
Expand Down

0 comments on commit fed20c7

Please sign in to comment.