diff --git a/AUTHORS b/AUTHORS index 93d547b2d0..de27076bf6 100644 --- a/AUTHORS +++ b/AUTHORS @@ -26,7 +26,7 @@ dircolors: H. Peter Anvin dirname: David MacKenzie, Jim Meyering du: Torbjorn Granlund, David MacKenzie, Paul Eggert, Jim Meyering echo: Brian Fox, Chet Ramey -env: Richard Mlynarik, David MacKenzie +env: Richard Mlynarik, David MacKenzie, Assaf Gordon expand: David MacKenzie expr: Mike Parker, James Youngman, Paul Eggert factor: Paul Rubin, Torbjörn Granlund, Niels Möller diff --git a/NEWS b/NEWS index 64671d0d0f..9de2fa05df 100644 --- a/NEWS +++ b/NEWS @@ -53,6 +53,10 @@ GNU coreutils NEWS -*- outline -*- env supports a new -v/--debug option to show verbose information about each processing step. + env supports a new -S/--split-string=S option to split a single argument + string into multiple arguments. Used to pass multiple arguments in scripts + (shebang lines). + md5sum accepts a new option: --zero (-z) to delimit the output lines with a NUL instead of a newline character. This also disables file name escaping. This also applies to sha*sum and b2sum. diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 0e23b1b296..3821b00c3d 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -16901,9 +16901,18 @@ the exit status of @var{command} otherwise @example env [@var{option}]@dots{} [@var{name}=@var{value}]@dots{} @c [@var{command} [@var{args}]@dots{}] +env -[v]S'[@var{option}]@dots{} [@var{name}=@var{value}]@dots{} @c +[@var{command} [@var{args}]@dots{}]' env @end example +@command{env} is commonly used on first line of scripts (shebang line): +@example +#!/usr/bin/env @var{command} +#!/usr/bin/env -[v]S[@var{option}]@dots{} [@var{name}=@var{value}]@dots{} @c +@var{command} [@var{args}]@dots{} +@end example + Operands of the form @samp{@var{variable}=@var{value}} set the environment variable @var{variable} to value @var{value}. @var{value} may be empty (@samp{@var{variable}=}). 
Setting a variable @@ -17002,6 +17011,8 @@ env -u EDITOR PATH=/energy -- e=mc2 bar baz @end itemize +@subsection General options + The program accepts the following options. Also see @ref{Common options}. Options must precede operands. @@ -17059,6 +17070,21 @@ executing: uname Linux @end example +When combined with @option{-S} it is recommended to list @option{-v} +first, e.g. @command{env -vS'string'}. + +@item -S @var{string} +@itemx --split-string=@var{string} +@opindex -S +@opindex --split-string +@cindex shebang arguments +@cindex scripts arguments +@cindex env in scripts +process and split @var{string} into separate arguments used to pass +multiple arguments on shebang lines. @command{env} supports FreeBSD's +syntax of several escape sequences and environment variable +expansions. See below for details and examples. + @end table @cindex exit status of @command{env} @@ -17072,6 +17098,352 @@ Exit status: the exit status of @var{command} otherwise @end display +@subsection @option{-S}/@option{--split-string} usage in scripts + +The @option{-S}/@option{--split-string} options enable using multiple +arguments on the first line of scripts (shebang line, @samp{#!}). 
+ +When a script's interpreter is in a known location, scripts typically +contain the absolute file name in their first line: + +@multitable {Python Script:} {#!/usr/bin/python3} +@item Shell script: +@tab +@example +#!/bin/sh +echo hello +@end example + +@item Perl script: +@tab +@example +#!/usr/bin/perl +print "hello\n"; +@end example + +@item Python script: +@tab +@example +#!/usr/bin/python3 +print("hello") +@end example + +@end multitable + +When a script's interpreter is in a non-standard location +in the @env{PATH} environment variable, it is recommended +to use @command{env} on the first line of the script to +find the executable and run it: + +@multitable {Python Script:} {#!/usr/bin/env python3} +@item Shell script: +@tab +@example +#!/usr/bin/env bash +echo hello +@end example + +@item Perl script: +@tab +@example +#!/usr/bin/env perl +print "hello\n"; +@end example + +@item Python script: +@tab +@example +#!/usr/bin/env python3 +print("hello") +@end example + +@end multitable + +Most operating systems (e.g. GNU/Linux, BSDs) treat all text after the +first space as a single argument. When using @command{env} in a script +it is thus not possible to specify multiple arguments. + +In the following example: +@example +#!/usr/bin/env perl -T -w +print "hello\n"; +@end example + +The operating system treats @samp{perl -T -w} as one argument (the +program's name), and executing the script fails with: + +@example +/usr/bin/env: 'perl -T -w': No such file or directory +@end example + +The @option{-S} option instructs @command{env} to split the single string +into multiple arguments. The following example works as expected: + +@example +$ cat hello.pl +#!/usr/bin/env -S perl -T -w +print "hello\n"; + +$ chmod a+x hello.pl +$ ./hello.pl +hello +@end example + +And is equivalent to running @command{perl -T -w hello.pl} on the command line +prompt. 
+ +@unnumberedsubsubsec Testing and troubleshooting + +@cindex single quotes, and @command{env -S} +@cindex @command{env -S}, and single quotes +@cindex @option{-S}, env and single quotes +To test @command{env -S} on the command line, use single quotes for the +@option{-S} string to emulate a single parameter. Single quotes are not +needed when using @command{env -S} in a shebang line on the first line of a +script (the operating system already treats it as one argument). + +The following command is equivalent to the @file{hello.pl} script above: + +@example +$ env -S'perl -T -w' hello.pl +@end example + +@cindex @command{env -S}, debugging +@cindex debugging, @command{env -S} + +To troubleshoot @option{-S} usage add the @option{-v} as the first +argument (before @option{-S}). + +Using @option{-vS} on a shebang line in a script: + +@example +$ cat hello-debug.pl +#!/usr/bin/env -vS perl -T -w +print "hello\n"; + +$ chmod a+x hello-debug.pl +$ ./hello-debug.pl +split -S: 'perl -T -w' + into: 'perl' + & '-T' + & '-w' +executing: perl + arg[0]= 'perl' + arg[1]= '-T' + arg[2]= '-w' + arg[3]= './hello-debug.pl' +hello +@end example + +Using @option{-vS} on the command line prompt (adding single quotes): + +@example +$ env -vS'perl -T -w' hello-debug.pl +split -S: 'perl -T -w' + into: 'perl' + & '-T' + & '-w' +executing: perl + arg[0]= 'perl' + arg[1]= '-T' + arg[2]= '-w' + arg[3]= 'hello-debug.pl' +hello +@end example + +@subsection @option{-S}/@option{--split-string} syntax + +@unnumberedsubsubsec Splitting arguments by whitespace + +Running @command{env -Sstring} splits the @var{string} into +arguments based on unquoted spaces or tab characters. + +In the following contrived example the @command{awk} variable +@samp{OFS} will be @code{xyz} as these spaces are inside +double quotes. 
The other space characters are used as argument separators: + +@example +$ cat one.awk +#!/usr/bin/env -S awk -v OFS=" xyz " -f +BEGIN @{print 1,2,3@} + +$ chmod a+x one.awk +$ ./one.awk +1 xyz 2 xyz 3 +@end example + +When using @option{-S} on the command line prompt, remember to add +single quotes around the entire string: + +@example +$ env -S'awk -v OFS=" xyz " -f' one.awk +1 xyz 2 xyz 3 +@end example + +@unnumberedsubsubsec Escape sequences + +@command{env} supports several escape sequences. These sequences +are processed when unquoted or inside double quotes (unless otherwise noted). +Single quotes disable escape sequences except @samp{\'} and @samp{\\}. + +@multitable @columnfractions .10 .90 + +@item @code{\c} +@tab Ignore the remaining characters in the string. +Cannot be used inside double quotes. + +@item @code{\f} +@tab form-feed character (ASCII 0x0C) + +@item @code{\n} +@tab new-line character (ASCII 0x0A) + +@item @code{\r} +@tab carriage-return character (ASCII 0x0D) + +@item @code{\t} +@tab tab character (ASCII 0x09) + +@item @code{\v} +@tab vertical tab character (ASCII 0x0B) + +@item @code{\#} +@tab A hash @samp{#} character. Used when a @samp{#} character +is needed as the first character of an argument (see 'comments' section +below). + +@item @code{\$} +@tab A dollar-sign character @samp{$}. Unescaped @samp{$} characters +are used to expand environment variables (see 'variables' section below). + +@item @code{\_} +@tab Inside double-quotes, replaced with a single space character. +Outside quotes, treated as an argument separator. @samp{\_} can be used +to avoid space characters in a shebang line (see examples below). + +@item @code{\"} +@tab A double-quote character. + +@item @code{\'} +@tab A single-quote character. +This escape sequence works inside single-quoted strings. + +@item @code{\\} +@tab A backslash character. +This escape sequence works inside single-quoted strings. 
+ +@end multitable + +The following @command{awk} script will use the tab character as input and output +field separator (instead of spaces and tabs): + +@example +$ cat tabs.awk +#!/usr/bin/env -S awk -v FS="\t" -v OFS="\t" -f +... +@end example + +@unnumberedsubsubsec Comments + +The escape sequence @samp{\c} (used outside single/double quotes) +causes @command{env} to ignore the rest of the string. + +The @samp{#} character causes @command{env} to ignore the rest of +the string when it appears as the first character of an argument. +Use @samp{\#} to reverse this behavior. + +@example +$ env -S'printf %s\n A B C' +A +B +C + +$ env -S'printf %s\n A# B C' +A# +B +C + +$ env -S'printf %s\n A #B C' +A + +$ env -S'printf %s\n A \#B C' +A +#B +C + +$ env -S'printf %s\n A\cB C' +A +@end example + +NOTE: The above examples use single quotes as they are executed +on the command-line. + + + +@unnumberedsubsubsec Environment variable expansion + +The pattern @samp{$@{VARNAME@}} is used to substitute a value from +the environment variable. The pattern must include the curly braces +(@samp{@{},@samp{@}}). Without them @command{env} will reject the string. +Special shell variables (such as @samp{$@@}, @samp{$*}, @samp{$$}) are +not supported. + +If the environment variable is empty or not set, the pattern will be replaced +by an empty string. The value of @samp{$@{VARNAME@}} will be that of +the executed @command{env}, before any modifications using +@option{-i}/@option{--ignore-environment}/@option{-u}/@option{--unset} or +setting new values using @samp{VAR=VALUE}. + +The following python script prepends @file{/opt/custom/modules} to the python +module search path environment variable (@samp{PYTHONPATH}): + +@example +$ cat custom.py +#!/usr/bin/env -S PYTHONPATH=/opt/custom/modules/:$@{PYTHONPATH@} python +print "hello" +... +@end example + +The expansion of @samp{$@{PYTHONPATH@}} is performed by @command{env}, +not by a shell. 
If the curly braces are omitted, @command{env} will fail: + +@example +$ cat custom.py +#!/usr/bin/env -S PYTHONPATH=/opt/custom/modules/:$PYTHONPATH python +print "hello" +... + +$ chmod a+x custom.py +$ custom.py +/usr/bin/env: only $@{VARNAME@} expansion is supported, error at: $PYTHONPATH @c +python +@end example + +Environment variable expansion happens before clearing the environment +(with @option{-i}) or unsetting specific variables (with @option{-u}): + +@example +$ env -S'-i OLDUSER=$@{USER@} env' +OLDUSER=gordon +@end example + +Use @option{-v} to diagnose the operations step-by-step: + +@example +$ env -vS'-i OLDUSER=$@{USER@} env' +expanding $@{USER@} into 'gordon' +split -S: '-i OLDUSER=$@{USER@} env' + into: '-i' + & 'OLDUSER=gordon' + & 'env' +cleaning environ +setenv: OLDUSER=gordon +executing: env + arg[0]= 'env' +OLDUSER=gordon +@end example + + @node nice invocation @section @command{nice}: Run a command with modified niceness diff --git a/man/env.x b/man/env.x index 914fb9cdc4..fb6665b818 100644 --- a/man/env.x +++ b/man/env.x @@ -1,4 +1,39 @@ +'\" Copyright (C) 1998-2018 Free Software Foundation, Inc. +'\" +'\" This is free software. You may redistribute copies of it under the terms +'\" of the GNU General Public License . +'\" There is NO WARRANTY, to the extent permitted by law. [NAME] env \- run a program in a modified environment [DESCRIPTION] .\" Add any additional description here +[OPTIONS] +.SS "\-S/\-\-split\-string usage in scripts" +The +.B \-S +option allows specifying multiple parameters in a script. +Running a script named +.B 1.pl +containing the following first line: +.PP +.RS +.nf +#!/usr/bin/env \-S perl \-w \-T +... +.fi +.RE +.PP +Will execute +.B "perl \-w \-T 1.pl". +.PP +Without the +.B '\-S' +parameter the script will likely fail with: +.PP +.RS +.nf +/usr/bin/env: 'perl \-w \-T': No such file or directory +.fi +.RE +.PP +See the full documentation for more details. 
diff --git a/src/env.c b/src/env.c index a721573d71..97b6d6bfdd 100644 --- a/src/env.c +++ b/src/env.c @@ -20,7 +20,9 @@ #include #include #include +#include +#include #include "system.h" #include "die.h" #include "error.h" @@ -31,16 +33,23 @@ #define AUTHORS \ proper_name ("Richard Mlynarik"), \ - proper_name ("David MacKenzie") + proper_name ("David MacKenzie"), \ + proper_name ("Assaf Gordon") /* array of envvars to unset. */ static const char** usvars; -size_t usvars_alloc; -size_t usvars_used; +static size_t usvars_alloc; +static size_t usvars_used; /* Annotate the output with extra info to aid the user. */ static bool dev_debug; +/* buffer and length of extracted envvars in -S strings. */ +static char *varname; +static size_t vnlen; + +static char const shortopts[] = "+C:iS:u:v0 \t-"; + static struct option const longopts[] = { {"ignore-environment", no_argument, NULL, 'i'}, @@ -48,6 +57,7 @@ static struct option const longopts[] = {"unset", required_argument, NULL, 'u'}, {"chdir", required_argument, NULL, 'C'}, {"debug", no_argument, NULL, 'v'}, + {"split-string", required_argument, NULL, 'S'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} @@ -78,6 +88,8 @@ Set each NAME to VALUE in the environment and run COMMAND.\n\ -C, --chdir=DIR change working directory to DIR\n\ "), stdout); fputs (_("\ + -S, --split-string=S process and split S into separate arguments;\n\ + used to pass multiple arguments on shebang lines\n\ -v, --debug print verbose information for each processing step\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); @@ -117,6 +129,402 @@ unset_envvars (void) IF_LINT (usvars_alloc = 0); } +static bool _GL_ATTRIBUTE_PURE +valid_escape_sequence (const char c) +{ + return (c == 'c' || c == 'f' || c == 'n' || c == 'r' || c == 't' || c == 'v' \ + || c == '#' || c == '$' || c == '_' || c == '"' || c == '\'' \ + || c == '\\'); +} + +static char _GL_ATTRIBUTE_PURE +escape_char (const char c) +{ + switch (c) + { + /* 
\a,\b not supported by FreeBSD's env. */ + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + default: assert (0); /* LCOV_EXCL_LINE */ + } +} + +/* Return a pointer to the end of a valid ${VARNAME} string, or NULL. + 'str' should point to the '$' character. + First letter in VARNAME must be alpha or underscore, + rest of letters are alnum or underscore. Any other character is an error. */ +static const char* _GL_ATTRIBUTE_PURE +scan_varname (const char* str) +{ + assert (str && *str == '$'); /* LCOV_EXCL_LINE */ + if ( *(str+1) == '{' && (c_isalpha (*(str+2)) || *(str+2) == '_')) + { + const char* end = str+3; + while (c_isalnum (*end) || *end == '_') + ++end; + if (*end == '}') + return end; + } + + return NULL; +} + +/* Return a pointer to a static buffer containing the VARNAME as + extracted from a '${VARNAME}' string. + The returned string will be NUL terminated. + The returned pointer should not be freed. + Return NULL if not a valid ${VARNAME} syntax. */ +static char* +extract_varname (const char* str) +{ + ptrdiff_t i; + const char* p; + + p = scan_varname (str); + if (!p) + return NULL; + + /* -2 and +2 (below) account for the '${' prefix. */ + i = p - str - 2; + + if (i >= vnlen) + { + vnlen = i + 1; + varname = xrealloc (varname, vnlen); + } + + memcpy (varname, str+2, i); + varname[i]=0; + + return varname; +} + +/* Validate the "-S" parameter, according to the syntax defined by FreeBSD's + env(1). Terminate with an error message if not valid. + + Calculate and set two values: + bufsize - the size (in bytes) required to hold the resulting string + after ENVVAR expansion (the value is overestimated). + maxargc - the maximum number of arguments (the size of the new argv). 
*/ +static void +validate_split_str (const char* str, size_t* /*out*/ bufsize, + int* /*out*/ maxargc) +{ + bool dq, sq, sp; + const char *pch; + size_t buflen; + int cnt = 1; + + assert (str && str[0] && !isspace (str[0])); /* LCOV_EXCL_LINE */ + + dq = sq = sp = false; + buflen = strlen (str)+1; + + while (*str) + { + const char next = *(str+1); + + if (isspace (*str) && !dq && !sq) + { + sp = true; + } + else + { + if (sp) + ++cnt; + sp = false; + } + + switch (*str) + { + case '\'': + assert (!(sq && dq)); /* LCOV_EXCL_LINE */ + sq = !sq && !dq; + break; + + case '"': + assert (!(sq && dq)); /* LCOV_EXCL_LINE */ + dq = !sq && !dq; + break; + + case '\\': + if (dq && next == 'c') + die (EXIT_CANCELED, 0, + _("'\\c' must not appear in double-quoted -S string")); + + if (next == '\0') + die (EXIT_CANCELED, 0, + _("invalid backslash at end of string in -S")); + + if (!valid_escape_sequence (next)) + die (EXIT_CANCELED, 0, _("invalid sequence '\\%c' in -S"), next); + + if (next == '_') + ++cnt; + + ++str; + break; + + + case '$': + if (sq) + break; + + if (!(pch = extract_varname (str))) + die (EXIT_CANCELED, 0, _("only ${VARNAME} expansion is supported,"\ + " error at: %s"), str); + + if ((pch = getenv (pch))) + buflen += strlen (pch); + break; + } + ++str; + } + + if (dq || sq) + die (EXIT_CANCELED, 0, _("no terminating quote in -S string")); + + *maxargc = cnt; + *bufsize = buflen; +} + +/* Return a newly-allocated *arg[]-like array, + by parsing and splitting the input 'str'. + 'extra_argc' is the number of additional elements to allocate + in the array (on top of the number of args required to split 'str'). + + Example: + char **argv = build_argv ("A=B uname -k', 3) + Results in: + argv[0] = "DUMMY" - dummy executable name, can be replaced later. + argv[1] = "A=B" + argv[2] = "uname" + argv[3] = "-k" + argv[4] = NULL + argv[5,6,7] = [allocated due to extra_argc, but not initialized] + + The strings are stored in an allocated buffer, pointed by argv[0]. 
+ To free allocated memory: + free (argv[0]); + free (argv); */ +static char** +build_argv (const char* str, int extra_argc) +{ + bool dq = false, sq = false, sep = true; + char *dest; /* buffer to hold the new argv values. allocated as one buffer, + but will contain multiple NUL-terminate strings. */ + char **newargv, **nextargv; + int newargc = 0; + size_t buflen = 0; + + /* This macro is called before inserting any characters to the output + buffer. It checks if the previous character was a separator + and if so starts a new argv element. */ +#define CHECK_START_NEW_ARG \ + do { \ + if (sep) \ + { \ + *dest++ = '\0'; \ + *nextargv++ = dest; \ + sep = false; \ + } \ + } while (0) + + assert (str && str[0] && !isspace (str[0])); /* LCOV_EXCL_LINE */ + + validate_split_str (str, &buflen, &newargc); + + /* allocate buffer. +6 for the "DUMMY\0" executable name, +1 for NUL. */ + dest = xmalloc (buflen + 6 + 1); + + /* allocate the argv array. + +2 for the program name (argv[0]) and the last NULL pointer. */ + nextargv = newargv = xmalloc ((newargc + extra_argc + 2) * sizeof (char *)); + + /* argv[0] = executable's name - will be replaced later. */ + strcpy (dest, "DUMMY"); + *nextargv++ = dest; + dest += 6; + + /* In the following loop, + 'break' causes the character 'newc' to be added to *dest, + 'continue' skips the character. */ + while (*str) + { + char newc = *str; /* default: add the next character. */ + + switch (*str) + { + case '\'': + if (dq) + break; + sq = !sq; + CHECK_START_NEW_ARG; + ++str; + continue; + + case '"': + if (sq) + break; + dq = !dq; + CHECK_START_NEW_ARG; + ++str; + continue; + + case ' ': + case '\t': + /* space/tab outside quotes starts a new argument. */ + if (sq || dq) + break; + sep = true; + str += strspn (str, " \t"); /* skip whitespace. */ + continue; + + case '#': + if (!sep) + break; + goto eos; /* '#' as first char terminates the string. */ + + case '\\': + /* backslash inside single-quotes is not special, except \\ and \'. 
*/ + if (sq && *(str+1) != '\\' && *(str+1) != '\'') + break; + + /* skip the backslash and examine the next character. */ + newc = *(++str); + if ((newc == '\\' || newc == '\'') + || (!sq && (newc == '#' || newc == '$' || newc == '"'))) + { + /* Pass escaped character as-is. */ + } + else if (newc == '_') + { + if (!dq) + { + ++str; /* '\_' outside double-quotes is arg separator. */ + sep = true; + continue; + } + else + newc = ' '; /* '\_' inside double-quotes is space. */ + } + else if (newc == 'c') + goto eos; /* '\c' terminates the string. */ + else + newc = escape_char (newc); /* other characters (e.g. '\n'). */ + break; + + case '$': + /* ${VARNAME} are not expanded inside single-quotes. */ + if (sq) + break; + + /* Store the ${VARNAME} value. Error checking omitted as + the ${VARNAME} was already validated. */ + { + char *n = extract_varname (str); + char *v = getenv (n); + if (v) + { + CHECK_START_NEW_ARG; + devmsg ("expanding ${%s} into %s\n", n, quote (v)); + dest = stpcpy (dest, v); + } + else + devmsg ("replacing ${%s} with null string\n", n); + + str = strchr (str, '}') + 1; + continue; + } + + } + + CHECK_START_NEW_ARG; + *dest++ = newc; + ++str; + } + + eos: + *dest = '\0'; + *nextargv = NULL; /* mark the last element in argv as NULL. */ + + return newargv; +} + +/* Process an "-S" string and create the corresponding argv array. + Update the given argc/argv parameters with the new argv. + + Example: if executed as: + $ env -S"-i -C/tmp A=B" foo bar + The input argv is: + argv[0] = 'env' + argv[1] = "-S-i -C/tmp A=B" + argv[2] = foo + argv[3] = bar + This function will modify argv to be: + argv[0] = 'env' + argv[1] = "-i" + argv[2] = "-C/tmp" + argv[3] = A=B" + argv[4] = foo + argv[5] = bar + argc will be updated from 4 to 6. + optind will be reset to 0 to force getopt_long to rescan all arguments. 
*/ +static void +parse_split_string (const char* str, int /*out*/ *orig_optind, + int /*out*/ *orig_argc, char*** /*out*/ orig_argv) +{ + int i, newargc; + char **newargv, **nextargv; + + + while (isspace (*str)) + str++; + if (*str == '\0') + return; + + newargv = build_argv (str, *orig_argc - *orig_optind); + + /* restore argv[0] - the 'env' executable name */ + *newargv = (*orig_argv)[0]; + + /* Start from argv[1] */ + nextargv = newargv + 1; + + /* Print parsed arguments */ + if (dev_debug && *nextargv) + { + devmsg ("split -S: %s\n", quote (str)); + devmsg (" into: %s\n", quote (*nextargv++)); + while (*nextargv) + devmsg (" & %s\n", quote (*nextargv++)); + } + else + { + /* Ensure nextargv points to the last argument */ + while (*nextargv) + ++nextargv; + } + + /* Add remaining arguments from original command line */ + for (i = *orig_optind; i < *orig_argc; ++i) + *nextargv++ = (*orig_argv)[i]; + *nextargv = NULL; + + /* Count how many new arguments we have */ + newargc = 0; + for (nextargv = newargv; *nextargv; ++nextargv) + ++newargc; + + /* set new values for original getopt variables */ + *orig_argc = newargc; + *orig_argv = newargv; + *orig_optind = 0; /* tell getopt to restart from first argument */ +} + int main (int argc, char **argv) { @@ -134,7 +542,7 @@ main (int argc, char **argv) initialize_exit_failure (EXIT_CANCELED); atexit (close_stdout); - while ((optc = getopt_long (argc, argv, "+C:iu:v0", longopts, NULL)) != -1) + while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) { switch (optc) { @@ -153,6 +561,21 @@ main (int argc, char **argv) case 'C': newdir = optarg; break; + case 'S': + parse_split_string (optarg, &optind, &argc, &argv); + break; + case ' ': + case '\t': + case '-': + /* Space,tab,dash are undocumented options. Attempt to detect + incorrect shebang usage with extraneous space, e.g.: + #!/usr/bin/env -i command + In which case argv[1] == "-i command". 
*/ + error (0, 0, _("invalid option -- '%c'"), optc); + if (argc == 3) + error (0, 0, _("use -[v]S to pass options in shebang lines")); + usage (EXIT_CANCELED); + case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); default: @@ -232,5 +655,9 @@ main (int argc, char **argv) int exit_status = errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE; error (0, errno, "%s", quote (argv[optind])); + + if (argc == 3 && exit_status == EXIT_ENOENT && strchr (argv[optind], ' ')) + error (0, 0, _("use -[v]S to pass options in shebang lines")); + return exit_status; } diff --git a/tests/local.mk b/tests/local.mk index 528f9e5331..e13a6aad5c 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -290,6 +290,8 @@ all_tests = \ tests/misc/dircolors.pl \ tests/misc/dirname.pl \ tests/misc/env-null.sh \ + tests/misc/env-S.pl \ + tests/misc/env-S-script.sh \ tests/misc/expand.pl \ tests/misc/expr.pl \ tests/misc/expr-multibyte.pl \ diff --git a/tests/misc/env-S-script.sh b/tests/misc/env-S-script.sh new file mode 100755 index 0000000000..d164be5bf8 --- /dev/null +++ b/tests/misc/env-S-script.sh @@ -0,0 +1,141 @@ +#!/bin/sh +# Test env -S in a #! line of a script. + +# Copyright (C) 2018 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +. 
"${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ env +print_ver_ printf + +# a shortcut to avoid long lines +dir="$abs_top_builddir/src" + +# A simple shebang program to call our new "env" +printf "#!$dir/env sh\necho hello\n" > env_test || framework_failure_ +chmod a+x env_test || framework_failure_ + +# Verify we can run the shebang which is not the case if +# there are spaces in $abs_top_builddir. +./env_test || skip_ "Error running env_test script" + + +# This script (without -S) should not work if the OS does not +# support multiple arguments on the shebang line. +# Ignoring the absolute paths, the script is: +# #!env printf x%sx\n A B +printf "#!$dir/env $dir/printf "'x%%sx\\n A B\n' > env_bad || + framework_failure_ +chmod a+x env_bad || framework_failure_ +returns_ 127 ./env_bad || + warn_ 'Note: OS natively accepts multiple arguments on shebang line' + +# env should execute 'printf' with 7 parameters: +# 'x%sx\n', 'A', 'B' from the "-S" argument, +# the name of the executed script, and its 3 parameters (C,D,'E F'). +# Ignoring the absolute paths, the script is: +# #!env -S printf x%sx\n A B +printf "#!$dir/env -S $dir/printf "'x%%sx\\n A B\n' > env1 || framework_failure_ +chmod a+x env1 || framework_failure_ +cat<<\EOF>exp1 || framework_failure_ +xAx +xBx +x./env1x +xCx +xDx +xE Fx +EOF +./env1 C D "E F" > out1 || fail=1 +compare exp1 out1 || fail=1 + + +# similar to the above test but with quotes, the first parameter should be +# 'A B' and not two paramaters 'A','B'. +# Ignoring the absolute paths, the script is: +# #!env -S printf x%sx\n "A B" +printf "#!$dir/env -S $dir/printf "'x%%sx\\n "A B"\n' > env2 || + framework_failure_ +chmod a+x env2 || framework_failure_ +cat<<\EOF>exp2 || framework_failure_ +xA Bx +x./env2x +EOF +./env2 > out2 || fail=1 +compare exp2 out2 || fail=1 + + +# backslash-underscore instead of spaces. 
+# Ignoring the absolute paths, the script is: +# #!env -Sprintf\_x%sx\n\_Y +printf "#!$dir/env -S$dir/printf"'\\_x%%sx\\n\\_Y\n' > env3 || + framework_failure_ +chmod a+x env3 || framework_failure_ +cat<<\EOF>exp3 || framework_failure_ +xYx +x./env3x +xWx +EOF +./env3 W > out3 || fail=1 +compare exp3 out3 || fail=1 + + + +# Test comments - The "#C D" should be ignored. +# Ignoring the absolute paths, the script is: +# #!env -Sprintf x%sx\n A#B #C D +printf "#!$dir/env -S$dir/printf"' x%%sx\\n A#B #C D\n' > env4 \ + || framework_failure_ +chmod a+x env4 || framework_failure_ +cat<<\EOF>exp4 || framework_failure_ +xA#Bx +x./env4x +xZx +EOF +./env4 Z > out4 || fail=1 +compare exp4 out4 || fail=1 + + +# Test with a simple Perl usage. +# (assume Perl is in $PATH, as it is required for the test suite). +# Ignoring the absolute paths, the script is: +# #!env -S perl -w -T +# print "hello\n"; +{ printf "#!$dir/env -S perl -w -T\n" ; + printf 'print "hello\\n";\n' ; } > env5 || framework_failure_ +chmod a+x env5 || framework_failure_ +cat<<\EOF>exp5 || framework_failure_ +hello +EOF +./env5 > out5 || fail=1 +compare exp5 out5 || fail=1 + + +# Test with a more complex Perl usage. +# Ignoring the absolute paths, the script is: +# #!env -S perl -mFile::Basename=basename -e "print basename(\$ARGV[0]);" +# The backslash before the '$' is required to prevent env(1) from treating +# $ARGV as an (invalid syntax) envvar, and pass it as-is to Perl. +{ printf "#!$dir/env -S " ; + printf 'perl -mFile::Basename=basename -e ' ; + printf '"print basename(\\$ARGV[0]);"\n' ; } > env6 || framework_failure_ +chmod a+x env6 || framework_failure_ +# Note: the perl script does not output a newline. 
+printf "env6" > exp6 || framework_failure_ +./env6 > out6 || fail=1 +compare exp6 out6 || fail=1 + + +Exit $fail diff --git a/tests/misc/env-S.pl b/tests/misc/env-S.pl new file mode 100755 index 0000000000..d3bfc46c3b --- /dev/null +++ b/tests/misc/env-S.pl @@ -0,0 +1,272 @@ +#!/usr/bin/perl +# Test 'env -S' feature + +# Copyright (C) 2018 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +use strict; + +(my $program_name = $0) =~ s|.*/||; +my $prog = 'env'; + +# Turn off localization of executable's output. 
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;

# Table of test cases.  Each entry is an array reference of the form
#   [NAME, ARGUMENTS, {KEY=>VALUE}, ...]
# NAME identifies the test and ARGUMENTS is the argument text given to
# $prog (per the notes below it passes through the shell, hence the
# nested quoting).  The trailing hashes use the keys OUT, ENV, EXIT and
# ERR; presumably they are interpreted by run_tests, which is defined
# outside this file -- confirm there for the exact semantics.
my @Tests =
    (
     # Test combination of -S and regular arguments
     ['1', q[-i A=B FOO=AR sh -c 'echo $A$FOO'], {OUT=>"BAR"}],
     ['2', q[-i -S'A=B FOO=AR sh -c "echo \\$A\\$FOO"'], {OUT=>"BAR"}],
     ['3', q[-i -S'A=B FOO=AR' sh -c 'echo $A$FOO'], {OUT=>"BAR"}],
     ['4', q[-i -S'A=B' FOO=AR sh -c 'echo $A$FOO'], {OUT=>"BAR"}],
     ['5', q[-S'-i A=B FOO=AR sh -c "echo \\$A\\$FOO"'], {OUT=>"BAR"}],

     # Test quoting inside -S
     ['q1', q[-S'-i A="B C" env'], {OUT=>"A=B C"}],
     ['q2', q[-S"-i A='B C' env"], {OUT=>"A=B C"}],
     ['q3', q[-S"-i A=\"B C\" env"], {OUT=>"A=B C"}],
     # Test backslash-quoting inside quoting inside -S
     ['q4', q[-S'-i A="B \" C" env'], {OUT=>'A=B " C'}],
     ['q5', q[-S"-i A='B \\' C' env"], {OUT=>"A=B ' C"}],
     # Single-quotes in double-quotes and vice-versa
     ['q6', q[-S'-i A="B'"'"'C" env'], {OUT=>"A=B'C"}],
     ['q7', q[-S"-i A='B\\"C' env"], {OUT=>'A=B"C'}],

     # Test tab and space (note: tab here is expanded by perl
     # and sent to the shell as ASCII 0x9 inside single-quotes).
     ['t1', qq[-S'-i\tA="B \tC" env'], {OUT=>"A=B \tC"}],
     # Here '\\t' is not interpolated by perl/shell, passed as two characters
     # (backslash, 't') to env, resulting in one argument ("A<TAB>B").
     ['t2', qq[-S'printf x%sx\\n A\\tB'], {OUT=>"xA\tBx"}],
     # Here '\t' is interpolated by perl, passed as literal tab (ASCII 0x9)
     # to env, resulting in two arguments ("A" "B").
     ['t3', qq[-S'printf x%sx\\n A\tB'], {OUT=>"xAx\nxBx"}],
     ['t4', qq[-S'printf x%sx\\n A \t B'], {OUT=>"xAx\nxBx"}],


     # Test empty strings
     ['m1', qq[-i -S"" A=B env], {OUT=>"A=B"}],
     ['m2', qq[-i -S" \t" A=B env], {OUT=>"A=B"}],

     # Test escape sequences.
     # note: in the following, there is no interpolation by perl due
     # to q[], and no interpolation by the shell due to single-quotes.
     # env will receive the backslash character followed by t/f/r/n/v.
     # Also: Perl does not recognize "\v", so use "\013" for vertical tab.
     ['e1', q[-i -S'A="B\tC" env'], {OUT=>"A=B\tC"}],
     ['e2', q[-i -S'A="B\fC" env'], {OUT=>"A=B\fC"}],
     ['e3', q[-i -S'A="B\rC" env'], {OUT=>"A=B\rC"}],
     ['e4', q[-i -S'A="B\nC" env'], {OUT=>"A=B\nC"}],
     ['e5', q[-i -S'A="B\vC" env'], {OUT=>"A=B\013C"}],
     ['e6', q[-i -S'A="B\$C" env'], {OUT=>'A=B$C'}],
     ['e7', q[-i -S'A=B\$C env'], {OUT=>'A=B$C'}],
     ['e8', q[-i -S'A="B\#C" env'], {OUT=>'A=B#C'}],
     ['e9', q[-i -S'A="B\\\\C" env'], {OUT=>'A=B\\C'}],
     ['e10',q[-i -S"A='B\\\\\\\\C' env"], {OUT=>'A=B\\C'}],

     # Escape in single-quoted string - passed as-is
     # (the multiple pairs of backslashes are to survive two interpolations:
     # by perl and then by the shell due to double-quotes).
     ['e11',q[-i -S"A='B\\\\tC' env"], {OUT=>'A=B\tC'}],
     ['e12',q[-i -S"A='B\\\\#C' env"], {OUT=>'A=B\#C'}],
     ['e13',q[-i -S"A='B\\\\\\$C' env"], {OUT=>'A=B\$C'}],
     ['e14',q[-i -S"A='B\\\\\\"C' env"], {OUT=>'A=B\"C'}],

     # Special escape sequences:
     # \_ in double-quotes is a space - result is just one envvar 'A'
     ['e20', q[-i -S'A="B\_C=D" env'], {OUT=>'A=B C=D'}],
     # \_ outside double-quotes is arg separator, the command to
     # execute should be 'env env'
     ['e21', q[-i -S'A=B\_env\_env'], {OUT=>"A=B"}],

     # Test -C inside -S
     ['c1', q["-S-C/ pwd"], {OUT=>"/"}],
     ['c2', q["-S -C / pwd"], {OUT=>"/"}],
     ['c3', q["-S --ch'dir='/ pwd"], {OUT=>"/"}],

     # Test -u inside and outside -S
     # u1,u2 - establish a baseline, without -S
     ['u1', q[ sh -c 'echo =$FOO='], {ENV=>"FOO=BAR"}, {OUT=>"=BAR="}],
     ['u2', q[-uFOO sh -c 'echo =$FOO='], {ENV=>"FOO=BAR"}, {OUT=>"=="}],
     # u3,u4: ${FOO} expanded by env itself before executing sh.
     #        \\$FOO expanded by sh.
     # ${FOO} should have value of the original environment
     # and \\$FOO should be unset, regardless of where -uFOO is used.
     # 'u3' behavior differs from FreeBSD's but deemed preferable, in
     # https://lists.gnu.org/r/coreutils/2018-04/msg00014.html
     ['u3', q[-uFOO -S'sh -c "echo x${FOO}x =\\$FOO="'],
      {ENV=>"FOO=BAR"}, {OUT=>"xBARx =="}],
     ['u4', q[-S'-uFOO sh -c "echo x${FOO}x =\\$FOO="'],
      {ENV=>"FOO=BAR"}, {OUT=>"xBARx =="}],

     # Test ENVVAR expansion
     ['v1', q[-i -S'A=${FOO} env'], {ENV=>"FOO=BAR"}, {OUT=>"A=BAR"}],
     ['v2', q[-i -S'A=x${FOO}x env'], {ENV=>"FOO=BAR"}, {OUT=>"A=xBARx"}],
     ['v3', q[-i -S'A=x${FOO}x env'], {ENV=>"FOO="}, {OUT=>"A=xx"}],
     ['v4', q[-i -S'A=x${FOO}x env'], {OUT=>"A=xx"}],
     ['v5', q[-i -S'A="x${FOO}x" env'],
      {ENV=>"FOO=BAR"}, {OUT=>"A=xBARx"}],
     ['v6', q[-i -S'${FOO}=A env'], {ENV=>"FOO=BAR"}, {OUT=>"BAR=A"}],
     # No expansion inside single-quotes
     ['v7', q[-i -S"A='x\${FOO}x' env"], {OUT=>'A=x${FOO}x'}],
     ['v8', q[-i -S'A="${_FOO}" env'], {ENV=>"_FOO=BAR"}, {OUT=>"A=BAR"}],
     ['v9', q[-i -S'A="${F_OO}" env'], {ENV=>"F_OO=BAR"}, {OUT=>"A=BAR"}],
     ['v10', q[-i -S'A="${FOO1}" env'], {ENV=>"FOO1=BAR"}, {OUT=>"A=BAR"}],

     # Test end-of-string '#' and '\c'
     ['d1', q[-i -S'A=B #C=D' env], {OUT=>"A=B"}],
     ['d2', q[-i -S'#A=B C=D' env], {OUT=>""}],
     ['d3', q[-i -S'A=B#' env], {OUT=>"A=B#"}],
     ['d4', q[-i -S'A=B #' env], {OUT=>"A=B"}],

     ['d5', q[-i -S'A=B\cC=D' env], {OUT=>"A=B"}],
     ['d6', q[-i -S'\cA=B C=D' env], {OUT=>""}],
     ['d7', q[-i -S'A=B\c' env], {OUT=>"A=B"}],
     ['d8', q[-i -S'A=B \c' env], {OUT=>"A=B"}],

     ['d10', q[-S'echo FOO #BAR'], {OUT=>"FOO"}],
     ['d11', q[-S'echo FOO \\#BAR'], {OUT=>"FOO #BAR"}],
     ['d12', q[-S'echo FOO#BAR'], {OUT=>"FOO#BAR"}],

     # Test underscore as space/separator in double/single/no quotes
     ['s1', q[-S'printf x%sx\\n "A\\_B"'], {OUT=>"xA Bx"}],
     ['s2', q[-S"printf x%sx\\n 'A\\_B'"], {OUT=>"xA\\_Bx"}],
     ['s3', q[-S"printf x%sx\\n A\\_B"], {OUT=>"xAx\nxBx"}],
     ['s4', q[-S"printf x%sx\\n A B"], {OUT=>"xAx\nxBx"}],
     # A run of several blanks separates arguments just like a single one.
     ['s5', q[-S"printf x%sx\\n A   B"], {OUT=>"xAx\nxBx"}],
     # test underscore/spaces variations -
     # ensure they don't generate empty arguments.
     ['s6', q[-S"\\_printf x%sx\\n FOO"], {OUT=>"xFOOx"}],
     ['s7', q[-S"printf x%sx\\n FOO\\_"], {OUT=>"xFOOx"}],
     ['s8', q[-S"\\_printf x%sx\\n FOO\\_"], {OUT=>"xFOOx"}],
     ['s9', q[-S"\\_\\_printf x%sx\\n FOO\\_\\_"], {OUT=>"xFOOx"}],
     ['s10', q[-S" printf x%sx\\n FOO"], {OUT=>"xFOOx"}],
     ['s11', q[-S"printf x%sx\\n FOO "], {OUT=>"xFOOx"}],
     ['s12', q[-S" printf x%sx\\n FOO "], {OUT=>"xFOOx"}],
     # Same as s12, but with more than one leading/trailing blank.
     ['s13', q[-S"  printf x%sx\\n FOO  "], {OUT=>"xFOOx"}],
     ['s14', q[-S'printf\\_x%sx\\n\\_FOO'], {OUT=>"xFOOx"}],
     ['s15', q[-S"printf x%sx\\n \\_ FOO"], {OUT=>"xFOOx"}],
     ['s16', q[-S"printf x%sx\\n\\_ \\_FOO"], {OUT=>"xFOOx"}],
     ['s17', q[-S"\\_ \\_ printf x%sx\\n FOO \\_ \\_"], {OUT=>"xFOOx"}],

     # Check for empty quotes: eq1-eq6 use empty double-quotes,
     # eq10-eq15 repeat the same cases with empty single-quotes.
     ['eq1', q[-S'printf x%sx\\n A "" B'], {OUT=>"xAx\nxx\nxBx"}],
     ['eq2', q[-S'printf x%sx\\n A"" B'], {OUT=>"xAx\nxBx"}],
     ['eq3', q[-S'printf x%sx\\n A""B'], {OUT=>"xABx"}],
     ['eq4', q[-S'printf x%sx\\n A ""B'], {OUT=>"xAx\nxBx"}],
     ['eq5', q[-S'printf x%sx\\n ""'], {OUT=>"xx"}],
     ['eq6', q[-S'printf x%sx\\n "" '], {OUT=>"xx"}],
     ['eq10', q[-S"printf x%sx\\n A '' B"], {OUT=>"xAx\nxx\nxBx"}],
     ['eq11', q[-S"printf x%sx\\n A'' B"], {OUT=>"xAx\nxBx"}],
     ['eq12', q[-S"printf x%sx\\n A''B"], {OUT=>"xABx"}],
     ['eq13', q[-S"printf x%sx\\n A ''B"], {OUT=>"xAx\nxBx"}],
     ['eq14', q[-S"printf x%sx\\n ''"], {OUT=>"xx"}],
     ['eq15', q[-S"printf x%sx\\n '' "], {OUT=>"xx"}],

     # extreme example - such as could be found on a #! line.
     ['p10', q[-S"\\_ \\_perl\_-w\_-T\_-e\_'print \"hello\n\";'\\_ \\_"],
      {OUT=>"hello"}],

     # Test Error Conditions
     ['err1', q[-S'"\\c"'], {EXIT=>125},
      {ERR=>"$prog: '\\c' must not appear in double-quoted -S string\n"}],
     ['err2', q[-S'A=B\\'], {EXIT=>125},
      {ERR=>"$prog: invalid backslash at end of string in -S\n"}],
     ['err3', q[-S'"A=B\\"'], {EXIT=>125},
      {ERR=>"$prog: no terminating quote in -S string\n"}],
     ['err4', q[-S"'A=B\\\\'"], {EXIT=>125},
      {ERR=>"$prog: no terminating quote in -S string\n"}],
     ['err5', q[-S'A=B\\q'], {EXIT=>125},
      {ERR=>"$prog: invalid sequence '\\q' in -S\n"}],
     ['err6', q[-S'A=$B'], {EXIT=>125},
      {ERR=>"$prog: only \${VARNAME} expansion is supported, error at: \$B\n"}],
     ['err7', q[-S'A=${B'], {EXIT=>125},
      {ERR=>"$prog: only \${VARNAME} expansion is supported, " .
           "error at: \${B\n"}],
     ['err8', q[-S'A=${B%B}'], {EXIT=>125},
      {ERR=>"$prog: only \${VARNAME} expansion is supported, " .
           "error at: \${B%B}\n"}],
     ['err9', q[-S'A=${9B}'], {EXIT=>125},
      {ERR=>"$prog: only \${VARNAME} expansion is supported, " .
           "error at: \${9B}\n"}],

     # Test incorrect shebang usage (extraneous space).
     # With anything other than 3 args report invalid options (as before).
     ['err_sp1', q['-v-S cat -n'], {EXIT=>125},
      {ERR=>"env: invalid option -- '-'\n" .
           "Try 'env --help' for more information.\n"}],
     ['err_sp2', q['-v -S cat -n'], {EXIT=>125},
      {ERR=>"env: invalid option -- ' '\n" .
           "Try 'env --help' for more information.\n"}],
     # Embedded tab after -v.  The tab is spelled as an explicit "\t"
     # (interpolated by perl thanks to qq[]) so that it stays visible and
     # cannot be silently turned into a space by whitespace cleanup.
     ['err_sp3', qq['-v\t-S cat -n'], {EXIT=>125},
      {ERR=>"env: invalid option -- '\t'\n" .
           "Try 'env --help' for more information.\n"}],

     # With exactly 3 args, assume it's incorrect shebang usage,
     # and report a different message.  This typically happens with:
     #
     #   $ cat xxx
     #   #!env -v -S cat -n
     #
     #   $ ./xxx
     #
     # in which case:
     #   argv[0] = env
     #   argv[1] = '-v -S cat -n'
     #   argv[2] = './xxx'
     ['err_sp5', q['-v -S cat -n' ./xxx], {EXIT=>125},
      {ERR=>"env: invalid option -- ' '\n" .
           "env: use -[v]S to pass options in shebang lines\n" .
           "Try 'env --help' for more information.\n"}],

     ['err_sp6', q['cat -n' ./xxx], {EXIT=>127},
      {ERR=>"env: 'cat -n': No such file or directory\n" .
           "env: use -[v]S to pass options in shebang lines\n"}],
    );

# Append a newline to end of each expected 'OUT' string.
# Empty expectations (e.g. d2, d6) are deliberately left empty.
foreach my $t (@Tests)
  {
    foreach my $e (@$t)
      {
        # Only the hash elements of an entry can carry an OUT key;
        # the test name and argument string are plain scalars.
        next unless ref ($e) eq 'HASH';
        next unless exists $e->{OUT} and length ($e->{OUT}) > 0;
        $e->{OUT} .= "\n";
      }
  }

# Repeat above tests with "--debug" option (but discard STDERR).
my @new;
foreach my $t (@Tests)
  {
    # Skip tests that are expected to fail.
    next if $t->[0] =~ /^err/;

    # The hashes in @rest are shared with the original entry (shallow
    # copy); ERR_SUBST is added as a separate trailing hash.
    my ($test_name, $args, @rest) = @$t;
    push @new, ["$test_name-debug",
                "--debug " . $args,
                @rest,
                {ERR_SUBST => 's/.*//ms'}];
  }
push @Tests, @new;

my $save_temps = $ENV{SAVE_TEMPS};
my $verbose = $ENV{VERBOSE};

my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
exit $fail;