4242#include <sys/types.h>
4343#include "system.h"
4444#include "fadvise.h"
45+ #include "mcel.h"
4546
4647/* The official name of this program (e.g., no 'g' prefix). */
4748#define PROGRAM_NAME "paste"
5051 proper_name ("David M. Ihnat"), \
5152 proper_name ("David MacKenzie")
5253
53- /* Indicates that no delimiter should be added in the current position. */
54- #define EMPTY_DELIM '\0'
55-
5654/* If nonzero, we have read standard input at some point. */
5755static bool have_read_stdin ;
5856
5957/* If nonzero, merge subsequent lines of each file rather than
6058 corresponding lines from each file in parallel. */
6159static bool serial_merge ;
6260
63- /* The delimiters between lines of input files (used cyclically). */
61+ /* The delimiters between lines of input files (used cyclically).
62+ This stores the raw bytes of all delimiters concatenated. */
6463static char * delims ;
6564
66- /* A pointer to the character after the end of 'delims'. */
67- static char const * delim_end ;
65+ /* Length of each delimiter in bytes (supports multi-byte characters).
66+ A length of 0 indicates no delimiter at this position (from \0 escape). */
67+ static size_t * delim_lens ;
68+
69+ /* Number of delimiters. */
70+ static idx_t num_delims ;
6871
6972static unsigned char line_delim = '\n' ;
7073
@@ -78,10 +81,10 @@ static struct option const longopts[] =
7881 {nullptr , 0 , nullptr , 0 }
7982};
8083
81- /* Set globals delims and delim_end. Copy STRPTR to DELIMS, converting
82- backslash representations of special characters in STRPTR to their actual
83- values. The set of possible backslash characters has been expanded beyond
84- that recognized by the Unix version.
84+ /* Set globals delims, delim_lens, and num_delims.
85+ Process STRPTR converting backslash representations of special characters
86+ to their actual values. The set of possible backslash characters has been
87+ expanded beyond that recognized by the Unix version.
8588 Return 0 upon success.
8689 If the string ends in an odd number of backslashes, ignore the
8790 final backslash and return nonzero. */
@@ -93,62 +96,65 @@ collapse_escapes (char const *strptr)
9396 bool backslash_at_end = false;
9497
9598 delims = strout ;
99+ delim_lens = xnmalloc (MAX (1 , strlen (strptr )), sizeof * delim_lens );
100+
101+ char const * s = strptr ;
102+ idx_t idx = 0 ;
96103
97- while (* strptr )
104+ while (* s )
98105 {
99- if (* strptr != '\\' ) /* Is it an escape character? */
100- * strout ++ = * strptr ++ ; /* No, just transfer it. */
101- else
106+ if (* s == '\\' )
102107 {
103- switch (* ++ strptr )
108+ s ++ ;
109+ if (* s == '\0' )
104110 {
105- case '0' :
106- * strout ++ = EMPTY_DELIM ;
107- break ;
108-
109- case 'b' :
110- * strout ++ = '\b' ;
111- break ;
112-
113- case 'f' :
114- * strout ++ = '\f' ;
115- break ;
116-
117- case 'n' :
118- * strout ++ = '\n' ;
119- break ;
120-
121- case 'r' :
122- * strout ++ = '\r' ;
111+ backslash_at_end = true;
123112 break ;
113+ }
114+ else if (* s == '0' )
115+ {
116+ /* Empty delimiter at this position. */
117+ s ++ ;
118+ delim_lens [idx ++ ] = 0 ;
119+ }
120+ else
121+ {
122+ switch (* s )
123+ {
124+ case 'b' : * strout ++ = '\b' ; break ;
125+ case 'f' : * strout ++ = '\f' ; break ;
126+ case 'n' : * strout ++ = '\n' ; break ;
127+ case 'r' : * strout ++ = '\r' ; break ;
128+ case 't' : * strout ++ = '\t' ; break ;
129+ case 'v' : * strout ++ = '\v' ; break ;
130+ case '\\' : * strout ++ = '\\' ; break ;
131+ default : goto copy_character ;
132+ }
124133
125- case 't' :
126- * strout ++ = '\t' ;
127- break ;
134+ s ++ ;
135+ delim_lens [ idx ++ ] = 1 ;
136+ }
128137
129- case 'v' :
130- * strout ++ = '\v' ;
131- break ;
138+ continue ;
139+ }
132140
133- case '\\' :
134- * strout ++ = '\\' ;
135- break ;
141+ copy_character :
142+ mcel_t g = mcel_scanz (s );
143+ strout = mempcpy (strout , s , g .len );
144+ s += g .len ;
145+ delim_lens [idx ++ ] = g .len ;
146+ }
136147
137- case '\0' :
138- backslash_at_end = true;
139- goto done ;
148+ * strout = '\0' ;
140149
141- default :
142- * strout ++ = * strptr ;
143- break ;
144- }
145- strptr ++ ;
146- }
150+ if (idx == 0 )
151+ {
152+ delim_lens [0 ] = 0 ;
153+ idx = 1 ;
147154 }
148155
149- done :
156+ num_delims = idx ;
150157
151- delim_end = strout ;
152158 return backslash_at_end ? 1 : 0 ;
153159}
154160
@@ -161,6 +167,16 @@ xputchar (char c)
161167 write_error ();
162168}
163169
170+ /* Output the delimiter at DELIMPTR with length LEN.
171+ If LEN is 0, nothing is output (empty delimiter from \0 escape). */
172+
173+ static inline void
174+ output_delim (char const * delimptr , size_t len )
175+ {
176+ if (len > 0 && fwrite (delimptr , 1 , len , stdout ) != len )
177+ write_error ();
178+ }
179+
164180/* Perform column paste on the NFILES files named in FNAMPTR.
165181 Return true if successful, false if one or more files could not be
166182 opened or read. */
@@ -171,9 +187,9 @@ paste_parallel (size_t nfiles, char **fnamptr)
171187 bool ok = true;
172188 /* If all files are just ready to be closed, or will be on this
173189 round, the string of delimiters must be preserved.
174- delbuf[0] through delbuf[nfiles]
175- store the delimiters for closed files. */
176- char * delbuf = xmalloc (nfiles + 2 );
190+ delbuf stores the delimiter bytes for closed files.
191+ Size it to hold up to (nfiles - 1) delimiters. */
192+ char * delbuf = xmalloc (( nfiles - 1 ) * MB_CUR_MAX + 1 );
177193
178194 /* Streams open to the files to process; null if the corresponding
179195 stream is closed. */
@@ -218,8 +234,9 @@ paste_parallel (size_t nfiles, char **fnamptr)
218234 {
219235 /* Set up for the next line. */
220236 bool somedone = false;
221- char const * delimptr = delims ;
222- size_t delims_saved = 0 ; /* Number of delims saved in 'delbuf'. */
237+ idx_t delimidx = 0 ; /* Current delimiter index. */
238+ idx_t delimoff = 0 ; /* Current offset into delims. */
239+ idx_t delims_saved = 0 ; /* Bytes saved in 'delbuf'. */
223240
224241 for (size_t i = 0 ; i < nfiles && files_open ; i ++ )
225242 {
@@ -292,10 +309,18 @@ paste_parallel (size_t nfiles, char **fnamptr)
292309 else
293310 {
294311 /* Closed file; add delimiter to 'delbuf'. */
295- if (* delimptr != EMPTY_DELIM )
296- delbuf [delims_saved ++ ] = * delimptr ;
297- if (++ delimptr == delim_end )
298- delimptr = delims ;
312+ size_t len = delim_lens [delimidx ];
313+ if (len > 0 )
314+ {
315+ memcpy (delbuf + delims_saved , delims + delimoff , len );
316+ delims_saved += len ;
317+ }
318+ delimoff += len ;
319+ if (++ delimidx == num_delims )
320+ {
321+ delimidx = 0 ;
322+ delimoff = 0 ;
323+ }
299324 }
300325 }
301326 else
@@ -308,10 +333,13 @@ paste_parallel (size_t nfiles, char **fnamptr)
308333 {
309334 if (chr != line_delim && chr != EOF )
310335 xputchar (chr );
311- if (* delimptr != EMPTY_DELIM )
312- xputchar (* delimptr );
313- if (++ delimptr == delim_end )
314- delimptr = delims ;
336+ output_delim (delims + delimoff , delim_lens [delimidx ]);
337+ delimoff += delim_lens [delimidx ];
338+ if (++ delimidx == num_delims )
339+ {
340+ delimidx = 0 ;
341+ delimoff = 0 ;
342+ }
315343 }
316344 else
317345 {
@@ -337,7 +365,6 @@ paste_serial (size_t nfiles, char **fnamptr)
337365{
338366 bool ok = true; /* false if open or read errors occur. */
339367 int charnew , charold ; /* Current and previous char read. */
340- char const * delimptr ; /* Current delimiter char. */
341368 FILE * fileptr ; /* Open for reading current file. */
342369
343370 for (; nfiles ; nfiles -- , fnamptr ++ )
@@ -361,7 +388,8 @@ paste_serial (size_t nfiles, char **fnamptr)
361388 fadvise (fileptr , FADVISE_SEQUENTIAL );
362389 }
363390
364- delimptr = delims ; /* Set up for delimiter string. */
391+ idx_t delimidx = 0 ; /* Current delimiter index. */
392+ idx_t delimoff = 0 ; /* Current offset into delims. */
365393
366394 charold = getc (fileptr );
367395 saved_errno = errno ;
@@ -378,11 +406,13 @@ paste_serial (size_t nfiles, char **fnamptr)
378406 /* Process the old character. */
379407 if (charold == line_delim )
380408 {
381- if (* delimptr != EMPTY_DELIM )
382- xputchar (* delimptr );
383-
384- if (++ delimptr == delim_end )
385- delimptr = delims ;
409+ output_delim (delims + delimoff , delim_lens [delimidx ]);
410+ delimoff += delim_lens [delimidx ];
411+ if (++ delimidx == num_delims )
412+ {
413+ delimidx = 0 ;
414+ delimoff = 0 ;
415+ }
386416 }
387417 else
388418 xputchar (charold );
@@ -520,6 +550,7 @@ main (int argc, char **argv)
520550 (nfiles , & argv [optind ]));
521551
522552 free (delims );
553+ free (delim_lens );
523554
524555 if (have_read_stdin && fclose (stdin ) == EOF )
525556 error (EXIT_FAILURE , errno , "-" );
0 commit comments