// Scan the first `avail` bytes at `p` and return the offset of the first
// "garbage" byte, i.e. the first byte that is not part of the base64
// alphabet. This offset is also the length of the leading run of valid
// characters. Returns `avail` if no garbage byte is found (including when
// `avail` is zero).
//
// The valid set is A-Z, a-z, 0-9, '+', '/' and the padding character '='.
// Padding must count as valid: treating it as garbage would strip it from
// the input before it reaches the decoder. Everything else, including
// newlines and quote characters, is garbage.
static inline size_t
find_garbage (const char *p, const size_t avail)
{
	// Use a lookup table to distinguish garbage from non-garbage. Entries
	// that are not explicitly listed are zero-initialized, and so are
	// classified as garbage.
	static const char lut[256] = {
		['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1,
		['F'] = 1, ['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1,
		['K'] = 1, ['L'] = 1, ['M'] = 1, ['N'] = 1, ['O'] = 1,
		['P'] = 1, ['Q'] = 1, ['R'] = 1, ['S'] = 1, ['T'] = 1,
		['U'] = 1, ['V'] = 1, ['W'] = 1, ['X'] = 1, ['Y'] = 1,
		['Z'] = 1,
		['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, ['e'] = 1,
		['f'] = 1, ['g'] = 1, ['h'] = 1, ['i'] = 1, ['j'] = 1,
		['k'] = 1, ['l'] = 1, ['m'] = 1, ['n'] = 1, ['o'] = 1,
		['p'] = 1, ['q'] = 1, ['r'] = 1, ['s'] = 1, ['t'] = 1,
		['u'] = 1, ['v'] = 1, ['w'] = 1, ['x'] = 1, ['y'] = 1,
		['z'] = 1,
		['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1,
		['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1,
		['+'] = 1, ['/'] = 1, ['='] = 1,
	};

	for (size_t len = 0; len < avail; len++) {

		// Index through unsigned char: plain char may be signed, and
		// a negative index would read outside the table.
		if (lut[(unsigned char) p[len]] == 0) {
			return len;
		}
	}

	return avail;
}
When stripping newlines in the input, find the - // offset of the next newline character, which is also - // the length of the next chunk. Otherwise treat the - // entire input as a single chunk. - if (config->strip_newlines) { + // When stripping garbage or newlines in the input, + // find the offset of the next garbage/newline + // character, which is also the length of the next + // chunk. Otherwise treat the entire input as a single + // chunk. + if (config->ignore_garbage) { + len = find_garbage(start, avail); + } else if (config->strip_newlines) { len = find_newline(start, avail); } else { len = avail; @@ -443,6 +480,8 @@ usage (FILE *fp, const struct config *config) "Options:\n" " -d, --decode Decode a base64 stream.\n" " -h, --help Print this help text.\n" + " -i, --ignore-garbage When decoding, ignore any " + "non-base64 data.\n" " -n, --no-strip-newlines When decoding, do not strip " "newlines. Speeds up decoding for inputs that do not contain " "newlines.\n" @@ -486,6 +525,7 @@ parse_opts (int argc, char **argv, struct config *config) static const struct option opts[] = { { "decode", no_argument, NULL, 'd' }, { "help", no_argument, NULL, 'h' }, + { "ignore-garbage", no_argument, NULL, 'i' }, { "no-strip-newlines", no_argument, NULL, 'n' }, { "wrap", required_argument, NULL, 'w' }, { NULL } @@ -495,7 +535,7 @@ parse_opts (int argc, char **argv, struct config *config) config->name = *argv; // Parse command line options. 
- while ((c = getopt_long(argc, argv, ":dhnw:", opts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, ":dhinw:", opts, NULL)) != -1) { switch (c) { case 'd': config->decode = true; @@ -505,6 +545,10 @@ parse_opts (int argc, char **argv, struct config *config) config->print_help = true; return true; + case 'i': + config->ignore_garbage = true; + break; + case 'n': config->strip_newlines = false; break; @@ -571,6 +615,7 @@ main (int argc, char **argv) .decode = false, .print_help = false, .strip_newlines = true, + .ignore_garbage = false, }; struct buffer buf;