Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

buffer: speed up base64 encoding by 20%

Remove a lot of branches from the inner loop. Speeds up buf.toString('base64')
by about 20%.

Before:

  $ time out/Release/node benchmark/buffer-base64-encode.js
  real    0m6.607s
  user    0m5.508s
  sys     0m1.088s

After:

  $ time out/Release/node benchmark/buffer-base64-encode.js
  real    0m5.520s
  user    0m4.520s
  sys     0m0.992s
  • Loading branch information...
commit 1cbc6c9eba4f8368a298c5da6bf4b34e55fec7da 1 parent f84bf5b
@bnoordhuis authored
Showing with 78 additions and 58 deletions.
  1. +27 −0 benchmark/buffer-base64-encode.js
  2. +51 −58 src/node_buffer.cc
View
27 benchmark/buffer-base64-encode.js
@@ -0,0 +1,27 @@
+// Copyright Joyent, Inc. and other Node contributors.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to permit
+// persons to whom the Software is furnished to do so, subject to the
+// following conditions:
+//
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+// USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+var N = 64*1024*1024
+var b = Buffer(N);
+var s = '';
+for (var i = 0; i < 256; ++i) s += String.fromCharCode(i);
+for (var i = 0; i < N; i += 256) b.write(s, i, 256, 'ascii');
+for (var i = 0; i < 32; ++i) b.toString('base64');
View
109 src/node_buffer.cc
@@ -276,9 +276,6 @@ Handle<Value> Buffer::Ucs2Slice(const Arguments &args) {
return scope.Close(string);
}
-static const char *base64_table = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz"
- "0123456789+/";
// supports regular and URL-safe base64
static const int unbase64_table[] =
@@ -307,69 +304,65 @@ Handle<Value> Buffer::Base64Slice(const Arguments &args) {
Buffer *parent = ObjectWrap::Unwrap<Buffer>(args.This());
SLICE_ARGS(args[0], args[1])
- int n = end - start;
- int out_len = (n + 2 - ((n + 2) % 3)) / 3 * 4;
- char *out = new char[out_len];
-
- uint8_t bitbuf[3];
- int i = start; // data() index
- int j = 0; // out index
- char c;
- bool b1_oob, b2_oob;
-
- while (i < end) {
- bitbuf[0] = parent->data_[i++];
-
- if (i < end) {
- bitbuf[1] = parent->data_[i];
- b1_oob = false;
- } else {
- bitbuf[1] = 0;
- b1_oob = true;
- }
- i++;
-
- if (i < end) {
- bitbuf[2] = parent->data_[i];
- b2_oob = false;
- } else {
- bitbuf[2] = 0;
- b2_oob = true;
- }
- i++;
+ unsigned slen = end - start;
+ const char* src = parent->data_ + start;
+ unsigned dlen = (slen + 2 - ((slen + 2) % 3)) / 3 * 4;
+ char* dst = new char[dlen];
- c = bitbuf[0] >> 2;
- assert(c < 64);
- out[j++] = base64_table[(int)c];
- assert(j < out_len);
+ unsigned a;
+ unsigned b;
+ unsigned c;
+ unsigned i;
+ unsigned k;
+ unsigned n;
- c = ((bitbuf[0] & 0x03) << 4) | (bitbuf[1] >> 4);
- assert(c < 64);
- out[j++] = base64_table[(int)c];
- assert(j < out_len);
+ static const char table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789+/";
- if (b1_oob) {
- out[j++] = '=';
- } else {
- c = ((bitbuf[1] & 0x0F) << 2) | (bitbuf[2] >> 6);
- assert(c < 64);
- out[j++] = base64_table[(int)c];
- }
- assert(j < out_len);
+ i = 0;
+ k = 0;
+ n = slen / 3 * 3;
- if (b2_oob) {
- out[j++] = '=';
- } else {
- c = bitbuf[2] & 0x3F;
- assert(c < 64);
- out[j++] = base64_table[(int)c];
+ while (i < n) {
+ a = src[i + 0] & 0xff;
+ b = src[i + 1] & 0xff;
+ c = src[i + 2] & 0xff;
+
+ dst[k + 0] = table[a >> 2];
+ dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
+ dst[k + 2] = table[((b & 0x0f) << 2) | (c >> 6)];
+ dst[k + 3] = table[c & 0x3f];
+
+ i += 3;
+ k += 4;
+ }
+
+ if (n != slen) {
+ switch (slen - n) {
+ case 1:
+ a = src[i + 0] & 0xff;
+ dst[k + 0] = table[a >> 2];
+ dst[k + 1] = table[(a & 3) << 4];
+ dst[k + 2] = '=';
+ dst[k + 3] = '=';
+ break;
+
+ case 2:
+ a = src[i + 0] & 0xff;
+ b = src[i + 1] & 0xff;
+ dst[k + 0] = table[a >> 2];
+ dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
+ dst[k + 2] = table[(b & 0x0f) << 2];
+ dst[k + 3] = '=';
+ break;
}
- assert(j <= out_len);
}
- Local<String> string = String::New(out, out_len);
- delete [] out;
+ Local<String> string = String::New(dst, dlen);
+ delete [] dst;
+
return scope.Close(string);
}

2 comments on commit 1cbc6c9

@defunctzombie

cool. btw, did you try collapsing the dst access into an array of int32_t versus accessing individual chars since you always set things in groups of 4?

@bnoordhuis
Owner

Yes, actually. :-)

It speeds it up by another ~1-2% but the resulting code is a lot less readable because it needs to deal with endianness and unaligned stores. I decided it wasn't worth it.

Please sign in to comment.
Something went wrong with that request. Please try again.