encoding/binary: support for varint encoding

R=rsc, r, nigeltao, r, dsymonds CC=golang-dev https://golang.org/cl/5146048
golang · Sep 29, 2011 · f30719d · f30719d
1 parent b741369
commit f30719d
Show file tree

Hide file tree

Showing 4 changed files with 342 additions and 3 deletions.
diff --git a/src/pkg/encoding/binary/Makefile b/src/pkg/encoding/binary/Makefile
@@ -7,5 +7,6 @@ include ../../../Make.inc
 TARG=encoding/binary
 GOFILES=\
 	binary.go\
+        varint.go\
 
 include ../../../Make.pkg
diff --git a/src/pkg/encoding/binary/binary.go b/src/pkg/encoding/binary/binary.go
@@ -17,9 +17,9 @@ import (
 // A ByteOrder specifies how to convert byte sequences into
 // 16-, 32-, or 64-bit unsigned integers.
 type ByteOrder interface {
-	Uint16(b []byte) uint16
-	Uint32(b []byte) uint32
-	Uint64(b []byte) uint64
+	Uint16([]byte) uint16
+	Uint32([]byte) uint32
+	Uint64([]byte) uint64
 	PutUint16([]byte, uint16)
 	PutUint32([]byte, uint32)
 	PutUint64([]byte, uint64)

diff --git a/src/pkg/encoding/binary/varint.go b/src/pkg/encoding/binary/varint.go
@@ -0,0 +1,163 @@
+// Copyright 2011 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package binary
+
+// This file implements "varint" encoding of 64-bit integers.
+// The encoding is:
+// - unsigned integers are serialized 7 bits at a time, starting with the
+//   least significant bits
+// - the most significant bit (msb) in each output byte indicates if there
+//   is a continuation byte (msb = 1)
+// - signed integers are mapped to unsigned integers using "zig-zag"
+//   encoding: Positive values x are written as 2*x + 0, negative values
+//   are written as 2*(^x) + 1; that is, negative numbers are complemented
+//   and whether to complement is encoded in bit 0.
+//
+// Design note:
+// At most 10 bytes are needed for 64-bit values. The encoding could
+// be more dense: a full 64-bit value needs an extra byte just to hold bit 63.
+// Instead, the msb of the previous byte could be used to hold bit 63 since we
+// know there can't be more than 64 bits. This is a trivial improvement and
+// would reduce the maximum encoding length to 9 bytes. However, it breaks the
+// invariant that the msb is always the "continuation bit" and thus makes the
+// format incompatible with a varint encoding for larger numbers (say 128-bit).
+
+import (
+	"io"
+	"os"
+)
+
+// MaxVarintLenN is the maximum length of a varint-encoded N-bit integer.
+const (
+	MaxVarintLen16 = 3
+	MaxVarintLen32 = 5
+	MaxVarintLen64 = 10
+)
+
+// PutUvarint encodes a uint64 into buf and returns the number of bytes written.
+// If the buffer is too small, the result is the negated number of bytes required
+// (that is, -PutUvarint(nil, x) is the number of bytes required to encode x).
+func PutUvarint(buf []byte, x uint64) int {
+	var i int
+	for i = range buf {
+		if x < 0x80 {
+			buf[i] = byte(x)
+			return i + 1
+		}
+		buf[i] = byte(x) | 0x80
+		x >>= 7
+	}
+	// buffer too small; compute number of bytes required
+	for x >= 0x4000 {
+		x >>= 2 * 7
+		i += 2
+	}
+	if x >= 0x80 {
+		i++
+	}
+	return -(i + 1)
+}
+
+// Uvarint decodes a uint64 from buf and returns that value and the
+// number of bytes read (> 0). If an error occurred, the value is 0
+// and the number of bytes n is <= 0 meaning:
+//
+//	n == 0: buf too small
+//	n  < 0: value larger than 64 bits (overflow)
+//              and -n is the number of bytes read
+//
+func Uvarint(buf []byte) (uint64, int) {
+	var x uint64
+	var s uint
+	for i, b := range buf {
+		if b < 0x80 {
+			if i > 9 || i == 9 && b > 1 {
+				return 0, -(i + 1) // overflow
+			}
+			return x | uint64(b)<<s, i + 1
+		}
+		x |= uint64(b&0x7f) << s
+		s += 7
+	}
+	return 0, 0
+}
+
+// PutVarint encodes an int64 into buf and returns the number of bytes written.
+// If the buffer is too small, the result is the negated number of bytes required
+// (that is, -PutVarint(nil, x) is the number of bytes required to encode x).
+func PutVarint(buf []byte, x int64) int {
+	ux := uint64(x) << 1
+	if x < 0 {
+		ux = ^ux
+	}
+	return PutUvarint(buf, ux)
+}
+
+// Varint decodes an int64 from buf and returns that value and the
+// number of bytes read (> 0). If an error occurred, the value is 0
+// and the number of bytes n is <= 0 with the following meaning:
+//
+//	n == 0: buf too small
+//	n  < 0: value larger than 64 bits (overflow)
+//              and -n is the number of bytes read
+//
+func Varint(buf []byte) (int64, int) {
+	ux, n := Uvarint(buf) // ok to continue in presence of error
+	x := int64(ux >> 1)
+	if ux&1 != 0 {
+		x = ^x
+	}
+	return x, n
+}
+
+// WriteUvarint encodes x and writes the result to w.
+func WriteUvarint(w io.Writer, x uint64) os.Error {
+	var buf [MaxVarintLen64]byte
+	n := PutUvarint(buf[:], x) // won't fail
+	_, err := w.Write(buf[0:n])
+	return err
+}
+
+var overflow = os.NewError("binary: varint overflows a 64-bit integer")
+
+// ReadUvarint reads an encoded unsigned integer from r and returns it as a uint64.
+func ReadUvarint(r io.ByteReader) (uint64, os.Error) {
+	var x uint64
+	var s uint
+	for i := 0; ; i++ {
+		b, err := r.ReadByte()
+		if err != nil {
+			return x, err
+		}
+		if b < 0x80 {
+			if i > 9 || i == 9 && b > 1 {
+				return x, overflow
+			}
+			return x | uint64(b)<<s, nil
+		}
+		x |= uint64(b&0x7f) << s
+		s += 7
+	}
+	panic("unreachable")
+}
+
+// WriteVarint encodes x and writes the result to w.
+func WriteVarint(w io.Writer, x int64) os.Error {
+	ux := uint64(x) << 1
+	if x < 0 {
+		ux = ^ux
+	}
+	return WriteUvarint(w, ux)
+}
+
+// ReadVarint reads an encoded unsigned integer from r and returns it as a uint64.
+func ReadVarint(r io.ByteReader) (int64, os.Error) {
+	ux, err := ReadUvarint(r) // ok to continue in presence of error
+	x := int64(ux >> 1)
+	if ux&1 != 0 {
+		x = ^x
+	}
+	return x, err
+}
diff --git a/src/pkg/encoding/binary/varint_test.go b/src/pkg/encoding/binary/varint_test.go
@@ -0,0 +1,175 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package binary
+
+import (
+	"bytes"
+	"os"
+	"testing"
+)
+
+func testConstant(t *testing.T, w uint, max int) {
+	n := -PutUvarint(nil, 1<<w-1)
+	if n != max {
+		t.Errorf("MaxVarintLen%d = %d; want %d", w, max, n)
+	}
+}
+
+func TestConstants(t *testing.T) {
+	testConstant(t, 16, MaxVarintLen16)
+	testConstant(t, 32, MaxVarintLen32)
+	testConstant(t, 64, MaxVarintLen64)
+}
+
+func testVarint(t *testing.T, x int64) {
+	var buf1 [10]byte
+	n := PutVarint(buf1[:], x)
+	y, m := Varint(buf1[0:n])
+	if x != y {
+		t.Errorf("Varint(%d): got %d", x, y)
+	}
+	if n != m {
+		t.Errorf("Varint(%d): got n = %d; want %d", x, m, n)
+	}
+
+	var buf2 bytes.Buffer
+	err := WriteVarint(&buf2, x)
+	if err != nil {
+		t.Errorf("WriteVarint(%d): %s", x, err)
+	}
+	if n != buf2.Len() {
+		t.Errorf("WriteVarint(%d): got n = %d; want %d", x, buf2.Len(), n)
+	}
+	y, err = ReadVarint(&buf2)
+	if err != nil {
+		t.Errorf("ReadVarint(%d): %s", x, err)
+	}
+	if x != y {
+		t.Errorf("ReadVarint(%d): got %d", x, y)
+	}
+}
+
+func testUvarint(t *testing.T, x uint64) {
+	var buf1 [10]byte
+	n := PutUvarint(buf1[:], x)
+	y, m := Uvarint(buf1[0:n])
+	if x != y {
+		t.Errorf("Uvarint(%d): got %d", x, y)
+	}
+	if n != m {
+		t.Errorf("Uvarint(%d): got n = %d; want %d", x, m, n)
+	}
+
+	var buf2 bytes.Buffer
+	err := WriteUvarint(&buf2, x)
+	if err != nil {
+		t.Errorf("WriteUvarint(%d): %s", x, err)
+	}
+	if n != buf2.Len() {
+		t.Errorf("WriteUvarint(%d): got n = %d; want %d", x, buf2.Len(), n)
+	}
+	y, err = ReadUvarint(&buf2)
+	if err != nil {
+		t.Errorf("ReadUvarint(%d): %s", x, err)
+	}
+	if x != y {
+		t.Errorf("ReadUvarint(%d): got %d", x, y)
+	}
+}
+
+var tests = []int64{
+	-1 << 63,
+	-1<<63 + 1,
+	-1,
+	0,
+	1,
+	2,
+	10,
+	20,
+	63,
+	64,
+	65,
+	127,
+	128,
+	129,
+	255,
+	256,
+	257,
+	1<<63 - 1,
+}
+
+func TestVarint(t *testing.T) {
+	for _, x := range tests {
+		testVarint(t, x)
+		testVarint(t, -x)
+	}
+	for x := int64(0x7); x != 0; x <<= 1 {
+		testVarint(t, x)
+		testVarint(t, -x)
+	}
+}
+
+func TestUvarint(t *testing.T) {
+	for _, x := range tests {
+		testUvarint(t, uint64(x))
+	}
+	for x := uint64(0x7); x != 0; x <<= 1 {
+		testUvarint(t, x)
+	}
+}
+
+func TestBufferTooSmall(t *testing.T) {
+	for i := 0; i < 10; i++ {
+		buf := make([]byte, i)
+		x := uint64(1) << (uint(i) * 7)
+		n0 := -i
+		if i == 0 {
+			n0 = -1 // encoding 0 takes one byte
+		}
+		if n := PutUvarint(buf, x); n != n0 {
+			t.Errorf("PutUvarint([%d]byte, %d): got n = %d; want %d", len(buf), x, n, n0)
+		}
+	}
+
+	buf := []byte{0x80, 0x80, 0x80, 0x80}
+	for i := 0; i <= len(buf); i++ {
+		buf := buf[0:i]
+		x, n := Uvarint(buf)
+		if x != 0 || n != 0 {
+			t.Errorf("Uvarint(%v): got x = %d, n = %d", buf, x, n)
+		}
+
+		x, err := ReadUvarint(bytes.NewBuffer(buf))
+		if x != 0 || err != os.EOF {
+			t.Errorf("ReadUvarint(%v): got x = %d, err = %s", buf, x, err)
+		}
+	}
+}
+
+func testOverflow(t *testing.T, buf []byte, n0 int, err0 os.Error) {
+	x, n := Uvarint(buf)
+	if x != 0 || n != n0 {
+		t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, %d", buf, x, n, n0)
+	}
+
+	x, err := ReadUvarint(bytes.NewBuffer(buf))
+	if x != 0 || err != err0 {
+		t.Errorf("ReadUvarint(%v): got x = %d, err = %s; want 0, %s", buf, x, err, err0)
+	}
+}
+
+func TestOverflow(t *testing.T) {
+	testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2}, -10, overflow)
+	testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0}, -13, overflow)
+}
+
+func TestNonCanonicalZero(t *testing.T) {
+	buf := []byte{0x80, 0x80, 0x80, 0}
+	x, n := Uvarint(buf)
+	if x != 0 || n != 4 {
+		t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, 4", buf, x, n)
+
+	}
+}