Skip to content

Commit

Permalink
encoding/binary: support for varint encoding
Browse files Browse the repository at this point in the history
R=rsc, r, nigeltao, r, dsymonds
CC=golang-dev
https://golang.org/cl/5146048
  • Loading branch information
griesemer committed Sep 29, 2011
1 parent b741369 commit f30719d
Show file tree
Hide file tree
Showing 4 changed files with 342 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/pkg/encoding/binary/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ include ../../../Make.inc
TARG=encoding/binary
GOFILES=\
binary.go\
varint.go\

include ../../../Make.pkg
6 changes: 3 additions & 3 deletions src/pkg/encoding/binary/binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ import (
// A ByteOrder specifies how to convert byte sequences into
// 16-, 32-, or 64-bit unsigned integers.
type ByteOrder interface {
Uint16(b []byte) uint16
Uint32(b []byte) uint32
Uint64(b []byte) uint64
Uint16([]byte) uint16
Uint32([]byte) uint32
Uint64([]byte) uint64
PutUint16([]byte, uint16)
PutUint32([]byte, uint32)
PutUint64([]byte, uint64)
Expand Down
163 changes: 163 additions & 0 deletions src/pkg/encoding/binary/varint.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package binary

// This file implements "varint" encoding of 64-bit integers.
// The encoding is:
// - unsigned integers are serialized 7 bits at a time, starting with the
// least significant bits
// - the most significant bit (msb) in each output byte indicates if there
// is a continuation byte (msb = 1)
// - signed integers are mapped to unsigned integers using "zig-zag"
// encoding: Non-negative values x are written as 2*x + 0, negative values
// are written as 2*(^x) + 1; that is, negative numbers are complemented
// and whether to complement is encoded in bit 0.
//
// Design note:
// At most 10 bytes are needed for 64-bit values. The encoding could
// be more dense: a full 64-bit value needs an extra byte just to hold bit 63.
// Instead, the msb of the previous byte could be used to hold bit 63 since we
// know there can't be more than 64 bits. This is a trivial improvement and
// would reduce the maximum encoding length to 9 bytes. However, it breaks the
// invariant that the msb is always the "continuation bit" and thus makes the
// format incompatible with a varint encoding for larger numbers (say 128-bit).

import (
"io"
"os"
)

// MaxVarintLenN is the maximum length of a varint-encoded N-bit integer.
// Each encoded byte carries 7 bits of the value, so an N-bit integer needs
// at most ceil(N/7) bytes: 3 for 16 bits, 5 for 32 bits and 10 for 64 bits.
const (
MaxVarintLen16 = 3
MaxVarintLen32 = 5
MaxVarintLen64 = 10
)

// PutUvarint encodes a uint64 into buf and returns the number of bytes written.
// If the buffer is too small, the result is the negated number of bytes required
// (that is, -PutUvarint(nil, x) is the number of bytes required to encode x).
// When the buffer is too small, the leading bytes of the encoding that do fit
// are still stored in buf.
func PutUvarint(buf []byte, x uint64) int {
	// n counts the bytes of the encoding produced so far. Bytes are only
	// stored while they fit, but counting always continues so that the
	// total required length is known even when buf is too short.
	n := 0
	for x >= 0x80 {
		if n < len(buf) {
			buf[n] = byte(x) | 0x80 // low 7 bits plus continuation bit
		}
		x >>= 7
		n++
	}
	// x now holds the final group (< 0x80), which is written without the
	// continuation bit.
	if n < len(buf) {
		buf[n] = byte(x)
		return n + 1
	}
	// buffer too small: report the full encoding length, negated
	return -(n + 1)
}

// Uvarint decodes a uint64 from buf and returns the decoded value together
// with the number of bytes consumed (> 0). On failure the value is 0 and
// the count n is <= 0:
//
//	n == 0: buf ended before a terminating byte was found
//	n  < 0: the value does not fit in 64 bits (overflow);
//	        -n is the number of bytes consumed
func Uvarint(buf []byte) (uint64, int) {
	var value uint64
	var shift uint
	for idx := 0; idx < len(buf); idx++ {
		c := buf[idx]
		if c&0x80 != 0 {
			// Continuation byte: merge its 7 payload bits and keep going.
			value |= uint64(c&0x7f) << shift
			shift += 7
			continue
		}
		// Terminating byte. The tenth byte may only contribute bit 63, and
		// anything past the tenth byte cannot fit at all.
		if idx >= 10 || (idx == 9 && c >= 2) {
			return 0, -(idx + 1)
		}
		return value | uint64(c)<<shift, idx + 1
	}
	return 0, 0
}

// PutVarint zig-zag encodes an int64 into buf and returns the number of
// bytes written. If the buffer is too small, the result is whatever
// PutUvarint reports for the zig-zag mapped value (a negative number).
func PutVarint(buf []byte, x int64) int {
	// x>>63 is an arithmetic shift: all ones for negative x, zero otherwise.
	// XOR-ing it into x<<1 therefore complements the shifted value exactly
	// when x is negative, which leaves the sign in bit 0.
	return PutUvarint(buf, uint64(x<<1)^uint64(x>>63))
}

// Varint decodes a zig-zag encoded int64 from buf and returns the value
// together with the number of bytes consumed (> 0). On failure the value
// is 0 and the count n is <= 0:
//
//	n == 0: buf ended before a terminating byte was found
//	n  < 0: the value does not fit in 64 bits (overflow);
//	        -n is the number of bytes consumed
func Varint(buf []byte) (int64, int) {
	ux, n := Uvarint(buf)
	// No error check is needed here: Uvarint yields 0 on failure, and 0
	// maps back to 0 below.
	// -int64(ux&1) is all ones when bit 0 (the sign flag) is set, so the
	// XOR undoes the complement applied by the encoder.
	return int64(ux>>1) ^ -int64(ux&1), n
}

// WriteUvarint varint-encodes x and writes the encoding to w with a single
// Write call. It returns the error reported by w, if any.
func WriteUvarint(w io.Writer, x uint64) os.Error {
	// The scratch array holds the longest possible encoding, so PutUvarint
	// always succeeds and returns a positive length.
	var scratch [MaxVarintLen64]byte
	size := PutUvarint(scratch[:], x)
	_, err := w.Write(scratch[:size])
	return err
}

// overflow is the error ReadUvarint returns when the encoded value does
// not fit in 64 bits.
var overflow = os.NewError("binary: varint overflows a 64-bit integer")

// ReadUvarint reads an encoded unsigned integer from r and returns it as a uint64.
//
// At most 10 bytes (the longest encoding of a 64-bit value) are read from r.
// If no terminating byte is found within those 10 bytes, or the tenth byte
// carries more than bit 63, the overflow error is returned. An error from
// r.ReadByte is returned unchanged. Whenever the error is non-nil, the
// returned value holds only the bits accumulated so far and should not be used.
func ReadUvarint(r io.ByteReader) (uint64, os.Error) {
	// Bounding the loop keeps a malformed stream of continuation bytes from
	// being consumed without limit before the overflow is reported.
	const maxBytes = 10
	var x uint64
	var s uint
	for i := 0; i < maxBytes; i++ {
		b, err := r.ReadByte()
		if err != nil {
			return x, err
		}
		if b < 0x80 {
			// Terminating byte. Only the tenth byte can overflow: it may
			// contribute nothing beyond bit 63.
			if i == maxBytes-1 && b > 1 {
				return x, overflow
			}
			return x | uint64(b)<<s, nil
		}
		x |= uint64(b&0x7f) << s
		s += 7
	}
	// Ten continuation bytes in a row: the value cannot fit in 64 bits.
	return x, overflow
}

// WriteVarint zig-zag encodes x and writes the result to w. It returns the
// error reported by WriteUvarint, if any.
func WriteVarint(w io.Writer, x int64) os.Error {
	// x>>63 is all ones for negative x and zero otherwise, so the XOR
	// complements the shifted value exactly when x is negative.
	return WriteUvarint(w, uint64(x<<1)^uint64(x>>63))
}

// ReadVarint reads a zig-zag encoded signed integer from r and returns it
// as an int64, together with any error reported by ReadUvarint.
func ReadVarint(r io.ByteReader) (int64, os.Error) {
	ux, err := ReadUvarint(r)
	// The zig-zag mapping is undone even when err is non-nil; callers must
	// check err before using the value.
	// -int64(ux&1) is all ones when bit 0 (the sign flag) is set, so the
	// XOR restores negative values.
	return int64(ux>>1) ^ -int64(ux&1), err
}
175 changes: 175 additions & 0 deletions src/pkg/encoding/binary/varint_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package binary

import (
"bytes"
"os"
"testing"
)

// testConstant checks that max equals the number of bytes PutUvarint reports
// as required for the largest w-bit unsigned value.
func testConstant(t *testing.T, w uint, max int) {
	largest := uint64(1)<<w - 1 // for w == 64 the shift yields 0 and the subtraction wraps to all ones
	if need := -PutUvarint(nil, largest); need != max {
		t.Errorf("MaxVarintLen%d = %d; want %d", w, max, need)
	}
}

func TestConstants(t *testing.T) {
testConstant(t, 16, MaxVarintLen16)
testConstant(t, 32, MaxVarintLen32)
testConstant(t, 64, MaxVarintLen64)
}

// testVarint round-trips x through both the slice-based API
// (PutVarint/Varint) and the stream-based API (WriteVarint/ReadVarint),
// checking the decoded value and the encoded length each time.
func testVarint(t *testing.T, x int64) {
	// Slice-based round trip.
	var scratch [10]byte
	n := PutVarint(scratch[:], x)
	got, used := Varint(scratch[:n])
	if got != x {
		t.Errorf("Varint(%d): got %d", x, got)
	}
	if used != n {
		t.Errorf("Varint(%d): got n = %d; want %d", x, used, n)
	}

	// Stream-based round trip; the encoded length must match the slice form.
	var stream bytes.Buffer
	if err := WriteVarint(&stream, x); err != nil {
		t.Errorf("WriteVarint(%d): %s", x, err)
	}
	if size := stream.Len(); size != n {
		t.Errorf("WriteVarint(%d): got n = %d; want %d", x, size, n)
	}
	got, err := ReadVarint(&stream)
	if err != nil {
		t.Errorf("ReadVarint(%d): %s", x, err)
	}
	if got != x {
		t.Errorf("ReadVarint(%d): got %d", x, got)
	}
}

// testUvarint round-trips x through both the slice-based API
// (PutUvarint/Uvarint) and the stream-based API (WriteUvarint/ReadUvarint),
// checking the decoded value and the encoded length each time.
func testUvarint(t *testing.T, x uint64) {
	// Slice-based round trip.
	var scratch [10]byte
	n := PutUvarint(scratch[:], x)
	got, used := Uvarint(scratch[:n])
	if got != x {
		t.Errorf("Uvarint(%d): got %d", x, got)
	}
	if used != n {
		t.Errorf("Uvarint(%d): got n = %d; want %d", x, used, n)
	}

	// Stream-based round trip; the encoded length must match the slice form.
	var stream bytes.Buffer
	if err := WriteUvarint(&stream, x); err != nil {
		t.Errorf("WriteUvarint(%d): %s", x, err)
	}
	if size := stream.Len(); size != n {
		t.Errorf("WriteUvarint(%d): got n = %d; want %d", x, size, n)
	}
	got, err := ReadUvarint(&stream)
	if err != nil {
		t.Errorf("ReadUvarint(%d): %s", x, err)
	}
	if got != x {
		t.Errorf("ReadUvarint(%d): got %d", x, got)
	}
}

// tests lists the sample values used by TestVarint (which also tries each
// value negated) and TestUvarint (which converts each value to uint64).
// It covers both extremes of the int64 range, small values, and values
// just below, at and just above 64, 128 and 256.
var tests = []int64{
-1 << 63,
-1<<63 + 1,
-1,
0,
1,
2,
10,
20,
63,
64,
65,
127,
128,
129,
255,
256,
257,
1<<63 - 1,
}

// TestVarint round-trips every entry of tests and its negation, then a
// three-bit pattern shifted left one position at a time until it has been
// shifted out of the int64 entirely.
func TestVarint(t *testing.T) {
	for i := 0; i < len(tests); i++ {
		testVarint(t, tests[i])
		testVarint(t, -tests[i])
	}
	pattern := int64(0x7)
	for pattern != 0 {
		testVarint(t, pattern)
		testVarint(t, -pattern)
		pattern <<= 1
	}
}

// TestUvarint round-trips every entry of tests converted to uint64, then a
// three-bit pattern shifted left one position at a time until it has been
// shifted out of the uint64 entirely.
func TestUvarint(t *testing.T) {
	for i := 0; i < len(tests); i++ {
		testUvarint(t, uint64(tests[i]))
	}
	pattern := uint64(0x7)
	for pattern != 0 {
		testUvarint(t, pattern)
		pattern <<= 1
	}
}

// TestBufferTooSmall checks the "buffer too small" results of the encoder
// and both decoders.
func TestBufferTooSmall(t *testing.T) {
	// PutUvarint is documented to return the negated number of bytes
	// required when buf is too small. x has bit 7*i set, so its encoding
	// needs exactly i+1 bytes, one more than the i bytes provided.
	for i := 0; i < 10; i++ {
		buf := make([]byte, i)
		x := uint64(1) << (uint(i) * 7)
		n0 := -(i + 1)
		if n := PutUvarint(buf, x); n != n0 {
			t.Errorf("PutUvarint([%d]byte, %d): got n = %d; want %d", len(buf), x, n, n0)
		}
	}

	// Every prefix of a run of continuation bytes lacks a terminating byte:
	// Uvarint must report 0, 0 and ReadUvarint must hit EOF with value 0.
	buf := []byte{0x80, 0x80, 0x80, 0x80}
	for i := 0; i <= len(buf); i++ {
		buf := buf[0:i]
		x, n := Uvarint(buf)
		if x != 0 || n != 0 {
			t.Errorf("Uvarint(%v): got x = %d, n = %d", buf, x, n)
		}

		x, err := ReadUvarint(bytes.NewBuffer(buf))
		if x != 0 || err != os.EOF {
			t.Errorf("ReadUvarint(%v): got x = %d, err = %s", buf, x, err)
		}
	}
}

// testOverflow decodes buf with Uvarint and with ReadUvarint and checks that
// both yield the value 0, that Uvarint returns the byte count n0, and that
// ReadUvarint returns the error err0.
func testOverflow(t *testing.T, buf []byte, n0 int, err0 os.Error) {
	if x, n := Uvarint(buf); x != 0 || n != n0 {
		t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, %d", buf, x, n, n0)
	}

	if x, err := ReadUvarint(bytes.NewBuffer(buf)); x != 0 || err != err0 {
		t.Errorf("ReadUvarint(%v): got x = %d, err = %s; want 0, %s", buf, x, err, err0)
	}
}

// TestOverflow feeds the decoders two encodings that do not fit in 64 bits:
// a ten-byte encoding whose last byte is too large, and an encoding whose
// terminating byte is the thirteenth.
func TestOverflow(t *testing.T) {
	cases := []struct {
		buf []byte
		n   int
	}{
		{[]byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2}, -10},
		{[]byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0}, -13},
	}
	for _, c := range cases {
		testOverflow(t, c.buf, c.n, overflow)
	}
}

// TestNonCanonicalZero checks that Uvarint accepts a zero padded with
// redundant continuation bytes and reports all four bytes as consumed.
func TestNonCanonicalZero(t *testing.T) {
	buf := []byte{0x80, 0x80, 0x80, 0}
	if x, n := Uvarint(buf); !(x == 0 && n == 4) {
		t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, 4", buf, x, n)
	}
}

0 comments on commit f30719d

Please sign in to comment.