-
Notifications
You must be signed in to change notification settings - Fork 17.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
encoding/binary: support for varint encoding
R=rsc, r, nigeltao, r, dsymonds CC=golang-dev https://golang.org/cl/5146048
- Loading branch information
Showing
4 changed files
with
342 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,5 +7,6 @@ include ../../../Make.inc | |
TARG=encoding/binary | ||
GOFILES=\ | ||
binary.go\ | ||
varint.go\ | ||
|
||
include ../../../Make.pkg |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
// Copyright 2011 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package binary | ||
|
||
// This file implements "varint" encoding of 64-bit integers. | ||
// The encoding is: | ||
// - unsigned integers are serialized 7 bits at a time, starting with the | ||
// least significant bits | ||
// - the most significant bit (msb) in each output byte indicates if there | ||
// is a continuation byte (msb = 1) | ||
// - signed integers are mapped to unsigned integers using "zig-zag" | ||
// encoding: Positive values x are written as 2*x + 0, negative values | ||
// are written as 2*(^x) + 1; that is, negative numbers are complemented | ||
// and whether to complement is encoded in bit 0. | ||
// | ||
// Design note: | ||
// At most 10 bytes are needed for 64-bit values. The encoding could | ||
// be more dense: a full 64-bit value needs an extra byte just to hold bit 63. | ||
// Instead, the msb of the previous byte could be used to hold bit 63 since we | ||
// know there can't be more than 64 bits. This is a trivial improvement and | ||
// would reduce the maximum encoding length to 9 bytes. However, it breaks the | ||
// invariant that the msb is always the "continuation bit" and thus makes the | ||
// format incompatible with a varint encoding for larger numbers (say 128-bit). | ||
|
||
import ( | ||
"io" | ||
"os" | ||
) | ||
|
||
// MaxVarintLenN is the maximum length of a varint-encoded N-bit integer.
// Every encoded byte carries 7 payload bits, so an N-bit value occupies
// at most ceil(N/7) bytes.
const (
	MaxVarintLen16 = 3  // ceil(16 / 7)
	MaxVarintLen32 = 5  // ceil(32 / 7)
	MaxVarintLen64 = 10 // ceil(64 / 7)
)
|
||
// PutUvarint encodes a uint64 into buf and returns the number of bytes written.
// If the buffer is too small, the result is the negated number of bytes required
// (that is, -PutUvarint(nil, x) is the number of bytes required to encode x).
// When the buffer is too small, the bytes that did fit have already been
// stored in buf, so its contents must be treated as garbage.
func PutUvarint(buf []byte, x uint64) int {
	n := 0 // number of bytes emitted so far
	for n < len(buf) {
		if x < 0x80 {
			// Final byte: continuation bit clear.
			buf[n] = byte(x)
			return n + 1
		}
		// Low 7 bits plus the continuation bit.
		buf[n] = byte(x) | 0x80
		x >>= 7
		n++
	}
	// Buffer too small. n bytes were consumed by the loop above; add the
	// bytes still required for the bits remaining in x (always at least
	// one, for the terminating byte).
	n++
	for x >= 0x80 {
		x >>= 7
		n++
	}
	return -n
}
|
||
// Uvarint decodes a uint64 from buf and returns that value and the
// number of bytes read (> 0). On failure the value is 0 and the byte
// count n is <= 0:
//
//	n == 0: buf ended before a terminating byte was seen
//	n < 0: the encoded value does not fit in 64 bits (overflow);
//	       -n is the number of bytes read
//
func Uvarint(buf []byte) (uint64, int) {
	var value uint64
	for idx := 0; idx < len(buf); idx++ {
		cur := buf[idx]
		shift := uint(idx) * 7 // each earlier byte contributed 7 bits
		if cur&0x80 != 0 {
			// Continuation byte: accumulate its payload and keep going.
			value |= uint64(cur&0x7f) << shift
			continue
		}
		// Terminating byte. The tenth byte (idx 9) may only supply bit 63,
		// and anything beyond it cannot fit at all.
		if idx >= 10 || (idx == 9 && cur > 1) {
			return 0, -(idx + 1)
		}
		return value | uint64(cur)<<shift, idx + 1
	}
	return 0, 0
}
|
||
// PutVarint encodes an int64 into buf and returns the number of bytes written. | ||
// If the buffer is too small, the result is the negated number of bytes required | ||
// (that is, -PutVarint(nil, x) is the number of bytes required to encode x). | ||
func PutVarint(buf []byte, x int64) int { | ||
ux := uint64(x) << 1 | ||
if x < 0 { | ||
ux = ^ux | ||
} | ||
return PutUvarint(buf, ux) | ||
} | ||
|
||
// Varint decodes an int64 from buf and returns that value and the | ||
// number of bytes read (> 0). If an error occurred, the value is 0 | ||
// and the number of bytes n is <= 0 with the following meaning: | ||
// | ||
// n == 0: buf too small | ||
// n < 0: value larger than 64 bits (overflow) | ||
// and -n is the number of bytes read | ||
// | ||
func Varint(buf []byte) (int64, int) { | ||
ux, n := Uvarint(buf) // ok to continue in presence of error | ||
x := int64(ux >> 1) | ||
if ux&1 != 0 { | ||
x = ^x | ||
} | ||
return x, n | ||
} | ||
|
||
// WriteUvarint encodes x and writes the result to w. | ||
func WriteUvarint(w io.Writer, x uint64) os.Error { | ||
var buf [MaxVarintLen64]byte | ||
n := PutUvarint(buf[:], x) // won't fail | ||
_, err := w.Write(buf[0:n]) | ||
return err | ||
} | ||
|
||
var overflow = os.NewError("binary: varint overflows a 64-bit integer") | ||
|
||
// ReadUvarint reads an encoded unsigned integer from r and returns it as a uint64. | ||
func ReadUvarint(r io.ByteReader) (uint64, os.Error) { | ||
var x uint64 | ||
var s uint | ||
for i := 0; ; i++ { | ||
b, err := r.ReadByte() | ||
if err != nil { | ||
return x, err | ||
} | ||
if b < 0x80 { | ||
if i > 9 || i == 9 && b > 1 { | ||
return x, overflow | ||
} | ||
return x | uint64(b)<<s, nil | ||
} | ||
x |= uint64(b&0x7f) << s | ||
s += 7 | ||
} | ||
panic("unreachable") | ||
} | ||
|
||
// WriteVarint encodes x and writes the result to w. | ||
func WriteVarint(w io.Writer, x int64) os.Error { | ||
ux := uint64(x) << 1 | ||
if x < 0 { | ||
ux = ^ux | ||
} | ||
return WriteUvarint(w, ux) | ||
} | ||
|
||
// ReadVarint reads an encoded unsigned integer from r and returns it as a uint64. | ||
func ReadVarint(r io.ByteReader) (int64, os.Error) { | ||
ux, err := ReadUvarint(r) // ok to continue in presence of error | ||
x := int64(ux >> 1) | ||
if ux&1 != 0 { | ||
x = ^x | ||
} | ||
return x, err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
// Copyright 2011 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package binary | ||
|
||
import ( | ||
"bytes" | ||
"os" | ||
"testing" | ||
) | ||
|
||
func testConstant(t *testing.T, w uint, max int) { | ||
n := -PutUvarint(nil, 1<<w-1) | ||
if n != max { | ||
t.Errorf("MaxVarintLen%d = %d; want %d", w, max, n) | ||
} | ||
} | ||
|
||
func TestConstants(t *testing.T) { | ||
testConstant(t, 16, MaxVarintLen16) | ||
testConstant(t, 32, MaxVarintLen32) | ||
testConstant(t, 64, MaxVarintLen64) | ||
} | ||
|
||
func testVarint(t *testing.T, x int64) { | ||
var buf1 [10]byte | ||
n := PutVarint(buf1[:], x) | ||
y, m := Varint(buf1[0:n]) | ||
if x != y { | ||
t.Errorf("Varint(%d): got %d", x, y) | ||
} | ||
if n != m { | ||
t.Errorf("Varint(%d): got n = %d; want %d", x, m, n) | ||
} | ||
|
||
var buf2 bytes.Buffer | ||
err := WriteVarint(&buf2, x) | ||
if err != nil { | ||
t.Errorf("WriteVarint(%d): %s", x, err) | ||
} | ||
if n != buf2.Len() { | ||
t.Errorf("WriteVarint(%d): got n = %d; want %d", x, buf2.Len(), n) | ||
} | ||
y, err = ReadVarint(&buf2) | ||
if err != nil { | ||
t.Errorf("ReadVarint(%d): %s", x, err) | ||
} | ||
if x != y { | ||
t.Errorf("ReadVarint(%d): got %d", x, y) | ||
} | ||
} | ||
|
||
func testUvarint(t *testing.T, x uint64) { | ||
var buf1 [10]byte | ||
n := PutUvarint(buf1[:], x) | ||
y, m := Uvarint(buf1[0:n]) | ||
if x != y { | ||
t.Errorf("Uvarint(%d): got %d", x, y) | ||
} | ||
if n != m { | ||
t.Errorf("Uvarint(%d): got n = %d; want %d", x, m, n) | ||
} | ||
|
||
var buf2 bytes.Buffer | ||
err := WriteUvarint(&buf2, x) | ||
if err != nil { | ||
t.Errorf("WriteUvarint(%d): %s", x, err) | ||
} | ||
if n != buf2.Len() { | ||
t.Errorf("WriteUvarint(%d): got n = %d; want %d", x, buf2.Len(), n) | ||
} | ||
y, err = ReadUvarint(&buf2) | ||
if err != nil { | ||
t.Errorf("ReadUvarint(%d): %s", x, err) | ||
} | ||
if x != y { | ||
t.Errorf("ReadUvarint(%d): got %d", x, y) | ||
} | ||
} | ||
|
||
// tests lists the values shared by TestVarint and TestUvarint: the int64
// extremes, small values, and the neighbourhoods of 64, 128 and 256.
var tests = []int64{
	// most negative values
	-1 << 63, -1<<63 + 1,
	// around zero
	-1, 0, 1, 2,
	// small values
	10, 20,
	// around 64
	63, 64, 65,
	// around 128
	127, 128, 129,
	// around 256
	255, 256, 257,
	// largest int64
	1<<63 - 1,
}
|
||
func TestVarint(t *testing.T) { | ||
for _, x := range tests { | ||
testVarint(t, x) | ||
testVarint(t, -x) | ||
} | ||
for x := int64(0x7); x != 0; x <<= 1 { | ||
testVarint(t, x) | ||
testVarint(t, -x) | ||
} | ||
} | ||
|
||
func TestUvarint(t *testing.T) { | ||
for _, x := range tests { | ||
testUvarint(t, uint64(x)) | ||
} | ||
for x := uint64(0x7); x != 0; x <<= 1 { | ||
testUvarint(t, x) | ||
} | ||
} | ||
|
||
func TestBufferTooSmall(t *testing.T) { | ||
for i := 0; i < 10; i++ { | ||
buf := make([]byte, i) | ||
x := uint64(1) << (uint(i) * 7) | ||
n0 := -i | ||
if i == 0 { | ||
n0 = -1 // encoding 0 takes one byte | ||
} | ||
if n := PutUvarint(buf, x); n != n0 { | ||
t.Errorf("PutUvarint([%d]byte, %d): got n = %d; want %d", len(buf), x, n, n0) | ||
} | ||
} | ||
|
||
buf := []byte{0x80, 0x80, 0x80, 0x80} | ||
for i := 0; i <= len(buf); i++ { | ||
buf := buf[0:i] | ||
x, n := Uvarint(buf) | ||
if x != 0 || n != 0 { | ||
t.Errorf("Uvarint(%v): got x = %d, n = %d", buf, x, n) | ||
} | ||
|
||
x, err := ReadUvarint(bytes.NewBuffer(buf)) | ||
if x != 0 || err != os.EOF { | ||
t.Errorf("ReadUvarint(%v): got x = %d, err = %s", buf, x, err) | ||
} | ||
} | ||
} | ||
|
||
func testOverflow(t *testing.T, buf []byte, n0 int, err0 os.Error) { | ||
x, n := Uvarint(buf) | ||
if x != 0 || n != n0 { | ||
t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, %d", buf, x, n, n0) | ||
} | ||
|
||
x, err := ReadUvarint(bytes.NewBuffer(buf)) | ||
if x != 0 || err != err0 { | ||
t.Errorf("ReadUvarint(%v): got x = %d, err = %s; want 0, %s", buf, x, err, err0) | ||
} | ||
} | ||
|
||
func TestOverflow(t *testing.T) { | ||
testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2}, -10, overflow) | ||
testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0}, -13, overflow) | ||
} | ||
|
||
func TestNonCanonicalZero(t *testing.T) { | ||
buf := []byte{0x80, 0x80, 0x80, 0} | ||
x, n := Uvarint(buf) | ||
if x != 0 || n != 4 { | ||
t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, 4", buf, x, n) | ||
|
||
} | ||
} |