Skip to content

Commit

Permalink
archive/tar: fix bugs with sparseFileReader
Browse files Browse the repository at this point in the history
The sparseFileReader is prone to two different forms of
denial-of-service attacks:
* A malicious tar file can cause an infinite loop
* A malicious tar file can cause arbitrary panics

This results because of poor error checking/handling, which this
CL fixes. While we are at it, add a plethora of unit tests to
test for possible malicious inputs.

Change-Id: I2f9446539d189f3c1738a1608b0ad4859c1be929
Reviewed-on: https://go-review.googlesource.com/15115
Reviewed-by: Andrew Gerrand <adg@golang.org>
Run-TryBot: Andrew Gerrand <adg@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
  • Loading branch information
dsnet authored and adg committed Oct 1, 2015
1 parent b179739 commit 79480ca
Show file tree
Hide file tree
Showing 2 changed files with 232 additions and 101 deletions.
139 changes: 96 additions & 43 deletions src/archive/tar/reader.go
Expand Up @@ -12,6 +12,7 @@ import (
"errors"
"io"
"io/ioutil"
"math"
"os"
"strconv"
"strings"
Expand Down Expand Up @@ -49,12 +50,36 @@ type regFileReader struct {
nb int64 // number of unread bytes for current file entry
}

// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive.
// A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive.
type sparseFileReader struct {
rfr *regFileReader // reads the sparse-encoded file data
sp []sparseEntry // the sparse map for the file
pos int64 // keeps track of file position
tot int64 // total size of the file
rfr numBytesReader // Reads the sparse-encoded file data
sp []sparseEntry // The sparse map for the file
pos int64 // Keeps track of file position
total int64 // Total size of the file
}

// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 10-byte data:
// var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
// var sp = []sparseEntry{
// {offset: 2, numBytes: 5} // Data fragment for [2..7]
// {offset: 18, numBytes: 3} // Data fragment for [18..21]
// }
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
offset int64 // Starting position of the fragment
numBytes int64 // Length of the fragment
}

// Keywords for GNU sparse files in a PAX extended header
Expand Down Expand Up @@ -128,7 +153,10 @@ func (tr *Reader) Next() (*Header, error) {
if sp != nil {
// Current file is a PAX format GNU sparse file.
// Set the current file reader to a sparse file reader.
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
if tr.err != nil {
return nil, tr.err
}
}
return hdr, nil
case TypeGNULongName:
Expand Down Expand Up @@ -541,21 +569,17 @@ func (tr *Reader) readHeader() *Header {
if tr.err != nil {
return nil
}

// Current file is a GNU sparse file. Update the current file reader.
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
if tr.err != nil {
return nil
}
}

return hdr
}

// A sparseEntry holds a single entry in a sparse file's sparse map.
// A sparse entry indicates the offset and size in a sparse file of a
// block of data.
type sparseEntry struct {
offset int64
numBytes int64
}

// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
// then one or more extension headers are used to store the rest of the sparse map.
Expand Down Expand Up @@ -771,9 +795,33 @@ func (rfr *regFileReader) numBytes() int64 {
return rfr.nb
}

// readHole reads a sparse file hole ending at offset toOffset
func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
n64 := toOffset - sfr.pos
// newSparseFileReader creates a new sparseFileReader, but validates all of the
// sparse entries before doing so.
func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
if total < 0 {
return nil, ErrHeader // Total size cannot be negative
}

// Validate all sparse entries. These are the same checks as performed by
// the BSD tar utility.
for i, s := range sp {
switch {
case s.offset < 0 || s.numBytes < 0:
return nil, ErrHeader // Negative values are never okay
case s.offset > math.MaxInt64-s.numBytes:
return nil, ErrHeader // Integer overflow with large length
case s.offset+s.numBytes > total:
return nil, ErrHeader // Region extends beyond the "real" size
case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
return nil, ErrHeader // Regions can't overlap and must be in order
}
}
return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
}

// readHole reads a sparse hole ending at endOffset.
func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
n64 := endOffset - sfr.pos
if n64 > int64(len(b)) {
n64 = int64(len(b))
}
Expand All @@ -787,49 +835,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {

// Read reads the sparse file data in expanded form.
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
// Skip past all empty fragments.
for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
sfr.sp = sfr.sp[1:]
}

// If there are no more fragments, then it is possible that there
// is one last sparse hole.
if len(sfr.sp) == 0 {
// No more data fragments to read from.
if sfr.pos < sfr.tot {
// We're in the last hole
n = sfr.readHole(b, sfr.tot)
return
// This behavior matches the BSD tar utility.
// However, GNU tar stops returning data even if sfr.total is unmet.
if sfr.pos < sfr.total {
return sfr.readHole(b, sfr.total), nil
}
// Otherwise, we're at the end of the file
return 0, io.EOF
}
if sfr.tot < sfr.sp[0].offset {
return 0, io.ErrUnexpectedEOF
}

// In front of a data fragment, so read a hole.
if sfr.pos < sfr.sp[0].offset {
// We're in a hole
n = sfr.readHole(b, sfr.sp[0].offset)
return
return sfr.readHole(b, sfr.sp[0].offset), nil
}

// We're not in a hole, so we'll read from the next data fragment
posInFragment := sfr.pos - sfr.sp[0].offset
bytesLeft := sfr.sp[0].numBytes - posInFragment
// In a data fragment, so read from it.
// This math is overflow free since we verify that offset and numBytes can
// be safely added when creating the sparseFileReader.
endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
bytesLeft := endPos - sfr.pos // Bytes left in fragment
if int64(len(b)) > bytesLeft {
b = b[0:bytesLeft]
b = b[:bytesLeft]
}

n, err = sfr.rfr.Read(b)
sfr.pos += int64(n)

if int64(n) == bytesLeft {
// We're done with this fragment
sfr.sp = sfr.sp[1:]
if err == io.EOF {
if sfr.pos < endPos {
err = io.ErrUnexpectedEOF // There was supposed to be more data
} else if sfr.pos < sfr.total {
err = nil // There is still an implicit sparse hole at the end
}
}

if err == io.EOF && sfr.pos < sfr.tot {
// We reached the end of the last fragment's data, but there's a final hole
err = nil
if sfr.pos == endPos {
sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
}
return
return n, err
}

// numBytes returns the number of bytes left to read in the sparse file's
// sparse-encoded data in the tar archive.
func (sfr *sparseFileReader) numBytes() int64 {
return sfr.rfr.nb
return sfr.rfr.numBytes()
}

0 comments on commit 79480ca

Please sign in to comment.