archive/tar: fix bugs with sparseFileReader

The sparseFileReader is prone to two different forms of denial-of-service attacks: * A malicious tar file can cause an infinite loop * A malicious tar file can cause arbitrary panics This results because of poor error checking/handling, which this CL fixes. While we are at it, add a plethora of unit tests to test for possible malicious inputs. Change-Id: I2f9446539d189f3c1738a1608b0ad4859c1be929 Reviewed-on: https://go-review.googlesource.com/15115 Reviewed-by: Andrew Gerrand <adg@golang.org> Run-TryBot: Andrew Gerrand <adg@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
golang · Oct 1, 2015 · 79480ca · 79480ca
1 parent b179739
commit 79480ca
Show file tree

Hide file tree

Showing 2 changed files with 232 additions and 101 deletions.
diff --git a/src/archive/tar/reader.go b/src/archive/tar/reader.go
@@ -12,6 +12,7 @@ import (
 	"errors"
 	"io"
 	"io/ioutil"
+	"math"
 	"os"
 	"strconv"
 	"strings"
@@ -49,12 +50,36 @@ type regFileReader struct {
 	nb int64     // number of unread bytes for current file entry
 }
 
-// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive.
+// A sparseFileReader is a numBytesReader for reading sparse file data from a
+// tar archive.
 type sparseFileReader struct {
-	rfr *regFileReader // reads the sparse-encoded file data
-	sp  []sparseEntry  // the sparse map for the file
-	pos int64          // keeps track of file position
-	tot int64          // total size of the file
+	rfr   numBytesReader // Reads the sparse-encoded file data
+	sp    []sparseEntry  // The sparse map for the file
+	pos   int64          // Keeps track of file position
+	total int64          // Total size of the file
+}
+
+// A sparseEntry holds a single entry in a sparse file's sparse map.
+//
+// Sparse files are represented using a series of sparseEntrys.
+// Despite the name, a sparseEntry represents an actual data fragment that
+// references data found in the underlying archive stream. All regions not
+// covered by a sparseEntry are logically filled with zeros.
+//
+// For example, if the underlying raw file contains the 10-byte data:
+//	var compactData = "abcdefgh"
+//
+// And the sparse map has the following entries:
+//	var sp = []sparseEntry{
+//		{offset: 2,  numBytes: 5} // Data fragment for [2..7]
+//		{offset: 18, numBytes: 3} // Data fragment for [18..21]
+//	}
+//
+// Then the content of the resulting sparse file with a "real" size of 25 is:
+//	var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
+type sparseEntry struct {
+	offset   int64 // Starting position of the fragment
+	numBytes int64 // Length of the fragment
 }
 
 // Keywords for GNU sparse files in a PAX extended header
@@ -128,7 +153,10 @@ func (tr *Reader) Next() (*Header, error) {
 		if sp != nil {
 			// Current file is a PAX format GNU sparse file.
 			// Set the current file reader to a sparse file reader.
-			tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
+			tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
+			if tr.err != nil {
+				return nil, tr.err
+			}
 		}
 		return hdr, nil
 	case TypeGNULongName:
@@ -541,21 +569,17 @@ func (tr *Reader) readHeader() *Header {
 		if tr.err != nil {
 			return nil
 		}
+
 		// Current file is a GNU sparse file. Update the current file reader.
-		tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
+		tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
+		if tr.err != nil {
+			return nil
+		}
 	}
 
 	return hdr
 }
 
-// A sparseEntry holds a single entry in a sparse file's sparse map.
-// A sparse entry indicates the offset and size in a sparse file of a
-// block of data.
-type sparseEntry struct {
-	offset   int64
-	numBytes int64
-}
-
 // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
 // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
 // then one or more extension headers are used to store the rest of the sparse map.
@@ -771,9 +795,33 @@ func (rfr *regFileReader) numBytes() int64 {
 	return rfr.nb
 }
 
-// readHole reads a sparse file hole ending at offset toOffset
-func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
-	n64 := toOffset - sfr.pos
+// newSparseFileReader creates a new sparseFileReader, but validates all of the
+// sparse entries before doing so.
+func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
+	if total < 0 {
+		return nil, ErrHeader // Total size cannot be negative
+	}
+
+	// Validate all sparse entries. These are the same checks as performed by
+	// the BSD tar utility.
+	for i, s := range sp {
+		switch {
+		case s.offset < 0 || s.numBytes < 0:
+			return nil, ErrHeader // Negative values are never okay
+		case s.offset > math.MaxInt64-s.numBytes:
+			return nil, ErrHeader // Integer overflow with large length
+		case s.offset+s.numBytes > total:
+			return nil, ErrHeader // Region extends beyond the "real" size
+		case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
+			return nil, ErrHeader // Regions can't overlap and must be in order
+		}
+	}
+	return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
+}
+
+// readHole reads a sparse hole ending at endOffset.
+func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
+	n64 := endOffset - sfr.pos
 	if n64 > int64(len(b)) {
 		n64 = int64(len(b))
 	}
@@ -787,49 +835,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
 
 // Read reads the sparse file data in expanded form.
 func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
+	// Skip past all empty fragments.
+	for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
+		sfr.sp = sfr.sp[1:]
+	}
+
+	// If there are no more fragments, then it is possible that there
+	// is one last sparse hole.
 	if len(sfr.sp) == 0 {
-		// No more data fragments to read from.
-		if sfr.pos < sfr.tot {
-			// We're in the last hole
-			n = sfr.readHole(b, sfr.tot)
-			return
+		// This behavior matches the BSD tar utility.
+		// However, GNU tar stops returning data even if sfr.total is unmet.
+		if sfr.pos < sfr.total {
+			return sfr.readHole(b, sfr.total), nil
 		}
-		// Otherwise, we're at the end of the file
 		return 0, io.EOF
 	}
-	if sfr.tot < sfr.sp[0].offset {
-		return 0, io.ErrUnexpectedEOF
-	}
+
+	// In front of a data fragment, so read a hole.
 	if sfr.pos < sfr.sp[0].offset {
-		// We're in a hole
-		n = sfr.readHole(b, sfr.sp[0].offset)
-		return
+		return sfr.readHole(b, sfr.sp[0].offset), nil
 	}
 
-	// We're not in a hole, so we'll read from the next data fragment
-	posInFragment := sfr.pos - sfr.sp[0].offset
-	bytesLeft := sfr.sp[0].numBytes - posInFragment
+	// In a data fragment, so read from it.
+	// This math is overflow free since we verify that offset and numBytes can
+	// be safely added when creating the sparseFileReader.
+	endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
+	bytesLeft := endPos - sfr.pos                   // Bytes left in fragment
 	if int64(len(b)) > bytesLeft {
-		b = b[0:bytesLeft]
+		b = b[:bytesLeft]
 	}
 
 	n, err = sfr.rfr.Read(b)
 	sfr.pos += int64(n)
-
-	if int64(n) == bytesLeft {
-		// We're done with this fragment
-		sfr.sp = sfr.sp[1:]
+	if err == io.EOF {
+		if sfr.pos < endPos {
+			err = io.ErrUnexpectedEOF // There was supposed to be more data
+		} else if sfr.pos < sfr.total {
+			err = nil // There is still an implicit sparse hole at the end
+		}
 	}
 
-	if err == io.EOF && sfr.pos < sfr.tot {
-		// We reached the end of the last fragment's data, but there's a final hole
-		err = nil
+	if sfr.pos == endPos {
+		sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
 	}
-	return
+	return n, err
 }
 
 // numBytes returns the number of bytes left to read in the sparse file's
 // sparse-encoded data in the tar archive.
 func (sfr *sparseFileReader) numBytes() int64 {
-	return sfr.rfr.nb
+	return sfr.rfr.numBytes()
 }