Navigation Menu

Skip to content

Commit

Permalink
archive/tar: implement Writer support for sparse files
Browse files Browse the repository at this point in the history
This CL is the second step (of two; part1 is CL/56771) for adding
sparse file support to the Writer.

There are no new identifiers exported in this CL, but this does make
use of Header.SparseHoles added in part1. If the Typeflag is set to
TypeGNUSparse or len(SparseHoles) > 0, then the Writer will emit a
sparse file, where the holes must be written by the user as zeros.

If TypeGNUSparse is set, then the output file must use the GNU format.
Otherwise, it must use the PAX format (with GNU-defined PAX keys).

A future CL may export Reader.Discard and Writer.FillZeros,
but those methods are currently unexported, and only used by the
tests for efficiency reasons.
Calling Discard or FillZeros on a hole 10GiB in size does take
time, even if it is essentially a memcopy.

Updates #13548

Change-Id: Id586d9178c227c0577f796f731ae2cbb72355601
Reviewed-on: https://go-review.googlesource.com/57212
Reviewed-by: Ian Lance Taylor <iant@golang.org>
  • Loading branch information
dsnet committed Aug 23, 2017
1 parent 645ecf5 commit e0ab505
Show file tree
Hide file tree
Showing 13 changed files with 1,026 additions and 203 deletions.
26 changes: 22 additions & 4 deletions src/archive/tar/common.go
Expand Up @@ -33,6 +33,7 @@ var (
ErrWriteAfterClose = errors.New("tar: write after close")
errMissData = errors.New("tar: sparse file references non-existent data")
errUnrefData = errors.New("tar: sparse file contains unreferenced data")
errWriteHole = errors.New("tar: write non-NUL byte in sparse hole")
)

// Header type flags.
Expand Down Expand Up @@ -74,10 +75,13 @@ type Header struct {

// SparseHoles represents a sequence of holes in a sparse file.
//
// The regions must be sorted in ascending order, not overlap with
// each other, and not extend past the specified Size.
// The file is sparse if either len(SparseHoles) > 0 or
// the Typeflag is set to TypeGNUSparse.
// A file is sparse if len(SparseHoles) > 0 or Typeflag is TypeGNUSparse.
// A sparse file consists of fragments of data, intermixed with holes
// (described by this field). A hole is semantically a block of NUL-bytes,
// but does not actually exist within the TAR file.
// The logical size of the file is stored in the Size field, while
// the holes must be sorted in ascending order,
// not overlap with each other, and not extend past the specified Size.
SparseHoles []SparseEntry
}

Expand Down Expand Up @@ -300,6 +304,20 @@ func (h *Header) allowedFormats() (format int, paxHdrs map[string]string) {
return formatUnknown, nil // Invalid PAX key
}
}
if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse {
if isHeaderOnlyType(h.Typeflag) {
return formatUnknown, nil // Cannot have sparse data on header-only file
}
if !validateSparseEntries(h.SparseHoles, h.Size) {
return formatUnknown, nil
}
if h.Typeflag == TypeGNUSparse {
format &= formatGNU // GNU only
} else {
format &^= formatGNU // No GNU
}
format &^= formatUSTAR // No USTAR
}
return format, paxHdrs
}

Expand Down
112 changes: 95 additions & 17 deletions src/archive/tar/example_test.go
Expand Up @@ -7,20 +7,20 @@ package tar_test
import (
"archive/tar"
"bytes"
"crypto/md5"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"strings"
)

func Example() {
// Create a buffer to write our archive to.
buf := new(bytes.Buffer)

// Create a new tar archive.
// Create and add some files to the archive.
tw := tar.NewWriter(buf)

// Add some files to the archive.
var files = []struct {
Name, Body string
}{
Expand All @@ -35,34 +35,29 @@ func Example() {
Size: int64(len(file.Body)),
}
if err := tw.WriteHeader(hdr); err != nil {
log.Fatalln(err)
log.Fatal(err)
}
if _, err := tw.Write([]byte(file.Body)); err != nil {
log.Fatalln(err)
log.Fatal(err)
}
}
// Make sure to check the error on Close.
if err := tw.Close(); err != nil {
log.Fatalln(err)
log.Fatal(err)
}

// Open the tar archive for reading.
r := bytes.NewReader(buf.Bytes())
tr := tar.NewReader(r)

// Iterate through the files in the archive.
// Open and iterate through the files in the archive.
tr := tar.NewReader(buf)
for {
hdr, err := tr.Next()
if err == io.EOF {
// end of tar archive
break
break // End of archive
}
if err != nil {
log.Fatalln(err)
log.Fatal(err)
}
fmt.Printf("Contents of %s:\n", hdr.Name)
if _, err := io.Copy(os.Stdout, tr); err != nil {
log.Fatalln(err)
log.Fatal(err)
}
fmt.Println()
}
Expand All @@ -78,3 +73,86 @@ func Example() {
// Contents of todo.txt:
// Get animal handling license.
}

// A sparse file can efficiently represent a large file that is mostly empty.
//
// This example writes one sparse file into an in-memory archive, reads it
// back, and prints the size, MD5 digest, and SparseHoles seen on each side.
func Example_sparse() {
buf := new(bytes.Buffer)

// Define a sparse file to add to the archive.
// This sparse file contains 5 data fragments and 4 hole fragments.
// The logical size of the file is 16 KiB, while the physical size of the
// file is only 3 KiB (not counting the header data).
// Each hole starts at a power-of-two offset and ends 512 bytes before the
// next one, leaving a 512-byte data fragment in between.
hdr := &tar.Header{
Name: "sparse.db",
Size: 16384,
SparseHoles: []tar.SparseEntry{
// Data fragment at 0..1023
{Offset: 1024, Length: 1024 - 512}, // Hole fragment at 1024..1535
// Data fragment at 1536..2047
{Offset: 2048, Length: 2048 - 512}, // Hole fragment at 2048..3583
// Data fragment at 3584..4095
{Offset: 4096, Length: 4096 - 512}, // Hole fragment at 4096..7679
// Data fragment at 7680..8191
{Offset: 8192, Length: 8192 - 512}, // Hole fragment at 8192..15871
// Data fragment at 15872..16383
},
}

// The regions marked as a sparse hole are filled with NUL-bytes.
// The total length of the body content must match the specified Size field.
// The runs below mirror the fragments declared in SparseHoles above:
// 1024 + 4*512 bytes of data interleaved with the four NUL-filled holes.
body := "" +
strings.Repeat("A", 1024) +
strings.Repeat("\x00", 1024-512) +
strings.Repeat("B", 512) +
strings.Repeat("\x00", 2048-512) +
strings.Repeat("C", 512) +
strings.Repeat("\x00", 4096-512) +
strings.Repeat("D", 512) +
strings.Repeat("\x00", 8192-512) +
strings.Repeat("E", 512)

// Print a digest of the logical content and the hole list before writing,
// so they can be compared with what is read back from the archive.
h := md5.Sum([]byte(body))
fmt.Printf("Write content of %s, Size: %d, MD5: %08x\n", hdr.Name, len(body), h)
fmt.Printf("Write SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles)

// Create a new archive and write the sparse file.
// The whole logical body is written, including the NUL bytes inside holes.
tw := tar.NewWriter(buf)
if err := tw.WriteHeader(hdr); err != nil {
log.Fatal(err)
}
if _, err := tw.Write([]byte(body)); err != nil {
log.Fatal(err)
}
// Make sure to check the error on Close.
if err := tw.Close(); err != nil {
log.Fatal(err)
}

// Open and iterate through the files in the archive.
tr := tar.NewReader(buf)
for {
hdr, err := tr.Next()
if err == io.EOF {
break // End of archive
}
if err != nil {
log.Fatal(err)
}
// Read the entire content of the current entry into memory.
body, err := ioutil.ReadAll(tr)
if err != nil {
log.Fatal(err)
}

// Print the same summary as on the write side. Per the expected output,
// the size and MD5 match what was written, while the holes read back
// carry one extra zero-length entry at offset 16384 (the end of the file).
h := md5.Sum([]byte(body))
fmt.Printf("Read content of %s, Size: %d, MD5: %08x\n", hdr.Name, len(body), h)
fmt.Printf("Read SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles)
}

// Output:
// Write content of sparse.db, Size: 16384, MD5: 9b4e2cfae0f9303d30237718e891e9f9
// Write SparseHoles of sparse.db:
// [{1024 512} {2048 1536} {4096 3584} {8192 7680}]
//
// Read content of sparse.db, Size: 16384, MD5: 9b4e2cfae0f9303d30237718e891e9f9
// Read SparseHoles of sparse.db:
// [{1024 512} {2048 1536} {4096 3584} {8192 7680} {16384 0}]
}

0 comments on commit e0ab505

Please sign in to comment.