Skip to content

Commit

Permalink
archive/tar: add Header.DetectSparseHoles and Header.PunchSparseHoles
Browse files Browse the repository at this point in the history
To support the detection and creation of sparse files,
add two new methods:
	func Header.DetectSparseHoles(*os.File) error
	func Header.PunchSparseHoles(*os.File) error

DetectSparseHoles is intended to be used after FileInfoHeader
prior to serializing the Header with WriteHeader.
For each OS, it uses specialized logic to detect
the location of sparse holes. On most Unix systems, it uses
SEEK_HOLE and SEEK_DATA to query for the holes.
On Windows, it uses a specialized the FSCTL_QUERY_ALLOCATED_RANGES
syscall to query for all the holes.

PunchSparseHoles is intended to be used after Reader.Next
prior to populating the file with Reader.WriteTo.
On Windows, this uses the FSCTL_SET_ZERO_DATA syscall.
On other operating systems it simply truncates the file
to the end-offset of SparseHoles.

DetectSparseHoles and PunchSparseHoles are added as methods on
Header because they are heavily tied to the operating system,
for which there is already an existing precedence for
(since FileInfoHeader makes uses of OS-specific details).

Fixes #13548

Change-Id: I98a321dd1ce0165f3d143d4edadfda5e7db67746
Reviewed-on: https://go-review.googlesource.com/60871
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
  • Loading branch information
dsnet committed Sep 20, 2017
1 parent d2f3172 commit 1eacf78
Show file tree
Hide file tree
Showing 7 changed files with 416 additions and 22 deletions.
64 changes: 63 additions & 1 deletion src/archive/tar/common.go
Expand Up @@ -13,6 +13,7 @@ package tar
import (
"errors"
"fmt"
"io"
"math"
"os"
"path"
Expand Down Expand Up @@ -525,6 +526,66 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
return format, paxHdrs, err
}

var sysSparseDetect func(f *os.File) (sparseHoles, error)
var sysSparsePunch func(f *os.File, sph sparseHoles) error

// DetectSparseHoles searches for holes within f to populate SparseHoles
// on supported operating systems and filesystems.
// The file offset is cleared to zero.
//
// When packing a sparse file, DetectSparseHoles should be called prior to
// serializing the header to the archive with Writer.WriteHeader.
func (h *Header) DetectSparseHoles(f *os.File) (err error) {
defer func() {
if _, serr := f.Seek(0, io.SeekStart); err == nil {
err = serr
}
}()

h.SparseHoles = nil
if sysSparseDetect != nil {
sph, err := sysSparseDetect(f)
h.SparseHoles = sph
return err
}
return nil
}

// PunchSparseHoles destroys the contents of f, and prepares a sparse file
// (on supported operating systems and filesystems)
// with holes punched according to SparseHoles.
// The file offset is cleared to zero.
//
// When extracting a sparse file, PunchSparseHoles should be called prior to
// populating the content of a file with Reader.WriteTo.
func (h *Header) PunchSparseHoles(f *os.File) (err error) {
defer func() {
if _, serr := f.Seek(0, io.SeekStart); err == nil {
err = serr
}
}()

if err := f.Truncate(0); err != nil {
return err
}

var size int64
if len(h.SparseHoles) > 0 {
size = h.SparseHoles[len(h.SparseHoles)-1].endOffset()
}
if !validateSparseEntries(h.SparseHoles, size) {
return errors.New("tar: invalid sparse holes")
}

if size == 0 {
return nil // For non-sparse files, do nothing (other than Truncate)
}
if sysSparsePunch != nil {
return sysSparsePunch(f, h.SparseHoles)
}
return f.Truncate(size)
}

// FileInfo returns an os.FileInfo for the Header.
func (h *Header) FileInfo() os.FileInfo {
return headerFileInfo{h}
Expand Down Expand Up @@ -627,7 +688,8 @@ const (
// the file it describes, it may be necessary to modify Header.Name
// to provide the full path name of the file.
//
// This function does not populate Header.SparseHoles.
// This function does not populate Header.SparseHoles;
// for sparse file support, additionally call Header.DetectSparseHoles.
func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
if fi == nil {
return nil, errors.New("tar: FileInfo is nil")
Expand Down
110 changes: 101 additions & 9 deletions src/archive/tar/example_test.go
Expand Up @@ -16,11 +16,10 @@ import (
"strings"
)

func Example() {
buf := new(bytes.Buffer)

func Example_minimal() {
// Create and add some files to the archive.
tw := tar.NewWriter(buf)
var buf bytes.Buffer
tw := tar.NewWriter(&buf)
var files = []struct {
Name, Body string
}{
Expand All @@ -46,7 +45,7 @@ func Example() {
}

// Open and iterate through the files in the archive.
tr := tar.NewReader(buf)
tr := tar.NewReader(&buf)
for {
hdr, err := tr.Next()
if err == io.EOF {
Expand Down Expand Up @@ -75,9 +74,101 @@ func Example() {
}

// A sparse file can efficiently represent a large file that is mostly empty.
func Example_sparse() {
buf := new(bytes.Buffer)
// When packing an archive, Header.DetectSparseHoles can be used to populate
// the sparse map, while Header.PunchSparseHoles can be used to create a
// sparse file on disk when extracting an archive.
func Example_sparseAutomatic() {
// Create the source sparse file.
src, err := ioutil.TempFile("", "sparse.db")
if err != nil {
log.Fatal(err)
}
defer os.Remove(src.Name()) // Best-effort cleanup
defer func() {
if err := src.Close(); err != nil {
log.Fatal(err)
}
}()
if err := src.Truncate(10e6); err != nil {
log.Fatal(err)
}
for i := 0; i < 10; i++ {
if _, err := src.Seek(1e6-1e3, io.SeekCurrent); err != nil {
log.Fatal(err)
}
if _, err := src.Write(bytes.Repeat([]byte{'0' + byte(i)}, 1e3)); err != nil {
log.Fatal(err)
}
}

// Create an archive and pack the source sparse file to it.
var buf bytes.Buffer
tw := tar.NewWriter(&buf)
fi, err := src.Stat()
if err != nil {
log.Fatal(err)
}
hdr, err := tar.FileInfoHeader(fi, "")
if err != nil {
log.Fatal(err)
}
if err := hdr.DetectSparseHoles(src); err != nil {
log.Fatal(err)
}
if err := tw.WriteHeader(hdr); err != nil {
log.Fatal(err)
}
if _, err := io.Copy(tw, src); err != nil {
log.Fatal(err)
}
if err := tw.Close(); err != nil {
log.Fatal(err)
}

// Create the destination sparse file.
dst, err := ioutil.TempFile("", "sparse.db")
if err != nil {
log.Fatal(err)
}
defer os.Remove(dst.Name()) // Best-effort cleanup
defer func() {
if err := dst.Close(); err != nil {
log.Fatal(err)
}
}()

// Open the archive and extract the sparse file into the destination file.
tr := tar.NewReader(&buf)
hdr, err = tr.Next()
if err != nil {
log.Fatal(err)
}
if err := hdr.PunchSparseHoles(dst); err != nil {
log.Fatal(err)
}
if _, err := io.Copy(dst, tr); err != nil {
log.Fatal(err)
}

// Verify that the sparse files are identical.
want, err := ioutil.ReadFile(src.Name())
if err != nil {
log.Fatal(err)
}
got, err := ioutil.ReadFile(dst.Name())
if err != nil {
log.Fatal(err)
}
fmt.Printf("Src MD5: %08x\n", md5.Sum(want))
fmt.Printf("Dst MD5: %08x\n", md5.Sum(got))

// Output:
// Src MD5: 33820d648d42cb3da2515da229149f74
// Dst MD5: 33820d648d42cb3da2515da229149f74
}

// The SparseHoles can be manually constructed without Header.DetectSparseHoles.
func Example_sparseManual() {
// Define a sparse file to add to the archive.
// This sparse files contains 5 data fragments, and 4 hole fragments.
// The logical size of the file is 16 KiB, while the physical size of the
Expand Down Expand Up @@ -116,7 +207,8 @@ func Example_sparse() {
fmt.Printf("Write SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles)

// Create a new archive and write the sparse file.
tw := tar.NewWriter(buf)
var buf bytes.Buffer
tw := tar.NewWriter(&buf)
if err := tw.WriteHeader(hdr); err != nil {
log.Fatal(err)
}
Expand All @@ -128,7 +220,7 @@ func Example_sparse() {
}

// Open and iterate through the files in the archive.
tr := tar.NewReader(buf)
tr := tar.NewReader(&buf)
for {
hdr, err := tr.Next()
if err == io.EOF {
Expand Down
68 changes: 68 additions & 0 deletions src/archive/tar/sparse_unix.go
@@ -0,0 +1,68 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build linux darwin dragonfly freebsd openbsd netbsd solaris

package tar

import (
"io"
"os"
"syscall"
)

func init() {
sysSparseDetect = sparseDetectUnix
}

func sparseDetectUnix(f *os.File) (sph sparseHoles, err error) {
// SEEK_DATA and SEEK_HOLE originated from Solaris and support for it
// has been added to most of the other major Unix systems.
const seekData = 3 // SEEK_DATA from unistd.h
const seekHole = 4 // SEEK_HOLE from unistd.h

// Check for seekData/seekHole support.
if _, err := f.Seek(0, seekHole); errno(err) == syscall.EINVAL {
return nil, nil // Either old kernel or FS does not support this
}

// Populate the SparseHoles.
var last, pos int64 = -1, 0
for {
// Get the location of the next hole section.
if pos, err = fseek(f, pos, seekHole); pos == last || err != nil {
return sph, err
}
offset := pos
last = pos

// Get the location of the next data section.
if pos, err = fseek(f, pos, seekData); pos == last || err != nil {
return sph, err
}
length := pos - offset
last = pos

if length > 0 {
sph = append(sph, SparseEntry{offset, length})
}
}
}

func fseek(f *os.File, pos int64, whence int) (int64, error) {
pos, err := f.Seek(pos, whence)
if errno(err) == syscall.ENXIO {
// SEEK_DATA returns ENXIO when past the last data fragment,
// which makes determining the size of the last hole difficult.
pos, err = f.Seek(0, io.SeekEnd)
}
return pos, err
}

func errno(err error) error {
if perr, ok := err.(*os.PathError); ok {
return perr.Err
}
return err
}

0 comments on commit 1eacf78

Please sign in to comment.