Skip to content

Commit

Permalink
handle hard links and whiteouts correctly
Browse files Browse the repository at this point in the history
Hard links and whiteouts were not handled correctly in this scenario:
- The first Docker layer contains:
  - file A
  - file B hard link to file A
- The second Docker layer contains:
  - whiteout file A

Then, the squashed ACI previously generated by docker2aci contained:
  - file B dangling hard link to file A

Hence the bug.

This patch changes:
- the conversion algorithm in lib/common/common.go:writeACI() to generate:
  - First docker layer
    - .hidden.docker2aci.sha512-xxx (hash of the layer and file name)
    - file A hard link to .hidden.docker2aci.sha512-xxx
    - file B hard link to .hidden.docker2aci.sha512-xxx
  - Second docker layer
    - whiteout file A
- the squashing algorithm lib/docker2aci.go:SquashLayers() to have two
  passes:
  - Pass one: build an in-memory map of hard links and whiteouts
  - Pass two: remove whiteout and .hidden.docker2aci.sha512-xxx files

I tested the following images from rkt/rkt#1653:
- docker://albanc/busybox-hardlinks
- docker://zopyx/xmldirector-plone

TODO:
- not optimized if there are no hard links
- semantic changes on --nosquash

Fixes appc#98
  • Loading branch information
alban committed Nov 20, 2015
1 parent ec4e5d7 commit 091b568
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 15 deletions.
28 changes: 27 additions & 1 deletion lib/common/common.go
Expand Up @@ -3,6 +3,7 @@ package common
import (
"archive/tar"
"compress/gzip"
"crypto/sha512"
"encoding/json"
"fmt"
"io"
Expand Down Expand Up @@ -310,6 +311,16 @@ func convertVolumesToMPs(dockerVolumes map[string]struct{}) ([]appctypes.MountPo
return mps, nil
}

// getIdFromName builds the hidden file name used as the shared hard-link
// target for a file inside a layer.
//
// The name is the fixed prefix ".hidden.docker2aci.sha512-" followed by the
// lowercase hex SHA-512 digest of the byte sequence
// layer, NUL, target, NUL. The NUL terminators keep distinct
// (layer, target) pairs from producing the same hashed input, e.g.
// ("a", "bc") versus ("ab", "c").
func getIdFromName(layer, target string) string {
	const nul = "\x00"

	// Hash the whole NUL-terminated sequence in one shot; this feeds the
	// digest exactly the same bytes as writing the four pieces one by one.
	digest := sha512.Sum512([]byte(layer + nul + target + nul))

	return fmt.Sprintf(".hidden.docker2aci.sha512-%x", digest[:])
}

func writeACI(layer io.ReadSeeker, manifest schema.ImageManifest, curPwl []string, output string, compress bool) (*schema.ImageManifest, error) {
aciFile, err := os.Create(output)
if err != nil {
Expand Down Expand Up @@ -348,8 +359,23 @@ func writeACI(layer io.ReadSeeker, manifest schema.ImageManifest, curPwl []strin
whiteouts = append(whiteouts, strings.Replace(absolutePath, ".wh.", "", 1))
return nil
}

if t.Header.Typeflag == tar.TypeLink {
t.Header.Linkname = path.Join("rootfs", t.Linkname())
hash := getIdFromName(manifest.Name.String(), t.Linkname())
t.Header.Linkname = path.Join("rootfs", hash)
} else if t.Header.Typeflag == tar.TypeReg {
var newHeader tar.Header = *t.Header
linkname := path.Join("rootfs", getIdFromName(manifest.Name.String(), name))
newHeader.Name = linkname
if err := trw.WriteHeader(&newHeader); err != nil {
return err
}
if _, err := io.Copy(trw, t.TarStream); err != nil {
return err
}
t.Header.Typeflag = tar.TypeLink
t.Header.Linkname = linkname
t.Header.Size = 0
}

if err := trw.WriteHeader(t.Header); err != nil {
Expand Down
107 changes: 93 additions & 14 deletions lib/docker2aci.go
Expand Up @@ -230,6 +230,32 @@ func writeSquashedImage(outputFile *os.File, renderedACI acirenderer.RenderedACI
outputWriter := tar.NewWriter(gw)
defer outputWriter.Close()

finalManifest := mergeManifests(manifests)

if err := common.WriteManifest(outputWriter, finalManifest); err != nil {
return err
}

if err := common.WriteRootfsDir(outputWriter); err != nil {
return err
}

type hardLinkEntry struct {
firstLinkCleanName string
firstLinkHeader tar.Header
walked bool
}
hardLinks := make(map[string]hardLinkEntry)

type tempEntry struct {
cleanName string
keep bool
firstLink bool
}
var entries []tempEntry

// first pass: read all the entries and build tempEntry, hardLinks in memory
// but don't write on disk
for _, aciFile := range renderedACI {
rs, err := aciProvider.ReadStream(aciFile.Key)
if err != nil {
Expand All @@ -239,19 +265,78 @@ func writeSquashedImage(outputFile *os.File, renderedACI acirenderer.RenderedACI

squashWalker := func(t *tarball.TarFile) error {
cleanName := filepath.Clean(t.Name())

if _, ok := aciFile.FileMap[cleanName]; ok {
// we generate and add rootfs and the squashed manifest later
if cleanName == "manifest" || cleanName == "rootfs" {
return nil
// the rootfs and the squashed manifest are added separately
if cleanName == "manifest" || cleanName == "rootfs" {
return nil
}
_, keep := aciFile.FileMap[cleanName]
if keep && t.Header.Typeflag == tar.TypeLink {
cleanTarget := filepath.Clean(t.Linkname())
if _, ok := hardLinks[cleanTarget]; !ok {
hardLinks[cleanTarget] = hardLinkEntry{cleanName, *t.Header, false}
}
if err := outputWriter.WriteHeader(t.Header); err != nil {
}
entries = append(entries, tempEntry{cleanName, keep, false})
return nil
}

tr := tar.NewReader(rs)
if err := tarball.Walk(*tr, squashWalker); err != nil {
return err
}
}

// second pass: write on disk
currentEntry := 0
for _, aciFile := range renderedACI {
rs, err := aciProvider.ReadStream(aciFile.Key)
if err != nil {
return err
}
defer rs.Close()

squashWalker := func(t *tarball.TarFile) error {
cleanName := filepath.Clean(t.Name())
// the rootfs and the squashed manifest are added separately
if cleanName == "manifest" || cleanName == "rootfs" {
return nil
}

if link, ok := hardLinks[cleanName]; ok {
link.firstLinkHeader.Size = t.Header.Size
link.firstLinkHeader.Typeflag = t.Header.Typeflag
link.firstLinkHeader.Linkname = ""

if err := outputWriter.WriteHeader(&link.firstLinkHeader); err != nil {
return fmt.Errorf("error writing header: %v", err)
}
if _, err := io.Copy(outputWriter, t.TarStream); err != nil {
return fmt.Errorf("error copying file into the tar out: %v", err)
}
} else if entries[currentEntry].keep {
if t.Header.Typeflag == tar.TypeLink {
cleanTarget := filepath.Clean(t.Linkname())
if link, ok := hardLinks[cleanTarget]; ok {
if !link.walked {
entries[currentEntry].firstLink = true
} else {
t.Header.Linkname = link.firstLinkCleanName
}
link.walked = true
hardLinks[cleanTarget] = link
}
}

if !entries[currentEntry].firstLink {
if err := outputWriter.WriteHeader(t.Header); err != nil {
return fmt.Errorf("error writing header: %v", err)
}
if _, err := io.Copy(outputWriter, t.TarStream); err != nil {
return fmt.Errorf("error copying file into the tar out: %v", err)
}
}
}
currentEntry++
return nil
}

Expand All @@ -261,14 +346,8 @@ func writeSquashedImage(outputFile *os.File, renderedACI acirenderer.RenderedACI
}
}

if err := common.WriteRootfsDir(outputWriter); err != nil {
return err
}

finalManifest := mergeManifests(manifests)

if err := common.WriteManifest(outputWriter, finalManifest); err != nil {
return err
if len(entries) != currentEntry {
panic(fmt.Sprintf("found %d tarball entries in the first iteration and %d in the second", len(entries), currentEntry))
}

return nil
Expand Down

0 comments on commit 091b568

Please sign in to comment.