Skip to content

Commit

Permalink
handle hard links and whiteouts correctly
Browse files Browse the repository at this point in the history
Hard links and whiteouts were not handled correctly in this scenario:
- The first Docker layer contains:
  - file A
  - file B hard link to file A
- The second Docker layer contains:
  - whiteout file A

Then, the squashed ACI previously generated by docker2aci contained:
  - file B dangling hard link to file A

Hence the bug.

This patch changes:
- the converting algorithm lib/common/common.go:writeACI() to generate:
  - First docker layer
    - .hidden.docker2aci.sha512-xxx (hash of the layer and file name)
    - file A hard link to .hidden.docker2aci.sha512-xxx
    - file B hard link to .hidden.docker2aci.sha512-xxx
  - Second docker layer
    - whiteout file A
- the squashing algorithm lib/docker2aci.go:SquashLayers() to have two
  passes:
  - Pass one: build an in-memory map of hard links and whiteouts
  - Pass two: remove whiteout and .hidden.docker2aci.sha512-xxx files

I tested the following images from rkt/rkt#1653:
- docker://albanc/busybox-hardlinks
- docker://zopyx/xmldirector-plone

TODO:
- not optimized if there are no hard links
- semantic changes on --nosquash

Fixes appc#98
  • Loading branch information
alban committed Nov 17, 2015
1 parent ec4e5d7 commit 6ddea53
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 19 deletions.
28 changes: 27 additions & 1 deletion lib/common/common.go
Expand Up @@ -3,6 +3,7 @@ package common
import (
"archive/tar"
"compress/gzip"
"crypto/sha512"
"encoding/json"
"fmt"
"io"
Expand Down Expand Up @@ -310,6 +311,16 @@ func convertVolumesToMPs(dockerVolumes map[string]struct{}) ([]appctypes.MountPo
return mps, nil
}

// getIdFromName returns a hidden file name derived from a layer name and a
// target path. The name is the fixed prefix ".hidden.docker2aci.sha512-"
// followed by the lowercase hex SHA-512 digest of the two inputs.
//
// Each input is followed by a single NUL byte before hashing, so the pairs
// ("ab", "c") and ("a", "bc") do not feed the same byte sequence to the hash.
func getIdFromName(layer, target string) string {
	payload := make([]byte, 0, len(layer)+len(target)+2)
	payload = append(payload, layer...)
	payload = append(payload, 0)
	payload = append(payload, target...)
	payload = append(payload, 0)

	digest := sha512.Sum512(payload)
	return fmt.Sprintf(".hidden.docker2aci.sha512-%x", digest[:])
}

func writeACI(layer io.ReadSeeker, manifest schema.ImageManifest, curPwl []string, output string, compress bool) (*schema.ImageManifest, error) {
aciFile, err := os.Create(output)
if err != nil {
Expand Down Expand Up @@ -348,8 +359,23 @@ func writeACI(layer io.ReadSeeker, manifest schema.ImageManifest, curPwl []strin
whiteouts = append(whiteouts, strings.Replace(absolutePath, ".wh.", "", 1))
return nil
}

if t.Header.Typeflag == tar.TypeLink {
t.Header.Linkname = path.Join("rootfs", t.Linkname())
hash := getIdFromName(manifest.Name.String(), t.Linkname())
t.Header.Linkname = path.Join("rootfs", hash)
} else if t.Header.Typeflag == tar.TypeReg {
var newHeader tar.Header = *t.Header
linkname := path.Join("rootfs", getIdFromName(manifest.Name.String(), name))
newHeader.Name = linkname
if err := trw.WriteHeader(&newHeader); err != nil {
return err
}
if _, err := io.Copy(trw, t.TarStream); err != nil {
return err
}
t.Header.Typeflag = tar.TypeLink
t.Header.Linkname = linkname
t.Header.Size = 0
}

if err := trw.WriteHeader(t.Header); err != nil {
Expand Down
117 changes: 99 additions & 18 deletions lib/docker2aci.go
Expand Up @@ -181,9 +181,14 @@ func SquashLayers(images []acirenderer.Image, aciRegistry acirenderer.ACIRegistr
os.Remove(squashedTempFile.Name())
}
}()
tempFile, err := ioutil.TempFile(outputDir, "docker2aci-tempFile-")
if err != nil {
return "", err
}
defer os.Remove(tempFile.Name())

util.Debug("Writing squashed ACI...")
if err := writeSquashedImage(squashedTempFile, renderedACI, aciRegistry, manifests); err != nil {
if err := writeSquashedImage(squashedTempFile, tempFile, renderedACI, aciRegistry, manifests); err != nil {
return "", fmt.Errorf("error writing squashed image: %v", err)
}

Expand Down Expand Up @@ -224,12 +229,43 @@ func getManifests(renderedACI acirenderer.RenderedACI, aciRegistry acirenderer.A
return manifests, nil
}

func writeSquashedImage(outputFile *os.File, renderedACI acirenderer.RenderedACI, aciProvider acirenderer.ACIProvider, manifests []schema.ImageManifest) error {
func writeSquashedImage(outputFile, tempFile *os.File,
renderedACI acirenderer.RenderedACI,
aciProvider acirenderer.ACIProvider,
manifests []schema.ImageManifest) error {

gw := gzip.NewWriter(outputFile)
defer gw.Close()
outputWriter := tar.NewWriter(gw)
defer outputWriter.Close()

tempWriter := tar.NewWriter(tempFile)
defer tempWriter.Close()

finalManifest := mergeManifests(manifests)

if err := common.WriteManifest(outputWriter, finalManifest); err != nil {
return err
}

if err := common.WriteRootfsDir(outputWriter); err != nil {
return err
}

type hardLinkEntry struct {
firstLinkCleanName string
firstLinkHeader tar.Header
walked bool
}
hardLinks := make(map[string]hardLinkEntry)

type tempEntry struct {
cleanName string
keep bool
firstLink bool
}
var entries []tempEntry

for _, aciFile := range renderedACI {
rs, err := aciProvider.ReadStream(aciFile.Key)
if err != nil {
Expand All @@ -239,19 +275,25 @@ func writeSquashedImage(outputFile *os.File, renderedACI acirenderer.RenderedACI

squashWalker := func(t *tarball.TarFile) error {
cleanName := filepath.Clean(t.Name())

if _, ok := aciFile.FileMap[cleanName]; ok {
// we generate and add rootfs and the squashed manifest later
if cleanName == "manifest" || cleanName == "rootfs" {
return nil
}
if err := outputWriter.WriteHeader(t.Header); err != nil {
return fmt.Errorf("error writing header: %v", err)
}
if _, err := io.Copy(outputWriter, t.TarStream); err != nil {
return fmt.Errorf("error copying file into the tar out: %v", err)
// we generate and add rootfs and the squashed manifest later
if cleanName == "manifest" || cleanName == "rootfs" {
return nil
}
_, keep := aciFile.FileMap[cleanName]
if keep && t.Header.Typeflag == tar.TypeLink {
cleanTarget := filepath.Clean(t.Linkname())
if _, ok := hardLinks[cleanTarget]; !ok {
hardLinks[cleanTarget] = hardLinkEntry{cleanName, *t.Header, false}
}
}
entries = append(entries, tempEntry{cleanName, keep, false})

if err := tempWriter.WriteHeader(t.Header); err != nil {
return fmt.Errorf("error writing header: %v", err)
}
if _, err := io.Copy(tempWriter, t.TarStream); err != nil {
return fmt.Errorf("error copying file into the tar out: %v", err)
}
return nil
}

Expand All @@ -261,15 +303,54 @@ func writeSquashedImage(outputFile *os.File, renderedACI acirenderer.RenderedACI
}
}

if err := common.WriteRootfsDir(outputWriter); err != nil {
return err
}
tr := tar.NewReader(tempFile)

finalManifest := mergeManifests(manifests)
currentEntry := 0
deduplicateWalker := func(t *tarball.TarFile) error {
cleanName := filepath.Clean(t.Name())
if link, ok := hardLinks[cleanName]; ok {
link.firstLinkHeader.Size = t.Header.Size
link.firstLinkHeader.Typeflag = t.Header.Typeflag
link.firstLinkHeader.Linkname = ""

if err := common.WriteManifest(outputWriter, finalManifest); err != nil {
if err := outputWriter.WriteHeader(&link.firstLinkHeader); err != nil {
return fmt.Errorf("error writing header: %v", err)
}
if _, err := io.Copy(outputWriter, t.TarStream); err != nil {
return fmt.Errorf("error copying file into the tar out: %v", err)
}
} else if entries[currentEntry].keep {
if t.Header.Typeflag == tar.TypeLink {
cleanTarget := filepath.Clean(t.Linkname())
if link, ok := hardLinks[cleanTarget]; ok {
if !link.walked {
entries[currentEntry].firstLink = true
} else {
t.Header.Linkname = link.firstLinkCleanName
}
link.walked = true
hardLinks[cleanTarget] = link
}
}

if !entries[currentEntry].firstLink {
if err := outputWriter.WriteHeader(t.Header); err != nil {
return fmt.Errorf("error writing header: %v", err)
}
if _, err := io.Copy(outputWriter, t.TarStream); err != nil {
return fmt.Errorf("error copying file into the tar out: %v", err)
}
}
}
currentEntry++
return nil
}
if err := tarball.Walk(*tr, deduplicateWalker); err != nil {
return err
}
if len(entries) != currentEntry {
panic(fmt.Sprintf("found %d tarball entries in the first iteration and %d in the second", len(entries), currentEntry))
}

return nil
}
Expand Down

0 comments on commit 6ddea53

Please sign in to comment.