Skip to content

Commit

Permalink
Support >= 128 layers in overlayfs snapshots
Browse files Browse the repository at this point in the history
Auto-detect the longest common dir in the lowerdir option and compact the
option when its size approaches one page. In that case, use chdir + CLONE to
perform the mount, to avoid hitting the one-page limit of the mount argument
buffer in the Linux kernel.

Signed-off-by: Wei Fu <fhfuwei@163.com>
  • Loading branch information
Wei Fu authored and estesp committed Aug 31, 2018
1 parent d725c75 commit 8a2991c
Show file tree
Hide file tree
Showing 7 changed files with 673 additions and 2 deletions.
155 changes: 153 additions & 2 deletions mount/mount_linux.go
Expand Up @@ -17,16 +17,41 @@
package mount

import (
"fmt"
"os"
"path"
"strings"
"time"

"github.com/containerd/containerd/sys"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)

// pagesize is the memory page size in bytes. Mount compares the size of the
// mount options against it. It starts at 4096 and is replaced during package
// initialization with the value reported by os.Getpagesize.
var pagesize = 4096

// init overrides the default pagesize with the page size of the running
// system.
func init() {
pagesize = os.Getpagesize()
}

// Mount to the provided target path
func (m *Mount) Mount(target string) error {
flags, data := parseMountOptions(m.Options)
var (
chdir string
options = m.Options
)

// avoid hitting one page limit of mount argument buffer
//
// NOTE: 512 is a buffer during pagesize check.
if m.Type == "overlay" && optionsSize(options) >= pagesize-512 {
chdir, options = compactLowerdirOption(options)
}

flags, data := parseMountOptions(options)
if len(data) > pagesize {
return errors.Errorf("mount options is too long")
}

// propagation types.
const ptypes = unix.MS_SHARED | unix.MS_PRIVATE | unix.MS_SLAVE | unix.MS_UNBINDABLE
Expand All @@ -38,7 +63,7 @@ func (m *Mount) Mount(target string) error {
if flags&unix.MS_REMOUNT == 0 || data != "" {
// Initial call applying all non-propagation flags for mount
// or remount with changed data
if err := unix.Mount(m.Source, target, m.Type, uintptr(oflags), data); err != nil {
if err := mountAt(chdir, m.Source, target, m.Type, uintptr(oflags), data); err != nil {
return err
}
}
Expand Down Expand Up @@ -155,3 +180,129 @@ func parseMountOptions(options []string) (int, string) {
}
return flag, strings.Join(data, ",")
}

// compactLowerdirOption rewrites the overlay lowerdir option so that every
// lowerdir is relative to a shared parent directory. It returns that parent
// directory (always with a trailing "/") together with the rewritten options;
// the returned directory is meant to be used as the working directory of the
// mount so that the relative lowerdirs resolve correctly.
//
// When compaction is not possible it returns "" and opts unchanged. That is
// the case when:
//   - there is no lowerdir option, or it lists a single directory;
//   - the lowerdirs share no usable parent (empty prefix, "/" or ".");
//   - some lowerdir does not literally start with the computed parent, or
//     would become an empty path once the parent is stripped.
//
// The input slice is never modified. In the returned copy the compacted
// lowerdir option is moved to the end.
func compactLowerdirOption(opts []string) (string, []string) {
	idx, dirs := findOverlayLowerdirs(opts)
	if idx == -1 || len(dirs) == 1 {
		// no need to compact if there is only one lowerdir
		return "", opts
	}

	// find out common dir
	commondir := longestCommonPrefix(dirs)
	if commondir == "" {
		return "", opts
	}

	// NOTE: the snapshot id is based on digits, so the raw common prefix may
	// end in the middle of an id (e.g. snapshots/1 for ids 1 and 10). Go
	// back to the parent dir to get a real directory boundary. There is an
	// assumption that the common dir is
	// ${root}/io.containerd.v1.overlayfs/snapshots.
	commondir = path.Dir(commondir)
	// path.Dir returns "." when the prefix contains no slash; stripping "./"
	// from directories that never contained it would corrupt them (or slice
	// out of range), so treat it like "/" and give up.
	if commondir == "/" || commondir == "." {
		return "", opts
	}
	commondir = commondir + "/"

	newdirs := make([]string, 0, len(dirs))
	for _, dir := range dirs {
		// path.Dir cleans its result, so for non-clean inputs commondir may
		// not be a literal prefix of dir. Also refuse to produce an empty
		// lowerdir entry.
		if len(dir) <= len(commondir) || !strings.HasPrefix(dir, commondir) {
			return "", opts
		}
		newdirs = append(newdirs, dir[len(commondir):])
	}

	newopts := copyOptions(opts)
	newopts = append(newopts[:idx], newopts[idx+1:]...)
	newopts = append(newopts, fmt.Sprintf("lowerdir=%s", strings.Join(newdirs, ":")))
	return commondir, newopts
}

// findOverlayLowerdirs locates the first "lowerdir=" entry in opts. It
// returns the position of that entry and its value split on ":". If no such
// entry exists it returns -1 and a nil slice.
func findOverlayLowerdirs(opts []string) (int, []string) {
	const key = "lowerdir="

	for pos, entry := range opts {
		if !strings.HasPrefix(entry, key) {
			continue
		}
		// Only the first lowerdir option is considered.
		value := entry[len(key):]
		return pos, strings.Split(value, ":")
	}
	return -1, nil
}

// longestCommonPrefix returns the longest byte-wise prefix shared by every
// string in strs. An empty slice yields "" and a single-element slice yields
// that element.
func longestCommonPrefix(strs []string) string {
	switch len(strs) {
	case 0:
		return ""
	case 1:
		return strs[0]
	}

	// The prefix common to the lexicographically smallest and largest
	// strings is common to everything in between, so only those two need to
	// be compared.
	lo, hi := strs[0], strs[0]
	for _, s := range strs[1:] {
		if s < lo {
			lo = s
		}
		if s > hi {
			hi = s
		}
	}

	limit := len(lo)
	if len(hi) < limit {
		limit = len(hi)
	}
	for pos := 0; pos < limit; pos++ {
		if lo[pos] != hi[pos] {
			return lo[:pos]
		}
	}
	return lo
}

// copyOptions returns an independent copy of opts, so the caller may modify
// the result without touching the original slice. An empty or nil input
// yields nil.
func copyOptions(opts []string) []string {
	n := len(opts)
	if n == 0 {
		return nil
	}

	dup := make([]string, 0, n)
	return append(dup, opts...)
}

// optionsSize returns the sum of the byte lengths of all entries in opts.
// Separators between options are not counted.
func optionsSize(opts []string) int {
	var total int
	for i := range opts {
		total += len(opts[i])
	}
	return total
}

// mountAt performs a mount, optionally relative to the directory chdir.
//
// With an empty chdir it is a plain unix.Mount call. Otherwise chdir is
// opened and checked to be a directory, and the mount is delegated to
// sys.FMountat with the descriptor of that directory. Every error on this
// second path is wrapped with "failed to mountat".
func mountAt(chdir string, source, target, fstype string, flags uintptr, data string) error {
	if chdir == "" {
		return unix.Mount(source, target, fstype, flags, data)
	}

	const wrapMsg = "failed to mountat"

	dirFile, err := os.Open(chdir)
	if err != nil {
		return errors.Wrap(err, wrapMsg)
	}
	defer dirFile.Close()

	info, err := dirFile.Stat()
	switch {
	case err != nil:
		return errors.Wrap(err, wrapMsg)
	case !info.IsDir():
		return errors.Wrap(errors.Errorf("%s is not dir", chdir), wrapMsg)
	}

	mountErr := sys.FMountat(dirFile.Fd(), source, target, fstype, flags, data)
	return errors.Wrap(mountErr, wrapMsg)
}
94 changes: 94 additions & 0 deletions mount/mount_linux_test.go
@@ -0,0 +1,94 @@
// +build linux

/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package mount

import (
"reflect"
"testing"
)

// TestLongestCommonPrefix checks longestCommonPrefix against a table of
// inputs, including empty slices, identical strings and an empty member.
func TestLongestCommonPrefix(t *testing.T) {
	type testCase struct {
		in       []string
		expected string
	}

	cases := []testCase{
		{in: []string{}, expected: ""},
		{in: []string{"foo"}, expected: "foo"},
		{in: []string{"foo", "bar"}, expected: ""},
		{in: []string{"foo", "foo"}, expected: "foo"},
		{in: []string{"foo", "foobar"}, expected: "foo"},
		{in: []string{"foo", "", "foobar"}, expected: ""},
	}

	for n := range cases {
		c := cases[n]
		got := longestCommonPrefix(c.in)
		if got == c.expected {
			continue
		}
		t.Fatalf("[%d case] expected (%s), but got (%s)", n+1, c.expected, got)
	}
}

// TestCompactLowerdirOption checks the common directory and the rewritten
// options returned by compactLowerdirOption for a table of option lists.
func TestCompactLowerdirOption(t *testing.T) {
	type testCase struct {
		opts      []string
		commondir string
		newopts   []string
	}

	cases := []testCase{
		// no lowerdir or only one
		{
			opts:      []string{"workdir=a"},
			commondir: "",
			newopts:   []string{"workdir=a"},
		},
		{
			opts:      []string{"workdir=a", "lowerdir=b"},
			commondir: "",
			newopts:   []string{"workdir=a", "lowerdir=b"},
		},

		// >= 2 lowerdir
		{
			opts:      []string{"lowerdir=/snapshots/1/fs:/snapshots/10/fs"},
			commondir: "/snapshots/",
			newopts:   []string{"lowerdir=1/fs:10/fs"},
		},
		{
			opts:      []string{"lowerdir=/snapshots/1/fs:/snapshots/10/fs:/snapshots/2/fs"},
			commondir: "/snapshots/",
			newopts:   []string{"lowerdir=1/fs:10/fs:2/fs"},
		},

		// if common dir is /
		{
			opts:      []string{"lowerdir=/snapshots/1/fs:/other_snapshots/1/fs"},
			commondir: "",
			newopts:   []string{"lowerdir=/snapshots/1/fs:/other_snapshots/1/fs"},
		},
	}

	for n, c := range cases {
		gotDir, gotOpts := compactLowerdirOption(c.opts)
		switch {
		case gotDir != c.commondir:
			t.Fatalf("[%d case] expected common dir (%s), but got (%s)", n+1, c.commondir, gotDir)
		case !reflect.DeepEqual(gotOpts, c.newopts):
			t.Fatalf("[%d case] expected options (%v), but got (%v)", n+1, c.newopts, gotOpts)
		}
	}
}
93 changes: 93 additions & 0 deletions snapshots/testsuite/testsuite.go
Expand Up @@ -63,6 +63,8 @@ func SnapshotterSuite(t *testing.T, name string, snapshotterFn func(ctx context.
t.Run("StatInWalk", makeTest(name, snapshotterFn, checkStatInWalk))
t.Run("CloseTwice", makeTest(name, snapshotterFn, closeTwice))
t.Run("RootPermission", makeTest(name, snapshotterFn, checkRootPermission))

t.Run("128LayersMount", makeTest(name, snapshotterFn, check128LayersMount))
}

func makeTest(name string, snapshotterFn func(ctx context.Context, root string) (snapshots.Snapshotter, func() error, error), fn func(ctx context.Context, t *testing.T, snapshotter snapshots.Snapshotter, work string)) func(t *testing.T) {
Expand Down Expand Up @@ -858,3 +860,94 @@ func checkRootPermission(ctx context.Context, t *testing.T, snapshotter snapshot
t.Fatalf("expected 0755, got 0%o", mode)
}
}

// check128LayersMount builds a chain of 128 committed snapshots and verifies
// that what the snapshotter mounts matches a plain directory ("flat") that
// received the same changes. The comparison is done before and after each
// layer is applied, and once more on a view created on top of the last
// committed snapshot.
func check128LayersMount(ctx context.Context, t *testing.T, snapshotter snapshots.Snapshotter, work string) {
// The lowest layer creates the initial files and directories.
lowestApply := fstest.Apply(
fstest.CreateFile("/bottom", []byte("way at the bottom\n"), 0777),
fstest.CreateFile("/overwriteme", []byte("FIRST!\n"), 0777),
fstest.CreateDir("/ADDHERE", 0755),
fstest.CreateDir("/ONLYME", 0755),
fstest.CreateFile("/ONLYME/bottom", []byte("bye!\n"), 0777),
)

// Layers 1..127 each overwrite /overwriteme, add one file under /ADDHERE
// and recreate /ONLYME with a single layer-specific file, which gives 128
// appliers in total.
appliers := []fstest.Applier{lowestApply}
for i := 1; i <= 127; i++ {
appliers = append(appliers, fstest.Apply(
fstest.CreateFile("/overwriteme", []byte(fmt.Sprintf("%d WAS HERE!\n", i)), 0777),
fstest.CreateFile(fmt.Sprintf("/ADDHERE/file-%d", i), []byte("same\n"), 0755),
fstest.RemoveAll("/ONLYME"),
fstest.CreateDir("/ONLYME", 0755),
fstest.CreateFile(fmt.Sprintf("/ONLYME/file-%d", i), []byte("only me!\n"), 0777),
))
}

// flat is the reference directory every applier is also applied to.
flat := filepath.Join(work, "flat")
if err := os.MkdirAll(flat, 0777); err != nil {
t.Fatalf("failed to create flat dir(%s): %+v", flat, err)
}

// NOTE: gc labels are needed so that the snapshots are not removed by gc.
// NOTE(review): opt is declared outside this function; presumably it is the
// snapshot option that carries those labels — confirm.
parent := ""
for i, applier := range appliers {
preparing := filepath.Join(work, fmt.Sprintf("prepare-layer-%d", i))
if err := os.MkdirAll(preparing, 0777); err != nil {
t.Fatalf("[layer %d] failed to create preparing dir(%s): %+v", i, preparing, err)
}

// Prepare a snapshot on top of the previously committed one (or on
// top of nothing for the first layer) and mount it.
mounts, err := snapshotter.Prepare(ctx, preparing, parent, opt)
if err != nil {
t.Fatalf("[layer %d] failed to get mount info: %+v", i, err)
}

if err := mount.All(mounts, preparing); err != nil {
t.Fatalf("[layer %d] failed to mount on the target(%s): %+v", i, preparing, err)
}

// From here on the target is mounted, so every failure path unmounts it
// before aborting the test.
if err := fstest.CheckDirectoryEqual(preparing, flat); err != nil {
testutil.Unmount(t, preparing)
t.Fatalf("[layer %d] preparing doesn't equal to flat before apply: %+v", i, err)
}

if err := applier.Apply(flat); err != nil {
testutil.Unmount(t, preparing)
t.Fatalf("[layer %d] failed to apply on flat dir: %+v", i, err)
}

if err = applier.Apply(preparing); err != nil {
testutil.Unmount(t, preparing)
t.Fatalf("[layer %d] failed to apply on preparing dir: %+v", i, err)
}

if err := fstest.CheckDirectoryEqual(preparing, flat); err != nil {
testutil.Unmount(t, preparing)
t.Fatalf("[layer %d] preparing doesn't equal to flat after apply: %+v", i, err)
}

testutil.Unmount(t, preparing)

// The committed snapshot becomes the parent of the next layer.
parent = filepath.Join(work, fmt.Sprintf("committed-%d", i))
if err := snapshotter.Commit(ctx, parent, preparing, opt); err != nil {
t.Fatalf("[layer %d] failed to commit the preparing: %+v", i, err)
}

}

// Finally mount a view on top of the last committed snapshot and compare
// it with the reference directory.
view := filepath.Join(work, "fullview")
if err := os.MkdirAll(view, 0777); err != nil {
t.Fatalf("failed to create fullview dir(%s): %+v", view, err)
}

mounts, err := snapshotter.View(ctx, view, parent, opt)
if err != nil {
t.Fatalf("failed to get view's mount info: %+v", err)
}

if err := mount.All(mounts, view); err != nil {
t.Fatalf("failed to mount on the target(%s): %+v", view, err)
}
defer testutil.Unmount(t, view)

if err := fstest.CheckDirectoryEqual(view, flat); err != nil {
t.Fatalf("fullview should equal to flat: %+v", err)
}
}

0 comments on commit 8a2991c

Please sign in to comment.