Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion pkg/sentry/fsimpl/overlay/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ declare_mutex(
prefix = "directoryFD",
)

# Declares create_creds_mutex.go for package overlay; the "createCreds" prefix
# names the mutex type (createCredsMutex) used by the overlay filesystem.
declare_mutex(
name = "create_creds_mutex",
out = "create_creds_mutex.go",
package = "overlay",
prefix = "createCreds",
)

declare_rwmutex(
name = "rename_rwmutex",
out = "rename_rwmutex.go",
Expand Down Expand Up @@ -91,9 +98,10 @@ go_library(
srcs = [
"ancestry_rwmutex.go",
"copy_up.go",
"create_creds_mutex.go",
"data_rwmutex.go",
"dev_mutex.go",
"dir_cache_mutex",
"dir_cache_mutex.go",
"dir_fd_mutex.go",
"dir_mutex.go",
"directory.go",
Expand Down
4 changes: 4 additions & 0 deletions pkg/sentry/fsimpl/overlay/directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ func (d *dentry) isDir() bool {
return d.mode.Load()&linux.S_IFMT == linux.S_IFDIR
}

// isSGIDSet reports whether the set-group-ID bit is present in d's mode.
func (d *dentry) isSGIDSet() bool {
	sgid := d.mode.Load() & linux.ModeSetGID
	return sgid != 0
}

// Preconditions:
// - d.dirMu must be locked.
// - d.isDir().
Expand Down
83 changes: 15 additions & 68 deletions pkg/sentry/fsimpl/overlay/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -693,7 +693,8 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return err
}
}
if err := vfsObj.LinkAt(ctx, fs.creds, &vfs.PathOperation{
createCreds := parent.credsForCreate(rp.Credentials(), parent.isSGIDSet())
if err := vfsObj.LinkAt(ctx, createCreds, &vfs.PathOperation{
Root: old.upperVD,
Start: old.upperVD,
}, &newpop); err != nil {
Expand All @@ -702,21 +703,6 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
}
return err
}
creds := rp.Credentials()
if err := vfsObj.SetStatAt(ctx, fs.creds, &newpop, &vfs.SetStatOptions{
Stat: linux.Statx{
Mask: linux.STATX_UID | linux.STATX_GID,
UID: uint32(creds.EffectiveKUID),
GID: uint32(creds.EffectiveKGID),
},
}); err != nil {
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); cleanupErr != nil {
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after LinkAt metadata update failure: %v", cleanupErr))
} else if haveUpperWhiteout {
fs.cleanupRecreateWhiteout(ctx, vfsObj, &newpop)
}
return err
}
old.watches.Notify(ctx, "", linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent, false /* unlinked */)
return nil
})
Expand All @@ -740,23 +726,19 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return err
}
}
if err := vfsObj.MkdirAt(ctx, fs.creds, &pop, &opts); err != nil {
sgidSet := parent.isSGIDSet()
if sgidSet {
// Directories inherit the SGID bit.
opts.Mode |= linux.ModeSetGID
}
createCreds := parent.credsForCreate(rp.Credentials(), sgidSet)
if err := vfsObj.MkdirAt(ctx, createCreds, &pop, &opts); err != nil {
if haveUpperWhiteout {
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
}
return err
}

if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
Stat: parent.newChildOwnerStat(opts.Mode, rp.Credentials()),
}); err != nil {
if cleanupErr := vfsObj.RmdirAt(ctx, fs.creds, &pop); cleanupErr != nil {
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer directory after MkdirAt metadata update failure: %v", cleanupErr))
} else if haveUpperWhiteout {
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
}
return err
}
if haveUpperWhiteout {
// A whiteout is being replaced with this new directory. There may be
// directories on lower layers (previously hidden by the whiteout) that
Expand Down Expand Up @@ -807,23 +789,13 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return err
}
}
if err := vfsObj.MknodAt(ctx, fs.creds, &pop, &opts); err != nil {
createCreds := parent.credsForCreate(rp.Credentials(), parent.isSGIDSet())
if err := vfsObj.MknodAt(ctx, createCreds, &pop, &opts); err != nil {
if haveUpperWhiteout {
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
}
return err
}
creds := rp.Credentials()
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
Stat: parent.newChildOwnerStat(opts.Mode, creds),
}); err != nil {
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after MknodAt metadata update failure: %v", cleanupErr))
} else if haveUpperWhiteout {
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
}
return err
}
return nil
})
}
Expand Down Expand Up @@ -1027,7 +999,8 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
}
}
// Create the file on the upper layer, and get an FD representing it.
upperFD, err := vfsObj.OpenAt(ctx, fs.creds, &pop, &vfs.OpenOptions{
createCreds := parent.credsForCreate(creds, parent.isSGIDSet())
upperFD, err := vfsObj.OpenAt(ctx, createCreds, &pop, &vfs.OpenOptions{
Flags: opts.Flags&^vfs.FileCreationFlags | linux.O_CREAT | linux.O_EXCL,
Mode: opts.Mode,
})
Expand All @@ -1038,18 +1011,6 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
return nil, err
}

// Change the file's owner to the caller. We can't use upperFD.SetStat()
// because it will pick up creds from ctx.
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
Stat: parent.newChildOwnerStat(opts.Mode, creds),
}); err != nil {
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after OpenAt(O_CREAT) metadata update failure: %v", cleanupErr))
} else if haveUpperWhiteout {
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
}
return nil, err
}
// Re-lookup to get a dentry representing the new file, which is needed for
// the returned FD.
child, _, err := fs.getChildLocked(ctx, parent, childName, ds)
Expand Down Expand Up @@ -1609,27 +1570,13 @@ func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ
return err
}
}
if err := vfsObj.SymlinkAt(ctx, fs.creds, &pop, target); err != nil {
createCreds := parent.credsForCreate(rp.Credentials(), parent.isSGIDSet())
if err := vfsObj.SymlinkAt(ctx, createCreds, &pop, target); err != nil {
if haveUpperWhiteout {
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
}
return err
}
creds := rp.Credentials()
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
Stat: linux.Statx{
Mask: linux.STATX_UID | linux.STATX_GID,
UID: uint32(creds.EffectiveKUID),
GID: uint32(creds.EffectiveKGID),
},
}); err != nil {
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after SymlinkAt metadata update failure: %v", cleanupErr))
} else if haveUpperWhiteout {
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
}
return err
}
return nil
})
}
Expand Down
54 changes: 37 additions & 17 deletions pkg/sentry/fsimpl/overlay/overlay.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ type filesystem struct {
// used for accesses to the filesystem's layers. creds is immutable.
creds *auth.Credentials

// createCreds caches the credentials used for create operations, keyed by
// the effective UID/GID they carry. createCreds is protected by
// createCredsMu.
createCredsMu createCredsMutex `state:"nosave"`
createCreds map[createCredsKey]*auth.Credentials

// dirDevMinor is the device minor number used for directories. dirDevMinor
// is immutable.
dirDevMinor uint32
Expand Down Expand Up @@ -148,6 +152,12 @@ type layerDevNoAndIno struct {
ino uint64
}

// createCredsKey is the key type of filesystem.createCreds. It identifies a
// cached set of creation credentials by the effective UID/GID pair they carry.
//
// +stateify savable
type createCredsKey struct {
// uid is the effective KUID that the cached credentials carry.
uid auth.KUID
// gid is the effective KGID that the cached credentials carry.
gid auth.KGID
}

// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
mopts := vfs.GenericParseMountOptions(opts.Data)
Expand Down Expand Up @@ -876,25 +886,35 @@ func (d *dentry) mayDelete(creds *auth.Credentials, child *dentry) error {
)
}

// newChildOwnerStat returns a Statx for configuring the UID, GID, and mode of
// children.
func (d *dentry) newChildOwnerStat(mode linux.FileMode, creds *auth.Credentials) linux.Statx {
stat := linux.Statx{
Mask: uint32(linux.STATX_UID | linux.STATX_GID),
UID: uint32(creds.EffectiveKUID),
GID: uint32(creds.EffectiveKGID),
}
// Set GID and possibly the SGID bit if the parent is an SGID directory.
d.copyMu.RLock()
defer d.copyMu.RUnlock()
if d.mode.Load()&linux.ModeSetGID == linux.ModeSetGID {
stat.GID = d.gid.Load()
if stat.Mode&linux.ModeDirectory == linux.ModeDirectory {
stat.Mode = uint16(mode) | linux.ModeSetGID
stat.Mask |= linux.STATX_MODE
// credsForCreate returns the credentials to use when creating a new child of
// d on behalf of a caller holding creds.
//
// The result carries creds' effective UID and GID, except that when isSGIDSet
// is true the GID is taken from d instead. It is either d.fs.creds itself
// (when its effective IDs already match) or a Fork of d.fs.creds with the
// effective IDs overridden; forked results are cached in d.fs.createCreds.
func (d *dentry) credsForCreate(creds *auth.Credentials, isSGIDSet bool) *auth.Credentials {
// During creation operations, Linux uses a modified version of fs.creds. It
// sets the fsuid/fsgid to the caller's fsuid/fsgid. Note that gVisor doesn't
// support fsuid/fsgid and just uses euid/egid to determine file ownership
// for new files. So we just update euid/egid with the caller's euid/egid.
key := createCredsKey{
uid: creds.EffectiveKUID,
gid: creds.EffectiveKGID,
}
if isSGIDSet {
// Children of an SGID directory take the directory's GID rather than
// the caller's.
key.gid = auth.KGID(d.gid.Load())
}
// Fast path: fs.creds already has the wanted effective IDs, so it can be
// returned as-is without touching the cache.
if d.fs.creds.EffectiveKUID == key.uid && d.fs.creds.EffectiveKGID == key.gid {
return d.fs.creds
}
// All accesses to the cache map below happen with createCredsMu held.
d.fs.createCredsMu.Lock()
defer d.fs.createCredsMu.Unlock()
newCreds, ok := d.fs.createCreds[key]
if !ok {
// Cache miss: fork fs.creds, override the effective IDs, and remember
// the result for later calls with the same UID/GID pair.
newCreds = d.fs.creds.Fork()
newCreds.EffectiveKUID = key.uid
newCreds.EffectiveKGID = key.gid
// The map is allocated lazily on first insertion.
if d.fs.createCreds == nil {
d.fs.createCreds = make(map[createCredsKey]*auth.Credentials)
}
d.fs.createCreds[key] = newCreds
}
// NOTE(review): stat is not declared in this function; the next line looks
// like a leftover from the removed newChildOwnerStat in this diff rendering
// -- confirm against the merged file.
return stat
return newCreds
}

// fileDescription is embedded by overlay implementations of
Expand Down
Loading