Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions pkg/sentry/fsimpl/erofs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@ go_template_instance(
},
)

go_template_instance(
name = "dentry_list",
out = "dentry_list.go",
package = "erofs",
prefix = "dentry",
template = "//pkg/ilist:generic_list",
types = {
"Element": "*dentry",
"Linker": "*dentry",
},
)

go_template_instance(
name = "dentry_refs",
out = "dentry_refs.go",
Expand All @@ -43,6 +55,7 @@ go_template_instance(
go_library(
name = "erofs",
srcs = [
"dentry_list.go",
"dentry_refs.go",
"directory.go",
"erofs.go",
Expand Down
17 changes: 16 additions & 1 deletion pkg/sentry/fsimpl/erofs/directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,19 +86,23 @@ func (i *inode) lookup(name string) (uint64, error) {
return dirents[idx].Ino, nil
}

// lookup returns a ref'd dentry for the child with the given name.
func (d *dentry) lookup(ctx context.Context, name string) (*dentry, error) {
// Fast path, dentry already exists.
d.dirMu.RLock()
child, ok := d.childMap[name]
d.dirMu.RUnlock()
if ok {
child.IncRef()
d.dirMu.RUnlock()
return child, nil
}
d.dirMu.RUnlock()

// Slow path, create a new dentry.
d.dirMu.Lock()
defer d.dirMu.Unlock()
if child, ok := d.childMap[name]; ok {
child.IncRef()
return child, nil
}

Expand All @@ -117,7 +121,18 @@ func (d *dentry) lookup(ctx context.Context, name string) (*dentry, error) {
}
child.parent.Store(d)
child.name = name

// newDentry returned refs=1, that's the parent's ref via childMap.
d.childMap[name] = child

// Return another ref to caller.
child.IncRef()

// Parent is no longer a leaf, remove from cache. We use uncacheSelf()
// instead of checkCaching() because we hold d.dirMu and checkCaching()
// tries to acquire d.dirMu.RLock().
d.uncacheSelf()

return child, nil
}

Expand Down
173 changes: 157 additions & 16 deletions pkg/sentry/fsimpl/erofs/erofs.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ const (
// +stateify savable
type FilesystemType struct{}

const defaultMaxCachedDentries = 1000

// filesystem implements vfs.FilesystemImpl.
//
// +stateify savable
Expand Down Expand Up @@ -76,6 +78,13 @@ type filesystem struct {

// ancestryMu is required by genericfstree.
ancestryMu sync.RWMutex `state:"nosave"`

cacheMu sync.Mutex `state:"nosave"`
// +checklocks:cacheMu
cachedDentries dentryList
// +checklocks:cacheMu
cachedDentriesLen uint64
maxCachedDentries uint64
}

// InternalFilesystemOptions may be passed as
Expand Down Expand Up @@ -128,11 +137,12 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
}

fs := &filesystem{
mopts: opts.Data,
iopts: iopts,
image: image,
devMinor: devMinor,
mf: imageMemmapFile{image: image},
mopts: opts.Data,
iopts: iopts,
image: image,
devMinor: devMinor,
mf: imageMemmapFile{image: image},
maxCachedDentries: defaultMaxCachedDentries,
}
fs.vfsfs.Init(vfsObj, &fstype, fs)
cu.Add(func() { fs.vfsfs.DecRef(ctx) })
Expand Down Expand Up @@ -359,8 +369,7 @@ func (i *inode) fileType() uint16 {

// dentry implements vfs.DentryImpl.
//
// The filesystem is read-only and currently we never drop the cached dentries
// until the filesystem is unmounted. The reference model works like this:
// The reference model works like this:
//
// - The initial reference count of each dentry is one, which is the reference
// held by the parent (so when the reference count is one, it also means that
Expand All @@ -372,8 +381,9 @@ func (i *inode) fileType() uint16 {
// - The reference count of root dentry is two. One reference is returned to
// the caller of `GetFilesystem()`, and the other is held by `fs`.
//
// TODO: This can lead to unbounded memory growth in sentry due to the ever-growing
// dentry tree. We should have a dentry LRU cache, similar to what fsimpl/gofer does.
// - Dentries with refs==1 and no children are placed on an LRU cache list.
// When the cache is full, the least recently used dentry is evicted by
// removing it from its parent's childMap and DecRef'ing it.
//
// +stateify savable
type dentry struct {
Expand All @@ -400,6 +410,12 @@ type dentry struct {
// dentry represents a directory.
// +checklocks:dirMu
childMap map[string]*dentry

// cached indicates whether this dentry is on the fs.cachedDentries LRU list.
// +checklocks:inode.fs.cacheMu
cached bool

dentryEntry
}

// The caller is expected to handle dentry insertion into dentry tree.
Expand All @@ -418,15 +434,140 @@ func (fs *filesystem) newDentry(nid uint64) (*dentry, error) {

// DecRef implements vfs.DentryImpl.DecRef.
func (d *dentry) DecRef(ctx context.Context) {
destroyed := false
d.dentryRefs.DecRef(func() {
d.dirMu.Lock()
for _, c := range d.childMap {
c.DecRef(ctx)
}
d.childMap = nil
d.dirMu.Unlock()
d.inode.DecRef(ctx)
destroyed = true
d.destroy(ctx)
})
if !destroyed {
d.checkCaching(ctx)
}
}

// checkCaching evaluates whether d should be on the LRU cache list.
// A dentry is cache-eligible when refs == 1 (only the parent's reference via
// childMap) and it has no children. Called after every DecRef that does not
// destroy d.
//
// Lock ordering: dirMu before cacheMu.
func (d *dentry) checkCaching(ctx context.Context) {
fs := d.inode.fs

// Hold dirMu.RLock across cacheMu so that hasChildren and refs are
// read atomically. Without this, a concurrent lookup could add a child
// between reading hasChildren and locking cacheMu, causing a dentry
// with children to incorrectly enter the cache.
d.dirMu.RLock()
fs.cacheMu.Lock()

hasChildren := len(d.childMap) > 0
refs := d.ReadRefs()
if refs <= 0 {
fs.cacheMu.Unlock()
d.dirMu.RUnlock()
return
}

// Not eligible for caching — remove from cache if it was there.
if refs > 1 || hasChildren {
if d.cached {
fs.cachedDentries.Remove(d)
fs.cachedDentriesLen--
d.cached = false
}
fs.cacheMu.Unlock()
d.dirMu.RUnlock()
return
}

// Already cached — move to front (most recently used).
if d.cached {
fs.cachedDentries.Remove(d)
fs.cachedDentries.PushFront(d)
fs.cacheMu.Unlock()
d.dirMu.RUnlock()
return
}

// Add to cache.
fs.cachedDentries.PushFront(d)
fs.cachedDentriesLen++
d.cached = true
shouldEvict := fs.cachedDentriesLen > fs.maxCachedDentries
fs.cacheMu.Unlock()
d.dirMu.RUnlock()

if shouldEvict {
fs.evictLRUDentry(ctx)
}
}

// uncacheSelf removes d from the LRU cache list without severing d from the
// dentry tree (d stays in its parent's childMap). Used when d is no longer
// cache-eligible because it just gained a child.
//
// Precondition: the caller holds d.dirMu.
func (d *dentry) uncacheSelf() {
fs := d.inode.fs
fs.cacheMu.Lock()
if d.cached {
fs.cachedDentries.Remove(d)
fs.cachedDentriesLen--
d.cached = false
}
fs.cacheMu.Unlock()
}

// evictLRUDentry removes the least recently used dentry from the cache and
// severs it from the dentry tree by deleting it from its parent's childMap and
// dropping the parent's reference.
func (fs *filesystem) evictLRUDentry(ctx context.Context) {
// Pop the least recently used dentry from the cache.
fs.cacheMu.Lock()
victim := fs.cachedDentries.Back()
if victim == nil {
fs.cacheMu.Unlock()
return
}
fs.cachedDentries.Remove(victim)
fs.cachedDentriesLen--
victim.cached = false // +checklocksforce: fs == victim.inode.fs.
fs.cacheMu.Unlock()

parent := victim.parent.Load()
if parent == nil {
return
}

// Revalidate under both locks (lock ordering: dirMu before cacheMu).
// Between removing victim from the cache and acquiring these locks, a
// concurrent lookup may have taken and released a ref on victim, causing
// checkCaching to re-add it to the LRU. A concurrent lookup may also
// still be holding an extra ref. In either case, abort the eviction.
parent.dirMu.Lock()
fs.cacheMu.Lock()
if victim.cached || victim.ReadRefs() > 1 {
fs.cacheMu.Unlock()
parent.dirMu.Unlock()
return
}
fs.cacheMu.Unlock()

// Sever victim from the dentry tree and destroy it.
delete(parent.childMap, victim.name)
parent.dirMu.Unlock()
victim.DecRef(ctx)
parent.checkCaching(ctx)
}

func (d *dentry) destroy(ctx context.Context) {
d.dirMu.Lock()
for _, child := range d.childMap {
child.DecRef(ctx)
}
d.childMap = nil
d.dirMu.Unlock()
d.inode.DecRef(ctx)
}

// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
Expand Down
Loading
Loading