Local disk cache: part 1 #530
base: master
Changes from 2 commits
@@ -0,0 +1,33 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") | ||
|
||
go_library( | ||
name = "diskcache", | ||
srcs = [ | ||
"diskcache.go", | ||
"sys_darwin.go", | ||
"sys_linux.go", | ||
"sys_windows.go", | ||
], | ||
importpath = "github.com/bazelbuild/remote-apis-sdks/go/pkg/diskcache", | ||
visibility = ["//visibility:public"], | ||
deps = [ | ||
"//go/pkg/digest", | ||
"@com_github_bazelbuild_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto", | ||
"@com_github_golang_glog//:go_default_library", | ||
"@org_golang_google_protobuf//proto:go_default_library", | ||
], | ||
) | ||
|
||
go_test( | ||
name = "diskcache_test", | ||
srcs = ["diskcache_test.go"], | ||
embed = [":diskcache"], | ||
deps = [ | ||
"//go/pkg/digest", | ||
"//go/pkg/testutil", | ||
"@com_github_bazelbuild_remote_apis//build/bazel/remote/execution/v2:remote_execution_go_proto", | ||
"@com_github_google_go_cmp//cmp:go_default_library", | ||
"@com_github_pborman_uuid//:go_default_library", | ||
"@org_golang_x_sync//errgroup:go_default_library", | ||
], | ||
) |
@@ -0,0 +1,323 @@
// Package diskcache implements a local disk LRU CAS cache.
package diskcache

import (
    "container/heap"
    "context"
    "fmt"
    "io"
    "io/fs"
    "os"
    "path/filepath"
    "strconv"
    "strings"
    "sync"
    "sync/atomic"
    "time"

    "github.com/bazelbuild/remote-apis-sdks/go/pkg/digest"
    log "github.com/golang/glog"
)

type key struct {
    digest digest.Digest
}

// A qitem is something we manage in a priority queue.
type qitem struct {
    key   key
    lat   time.Time    // The last accessed time of the file.
    index int          // The index of the item in the heap.
    mu    sync.RWMutex // Protects the data-structure consistency for the given digest.
}

// A priorityQueue implements heap.Interface and holds qitems.
type priorityQueue struct {
    items []*qitem
    n     int
}

func (q *priorityQueue) Len() int {
    return q.n
}

func (q *priorityQueue) Less(i, j int) bool {
    // We want Pop to give us the oldest item.
    return q.items[i].lat.Before(q.items[j].lat)
}

func (q priorityQueue) Swap(i, j int) {
    q.items[i], q.items[j] = q.items[j], q.items[i]
    q.items[i].index = i
    q.items[j].index = j
}

func (q *priorityQueue) Push(x any) {
    if q.n == cap(q.items) {
        // Resize the queue.
        old := q.items
        q.items = make([]*qitem, 2*cap(old)) // Initial capacity needs to be > 0.
        copy(q.items, old)
    }
    item := x.(*qitem)
    item.index = q.n
    q.items[item.index] = item
    q.n++
}

func (q *priorityQueue) Pop() any {
    item := q.items[q.n-1]
    q.items[q.n-1] = nil // Avoid a memory leak.
    item.index = -1      // For safety.
    q.n--
    return item
}

// Bump moves the item to the head of the queue.
func (q *priorityQueue) Bump(item *qitem) {
    // Sanity check, necessary because of possible racing between Bump and GC:
    if item.index < 0 || item.index >= q.n || q.items[item.index].key != item.key {
        return
    }
    item.lat = time.Now()
    heap.Fix(q, item.index)
}

const maxConcurrentRequests = 1000

// DiskCache is a local disk LRU CAS and Action Cache cache.
type DiskCache struct {
    root             string         // Path to the root directory of the disk cache.
    maxCapacityBytes uint64         // If disk size exceeds this, old items will be evicted as needed.
    mu               sync.Mutex     // Protects the queue.
    store            sync.Map       // Map of keys to qitems.
    queue            *priorityQueue // Keys by last accessed time.
    sizeBytes        int64          // Total size.
    ctx              context.Context
    shutdown         chan bool
    gcTick           uint64
    gcReq            chan uint64
    testGcTicks      chan uint64
}

func New(ctx context.Context, root string, maxCapacityBytes uint64) *DiskCache {
    res := &DiskCache{
        root:             root,
        maxCapacityBytes: maxCapacityBytes,
        ctx:              ctx,
        queue: &priorityQueue{
            items: make([]*qitem, 1000),
        },
        gcReq:    make(chan uint64, maxConcurrentRequests),
        shutdown: make(chan bool),
    }
    heap.Init(res.queue)
    _ = os.MkdirAll(root, os.ModePerm)
    // We use Git's directory/file naming structure as inspiration:
    // https://git-scm.com/book/en/v2/Git-Internals-Git-Objects#:~:text=The%20subdirectory%20is%20named%20with%20the%20first%202%20characters%20of%20the%20SHA%2D1%2C%20and%20the%20filename%20is%20the%20remaining%2038%20characters.
    var wg sync.WaitGroup
    wg.Add(256)
    for i := 0; i < 256; i++ {
        prefixDir := filepath.Join(root, fmt.Sprintf("%02x", i))
        go func() {
            defer wg.Done()
            _ = os.MkdirAll(prefixDir, os.ModePerm)
            _ = filepath.WalkDir(prefixDir, func(path string, d fs.DirEntry, err error) error {
                // We log and continue on all errors, because cache read errors are not critical.
                if err != nil {
                    log.Errorf("Error reading cache directory: %v", err)
                    return nil
                }
                if d.IsDir() {
                    return nil
                }
                subdir := filepath.Base(filepath.Dir(path))
                k, err := res.getKeyFromFileName(subdir + d.Name())
                if err != nil {
                    log.Errorf("Error parsing cached file name %s: %v", path, err)
                    return nil
                }
                atime, err := GetLastAccessTime(path)
                if err != nil {
                    log.Errorf("Error getting last accessed time of %s: %v", path, err)
                    return nil
                }
                it := &qitem{
                    key: k,
                    lat: atime,
                }
                size, err := res.getItemSize(k)
                if err != nil {
                    log.Errorf("Error getting file size of %s: %v", path, err)
                    return nil
                }
                res.store.Store(k, it)
                atomic.AddInt64(&res.sizeBytes, size)
                res.mu.Lock()
                heap.Push(res.queue, it)
                res.mu.Unlock()
                return nil
            })
        }()
    }
    wg.Wait()
    go res.gc()
Review comment: I think gc should only be called
Reply: Not sure I understand -- gc is a daemon thread; it blocks on the GC request channel (gcReq) and only does work when there is something to do (the total size is over capacity). And we only send a GC request to the channel when we reach capacity. The current line only starts the daemon thread. Did you mean changes to the gc thread itself, or to the places where a GC request is added to the channel?
    return res
}
func (d *DiskCache) getItemSize(k key) (int64, error) {
    return k.digest.Size, nil
}

// Shutdown releases resources and terminates the GC daemon. It should be the last call to the DiskCache.
func (d *DiskCache) Shutdown() {
    d.shutdown <- true
}

func (d *DiskCache) TotalSizeBytes() uint64 {
    return uint64(atomic.LoadInt64(&d.sizeBytes))
}

func (d *DiskCache) getKeyFromFileName(fname string) (key, error) {
Review comment: This doesn't need to be a member function.
    pair := strings.Split(fname, ".")
    if len(pair) != 2 {
        return key{}, fmt.Errorf("expected file name in the form [ac_]hash.size, got %s", fname)
    }
    size, err := strconv.ParseInt(pair[1], 10, 64)
    if err != nil {
        return key{}, fmt.Errorf("invalid size in digest %s: %s", fname, err)
    }
    dg, err := digest.New(pair[0], size)
    if err != nil {
        return key{}, fmt.Errorf("invalid digest from file name %s: %v", fname, err)
    }
    return key{digest: dg}, nil
}
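The review comment above points out that getKeyFromFileName uses no DiskCache state; a sketch of the same parsing as a package-level function (inside the diskcache package shown above) would be:

// getKeyFromFileName parses the reassembled "hash.size" file name back into a cache key.
// It takes no receiver because it does not touch any DiskCache fields.
func getKeyFromFileName(fname string) (key, error) {
    pair := strings.Split(fname, ".")
    if len(pair) != 2 {
        return key{}, fmt.Errorf("expected file name in the form [ac_]hash.size, got %s", fname)
    }
    size, err := strconv.ParseInt(pair[1], 10, 64)
    if err != nil {
        return key{}, fmt.Errorf("invalid size in digest %s: %v", fname, err)
    }
    dg, err := digest.New(pair[0], size)
    if err != nil {
        return key{}, fmt.Errorf("invalid digest from file name %s: %v", fname, err)
    }
    return key{digest: dg}, nil
}

Callers in New would then use getKeyFromFileName(subdir + d.Name()) directly.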
func (d *DiskCache) getPath(k key) string {
    return filepath.Join(d.root, k.digest.Hash[:2], fmt.Sprintf("%s.%d", k.digest.Hash[2:], k.digest.Size))
}

func (d *DiskCache) StoreCas(dg digest.Digest, path string) error {
Review comment: I think generalizing this method to
We should also document that the cache does not enforce any relationship between
    if dg.Size > int64(d.maxCapacityBytes) {
        return fmt.Errorf("blob size %d exceeds DiskCache capacity %d", dg.Size, d.maxCapacityBytes)
Review comment: A more graceful behavior would be to try to trigger a synchronous GC first before erroring out.
Reply: Why? This will fail for sure regardless of GC. Note that we compare with
    }
    it := &qitem{
        key: key{digest: dg},
        lat: time.Now(),
    }
    it.mu.Lock()
    defer it.mu.Unlock()
    _, exists := d.store.LoadOrStore(it.key, it)
Review comment: d.store is already synchronized by it.mu. I'd vote to make store a normal map[digest]*qitem instead; it would be faster.
Reply: Oh, no (from https://pkg.go.dev/sync#Map): we have a classical case 1 here, where almost always a key is only written once but read many times from concurrent threads. A sync.Map should greatly reduce lock contention.
    if exists {
        return nil
    }
    d.mu.Lock()
    heap.Push(d.queue, it)
    d.mu.Unlock()
    if err := copyFile(path, d.getPath(it.key), dg.Size); err != nil {
        return err
    }
    newSize := uint64(atomic.AddInt64(&d.sizeBytes, dg.Size))
    if newSize > d.maxCapacityBytes {
        select {
        case d.gcReq <- atomic.AddUint64(&d.gcTick, 1):
        default:
        }
    }
    return nil
}
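For comparison with the sync.Map discussion above, a rough sketch of the reviewer's alternative -- a plain map guarded by a single mutex shared with the queue -- could look like this. The type and method names are hypothetical, and the sketch reuses the qitem and priorityQueue types from the diff:

// Hypothetical alternative store: one mutex protects both the map and the queue,
// so the lookup, the map insert, and the heap push become a single atomic step.
type plainMapCache struct {
    mu    sync.Mutex
    store map[digest.Digest]*qitem
    queue *priorityQueue
}

func (c *plainMapCache) loadOrStore(dg digest.Digest, it *qitem) (actual *qitem, loaded bool) {
    c.mu.Lock()
    defer c.mu.Unlock()
    if old, ok := c.store[dg]; ok {
        return old, true
    }
    c.store[dg] = it
    heap.Push(c.queue, it)
    return it, false
}

The trade-off is exactly the one debated above: this serializes all lookups on one lock, whereas sync.Map avoids contention for the write-once/read-many pattern.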
func (d *DiskCache) gc() {
Review comment: I think this implementation may have the following issues:
The capacity issue can be mitigated by evicting a percentage of the cache rather than just enough to meet the threshold. This accommodates several new items before triggering another eviction. The synchronization issue can be traded off against a simpler implementation that uses a single lock. Updating the list and the map should be an atomic operation in which the entire list is shared and is therefore the bottleneck. Actually, hits are bounded by IO anyway. Misses, on the other hand, can be faster with less locking; then again, misses are always followed by updates, which are bounded by IO. I think IO is more of a bottleneck than synchronization, so it's worth going with a simple locking flow. How bad would it be if eviction events were blocking? If eviction saturates IO bandwidth, it should perform the same whether it's async or sync; blocking might even be faster if we consider disk locality. I also think that most users would prefer using a large cache to avoid eviction anyway. Without a background eviction task, the cache would be simpler to work with.
    for {
        select {
        case <-d.shutdown:
            return
        case <-d.ctx.Done():
            return
        case t := <-d.gcReq:
            // Evict old entries until total size is below cap.
            for uint64(atomic.LoadInt64(&d.sizeBytes)) > d.maxCapacityBytes {
                d.mu.Lock()
                it := heap.Pop(d.queue).(*qitem)
                d.mu.Unlock()
                size, err := d.getItemSize(it.key)
                if err != nil {
                    log.Errorf("error getting item size for %v: %v", it.key, err)
                    size = 0
                }
                atomic.AddInt64(&d.sizeBytes, -size)
                it.mu.Lock()
                // We only delete the files, and not the prefix directories, because the prefixes are not worth worrying about.
                if err := os.Remove(d.getPath(it.key)); err != nil {
                    log.Errorf("Error removing file: %v", err)
                }
                d.store.Delete(it.key)
                it.mu.Unlock()
            }
            if d.testGcTicks != nil {
                select {
                case d.testGcTicks <- t:
                default:
                }
            }
        }
    }
}
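The GC thread above suggests evicting a percentage of the cache rather than stopping right at the threshold. A minimal sketch of that variant, reusing the DiskCache fields from the diff; the 90% watermark and the function name are illustrative, not part of this PR:

// Hypothetical variant of the eviction loop: evict until the cache is at most
// 90% full, so several new stores fit before the next GC is triggered.
func (d *DiskCache) gcToWatermark() {
    target := d.maxCapacityBytes * 9 / 10 // Assumed watermark.
    for uint64(atomic.LoadInt64(&d.sizeBytes)) > target {
        d.mu.Lock()
        if d.queue.Len() == 0 {
            d.mu.Unlock()
            return
        }
        it := heap.Pop(d.queue).(*qitem)
        d.mu.Unlock()
        size, err := d.getItemSize(it.key)
        if err != nil {
            size = 0
        }
        atomic.AddInt64(&d.sizeBytes, -size)
        it.mu.Lock()
        if err := os.Remove(d.getPath(it.key)); err != nil {
            log.Errorf("Error removing file: %v", err)
        }
        d.store.Delete(it.key)
        it.mu.Unlock()
    }
}

Evicting below capacity leaves headroom for several stores before the next GC request fires.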
// copyFile copies file contents, retaining the source permissions.
func copyFile(src, dst string, size int64) error {
    srcInfo, err := os.Stat(src)
    if err != nil {
        return err
    }
    in, err := os.Open(src)
    if err != nil {
        return err
    }
    defer in.Close()
    out, err := os.Create(dst)
    if err != nil {
        return err
    }
    if err := out.Chmod(srcInfo.Mode()); err != nil {
        return err
    }
    defer out.Close()
    _, err = io.Copy(out, in)
Review comment: Can we move the file copy function into a separate source (similar to
CloneFile utils can be found here as an example:
Reply: Great idea, thank you! I didn't know of clonefile. I renamed the system-specific files to
    if err != nil {
        return err
    }
    // Required sanity check: sometimes the copy pretends to succeed, but doesn't, if
Review comment: Can you expand on that? That's news to me. Or do you mean the general case of a race condition?
Reply: In general, if we write to a file while concurrently deleting it, the Copy might not return an error (even if no bytes were actually copied), and neither will the Stat. But the Stat will return a different size than expected (usually 0).
    // the file is being concurrently deleted.
    dstInfo, err := os.Stat(dst)
    if err != nil {
        return err
    }
    if dstInfo.Size() != size {
        return fmt.Errorf("copy of %s to %s failed: src/dst size mismatch: wanted %d, got %d", src, dst, size, dstInfo.Size())
    }
    return nil
}
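Following the clonefile suggestion in the thread above, a sketch of a copy-on-write fast path that could live in a darwin-only file. It assumes golang.org/x/sys/unix as a new dependency, and the cloneOrCopyFile name is illustrative rather than what the PR ends up using:

//go:build darwin

package diskcache

import (
    "golang.org/x/sys/unix"
)

// cloneOrCopyFile tries an APFS copy-on-write clone first and falls back to the
// regular copyFile defined above when cloning is not possible.
func cloneOrCopyFile(src, dst string, size int64) error {
    if err := unix.Clonefile(src, dst, 0); err == nil {
        return nil
    }
    // Cloning can fail across filesystems or when dst already exists; fall back to copying.
    return copyFile(src, dst, size)
}

On APFS the clone shares blocks with the source, so it is effectively constant-time; the fallback keeps behavior identical when cloning fails.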
// LoadCas copies the file contents to the given path if the digest exists in the disk cache.
func (d *DiskCache) LoadCas(dg digest.Digest, path string) bool {
Review comment: I'd propose renaming LoadCas to CopyFromCache.
I realize the sister function is called StoreCas. I don't know if this is nomenclature reused in this code base; if so, ignore this comment.
Reply: Yeah, Load* and Store* are consistent with sync.Map's naming; we also sometimes use Get/Update. I'll leave the naming decisions to @mrahs.
Reply: I think the cache can have three main methods: putting bytes, getting bytes, and copying bytes to a file path. Since the cache deals with bytes, I'm inclined towards
    k := key{digest: dg}
    iUntyped, loaded := d.store.Load(k)
    if !loaded {
        return false
    }
    it := iUntyped.(*qitem)
    it.mu.RLock()
    if err := copyFile(d.getPath(k), path, dg.Size); err != nil {
Review comment: This simplifies the control flow:
Reply: Yep, thank you.
        // It is not possible to prevent a race with GC; hence, we return false on copy errors.
        it.mu.RUnlock()
        return false
    }
    it.mu.RUnlock()

    d.mu.Lock()
    d.queue.Bump(it)
    d.mu.Unlock()
    return true
}
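The reviewer's suggested snippet for simplifying the control flow is not visible above; one hypothetical way to flatten the locking with a deferred RUnlock (illustrative only, not the reviewer's actual suggestion) is:

// Hypothetical reshaping of LoadCas: a single deferred RUnlock instead of
// unlocking on both the error and success paths. The name loadCasAlt is made up.
func (d *DiskCache) loadCasAlt(dg digest.Digest, path string) bool {
    k := key{digest: dg}
    iUntyped, loaded := d.store.Load(k)
    if !loaded {
        return false
    }
    it := iUntyped.(*qitem)

    ok := func() bool {
        it.mu.RLock()
        defer it.mu.RUnlock()
        // It is not possible to prevent a race with GC; hence, false on copy errors.
        return copyFile(d.getPath(k), path, dg.Size) == nil
    }()
    if !ok {
        return false
    }

    d.mu.Lock()
    d.queue.Bump(it)
    d.mu.Unlock()
    return true
}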
Review comment: There are extreme cases where this could happen (e.g. disk full). I'd recommend using https://pkg.go.dev/golang.org/x/sync/errgroup#WithContext for error management instead of logging a silent failure.
Reply: Yeah, basically my main problem here was that I didn't want to return an error from New, because that would mean that the client Opt could return an error on Apply, and the code currently doesn't support that. To be fair, that is the right thing to do, imo -- options can, in general, fail. So I'd propose a separate, independent PR to add an error return type to Apply, and then I can propagate all the errors properly from here. @mrahs, does that SGTY? Another idea (pushing the problem further down the line) would be to return an error from New now, but then log and ignore it in Apply, which is what I'll do for now, until @mrahs weighs in on whether he's okay with me changing the Apply type. Thank you!
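A sketch of the errgroup.WithContext approach suggested above, applied to the per-prefix initialization in New. It assumes adding golang.org/x/sync/errgroup to the library imports (it is already a test dependency in the BUILD file) and an error-returning initializer, which is not how the PR currently behaves; the initCache name is illustrative:

// Hypothetical error-propagating initializer, sketching the errgroup.WithContext
// suggestion instead of a WaitGroup plus logged-and-ignored errors.
func initCache(ctx context.Context, root string) error {
    g, gctx := errgroup.WithContext(ctx)
    for i := 0; i < 256; i++ {
        prefixDir := filepath.Join(root, fmt.Sprintf("%02x", i))
        g.Go(func() error {
            if err := os.MkdirAll(prefixDir, os.ModePerm); err != nil {
                return err
            }
            return filepath.WalkDir(prefixDir, func(path string, de fs.DirEntry, err error) error {
                if err != nil {
                    return err // Propagate instead of logging, so New can return it.
                }
                if gctx.Err() != nil {
                    return gctx.Err() // Stop early once any goroutine has failed.
                }
                // ... parse the key, read the atime, and populate the store/queue as in the PR ...
                return nil
            })
        })
    }
    return g.Wait()
}

g.Wait() returns the first error from any prefix goroutine, which New could then propagate instead of logging.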
Reply: I'd push error handling onto the user by making the client.Opt accept a *DiskCache instance.
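A sketch of that option shape: the caller builds the DiskCache (and handles any error from New) and hands the instance to the client. The CacheOpt type, the Apply signature, and the DiskCache field on client.Client are hypothetical, since the real client option API in this SDK may differ:

// Hypothetical option wiring: the caller constructs the DiskCache up front, so
// any error from New is handled before the option is applied.
type CacheOpt struct {
    Cache *diskcache.DiskCache
}

// Apply attaches the pre-built cache to the client; it cannot fail, so the
// error-free Apply signature can stay as-is.
func (o *CacheOpt) Apply(c *client.Client) {
    c.DiskCache = o.Cache // Assumes a DiskCache field on the client; illustrative only.
}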