From c9692410f388d5227a544d8d136dba34f1f63126 Mon Sep 17 00:00:00 2001 From: sumeerbhola Date: Fri, 10 May 2024 11:09:07 -0400 Subject: [PATCH] objstorageprovider: small readahead improvements - The limit was not being updated in certain cases, which delayed when readahead would occur. This is fixed, and new test case exercises this. - The code in recordCacheHit is almost identical to the one in maybeReadahead and the main comprehension challenge of the code is the various predicates (and their code comments with examples), which are repeated (without the examples). These are now merged into a single helper method. - A small side-effect of the previous change is that numReads is incremented on a cache hit when we are already above the threshold. This has no actual effect on when readahead will happen. And arguably, this new behavior is more principled. - One of the test cases had a missing - and was not being exercised, and resulting in an incorrect error in a later test case. This is fixed. - There is additional invariant documentation. --- objstorage/objstorageprovider/readahead.go | 85 +++++++++---------- .../objstorageprovider/readahead_test.go | 4 +- .../objstorageprovider/testdata/readahead | 75 +++++++++++----- 3 files changed, 99 insertions(+), 65 deletions(-) diff --git a/objstorage/objstorageprovider/readahead.go b/objstorage/objstorageprovider/readahead.go index e07017c909..e29155d88a 100644 --- a/objstorage/objstorageprovider/readahead.go +++ b/objstorage/objstorageprovider/readahead.go @@ -40,40 +40,7 @@ func makeReadaheadState(maxReadaheadSize int64) readaheadState { } func (rs *readaheadState) recordCacheHit(offset, blockLength int64) { - currentReadEnd := offset + blockLength - if rs.numReads >= minFileReadsForReadahead { - if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { - // This is a read that would have resulted in a readahead, had it - // not been a cache hit. - rs.limit = currentReadEnd - return - } - if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize { - // We read too far away from rs.limit to benefit from readahead in - // any scenario. Reset all variables. - rs.numReads = 1 - rs.limit = currentReadEnd - rs.size = initialReadaheadSize - rs.prevSize = 0 - return - } - // Reads in the range [rs.limit - rs.prevSize, rs.limit] end up - // here. This is a read that is potentially benefitting from a past - // readahead. - return - } - if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { - // Blocks are being read sequentially and would benefit from readahead - // down the line. - rs.numReads++ - return - } - // We read too far ahead of the last read, or before it. This indicates - // a random read, where readahead is not desirable. Reset all variables. - rs.numReads = 1 - rs.limit = currentReadEnd - rs.size = initialReadaheadSize - rs.prevSize = 0 + _ = rs.maybeReadaheadOrCacheHit(offset, blockLength, false) } // maybeReadahead updates state and determines whether to issue a readahead / @@ -81,6 +48,13 @@ func (rs *readaheadState) recordCacheHit(offset, blockLength int64) { // Returns a size value (greater than 0) that should be prefetched if readahead // would be beneficial. func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 { + return rs.maybeReadaheadOrCacheHit(offset, blockLength, true) +} + +// The return value should be ignored if !readahead. +func (rs *readaheadState) maybeReadaheadOrCacheHit( + offset, blockLength int64, readahead bool, +) int64 { currentReadEnd := offset + blockLength if rs.numReads >= minFileReadsForReadahead { // The minimum threshold of sequential reads to justify reading ahead @@ -119,18 +93,22 @@ func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 { // // rs.numReads++ - rs.limit = offset + rs.size - rs.prevSize = rs.size - // Increase rs.size for the next read. - rs.size *= 2 - if rs.size > rs.maxReadaheadSize { - rs.size = rs.maxReadaheadSize + if readahead { + rs.limit = offset + rs.size + rs.prevSize = rs.size + // Increase rs.size for the next read. + rs.size *= 2 + if rs.size > rs.maxReadaheadSize { + rs.size = rs.maxReadaheadSize + } + } else { + // This is a read that would have resulted in a readahead, had it + // not been a cache hit. + rs.limit = currentReadEnd } return rs.prevSize } if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize { - // The above conditional has rs.limit > rs.prevSize to confirm that - // rs.limit - rs.prevSize would not underflow. // We read too far away from rs.limit to benefit from readahead in // any scenario. Reset all variables. // The case where we read too far ahead: @@ -156,7 +134,16 @@ func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 { return 0 } - // Reads in the range [rs.limit - rs.prevSize, rs.limit] end up + // The previous if-block predicates were all false. This mechanically implies: + // + // INVARIANT: + // !(currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize) && + // !(currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize) + // Which mechanically simplifies to: + // currentReadEnd < rs.limit && currentReadEnd >= rs.limit-rs.prevSize && + // offset <= rs.limit+rs.maxReadaheadSize + // + // So reads in the range [rs.limit - rs.prevSize, rs.limit] end up // here. This is a read that is potentially benefitting from a past // readahead, but there's no reason to issue a readahead call at the // moment. @@ -171,6 +158,8 @@ func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 { rs.numReads++ return 0 } + // Not yet at the numReads threshold to justify readahead. But we want to + // capture whether readahead will be beneficial in the future. if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { // Blocks are being read sequentially and would benefit from readahead // down the line. @@ -182,6 +171,16 @@ func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 { // offset currentReadEnd // rs.numReads++ + // It is possible to fall here when rs.limit has not been initialized. If + // we don't initialize, rs.limit, it is possible that the first read + // offset was at rs.limit+rs.maxReadaheadSize-delta and the enclosing + // if-block predicate was true, and the next read is sequential but has + // offset > rs.limit+rs.maxReadaheadSize (if we left rs.limit at 0), and + // the enclosing if-block predicate will be false and we will incorrectly + // think that readahead is not beneficial. The same issue arises if + // rs.limit has been initialized and currentReadEnd is advancing past + // rs.limit. + rs.limit = currentReadEnd return 0 } // We read too far ahead of the last read, or before it. This indicates diff --git a/objstorage/objstorageprovider/readahead_test.go b/objstorage/objstorageprovider/readahead_test.go index a44d78ac58..86aff7b52e 100644 --- a/objstorage/objstorageprovider/readahead_test.go +++ b/objstorage/objstorageprovider/readahead_test.go @@ -20,9 +20,7 @@ func TestMaybeReadahead(t *testing.T) { cacheHit := false switch d.Cmd { case "reset": - rs.size = initialReadaheadSize - rs.limit = 0 - rs.numReads = 0 + rs = makeReadaheadState(rs.maxReadaheadSize) return "" case "cache-read": diff --git a/objstorage/objstorageprovider/testdata/readahead b/objstorage/objstorageprovider/testdata/readahead index d2bb217314..adfe4ec610 100644 --- a/objstorage/objstorageprovider/testdata/readahead +++ b/objstorage/objstorageprovider/testdata/readahead @@ -8,7 +8,7 @@ readahead: 0 numReads: 1 size: 65536 prevSize: 0 -limit: 0 +limit: 2064 read 2096, 16 @@ -17,7 +17,7 @@ readahead: 0 numReads: 2 size: 65536 prevSize: 0 -limit: 0 +limit: 2112 read 2112, 16 @@ -84,7 +84,7 @@ readahead: 0 numReads: 2 size: 65536 prevSize: 0 -limit: 16208 +limit: 16209 # The next read is too far ahead to benefit from readahead # (i.e. 540497 > 16208 (limit) + (512 << 10) (maxReadaheadSize)) @@ -126,45 +126,49 @@ readahead: 0 numReads: 2 size: 65536 prevSize: 0 -limit: 16 +limit: 7796 read -7680, 16 +7800, 16 ---- readahead: 65536 numReads: 3 size: 131072 prevSize: 65536 -limit: 73216 +limit: 73336 read -7780, 16 ---- +7880, 16 +---- readahead: 0 numReads: 4 size: 131072 prevSize: 65536 -limit: 73216 +limit: 73336 read -7880, 16 +7980, 16 ---- -expected 2 args: offset, size +readahead: 0 +numReads: 5 +size: 131072 +prevSize: 65536 +limit: 73336 read -7980, 16 +8080, 16 ---- readahead: 0 -numReads: 4 +numReads: 6 size: 131072 prevSize: 65536 -limit: 73216 +limit: 73336 read 73416, 16 ---- readahead: 131072 -numReads: 5 +numReads: 7 size: 262144 prevSize: 131072 limit: 204488 @@ -173,7 +177,7 @@ read 204488, 16 ---- readahead: 262144 -numReads: 6 +numReads: 8 size: 262144 prevSize: 262144 limit: 466632 @@ -184,7 +188,7 @@ read 466632, 16 ---- readahead: 262144 -numReads: 7 +numReads: 9 size: 262144 prevSize: 262144 limit: 728776 @@ -195,7 +199,7 @@ cache-read 728770, 16 ---- readahead: 0 -numReads: 7 +numReads: 10 size: 262144 prevSize: 262144 limit: 728786 @@ -204,7 +208,7 @@ read 728780, 16 ---- readahead: 262144 -numReads: 8 +numReads: 11 size: 262144 prevSize: 262144 limit: 990924 @@ -219,3 +223,36 @@ numReads: 1 size: 65536 prevSize: 0 limit: 1216 + +reset +---- + +# Offset 240KiB, length 32KiB. +read +245760, 32768 +---- +readahead: 0 +numReads: 1 +size: 65536 +prevSize: 0 +limit: 278528 + +# Offset 280KiB, length 32KiB. +read +286720, 32768 +---- +readahead: 0 +numReads: 2 +size: 65536 +prevSize: 0 +limit: 319488 + +# Offset 320KiB, length 32KiB. +read +327680, 32768 +---- +readahead: 65536 +numReads: 3 +size: 131072 +prevSize: 65536 +limit: 393216