diff --git a/storage/watchable_store.go b/storage/watchable_store.go index ebad2a64de9f..275cdfe4cee1 100644 --- a/storage/watchable_store.go +++ b/storage/watchable_store.go @@ -17,6 +17,7 @@ package storage import ( "fmt" "log" + "math" "sync" "time" @@ -174,6 +175,7 @@ func (s *watchableStore) NewWatcher() Watcher { } func (s *watchableStore) watch(key []byte, prefix bool, startRev, id int64, ch chan<- storagepb.Event) (*watching, CancelFunc) { + s.mu.Lock() defer s.mu.Unlock() @@ -241,57 +243,133 @@ func (s *watchableStore) syncWatchingsLoop() { } } -// syncWatchings syncs the watchings in the unsyncd map. -func (s *watchableStore) syncWatchings() { +// rangeUnsynced ranges on all unsynced watchings and returns all +// revision bytes, key-value pairs and next revision. +func (s *watchableStore) rangeUnsynced() ([][]byte, []storagepb.KeyValue, int64, map[string]map[*watching]struct{}, error) { + // get minimum and maximum revision to get all key-value pairs in unsynced + minRev, maxRev := int64(math.MaxInt64), int64(math.MinInt64) + keyToWatchings := make(map[string]map[*watching]struct{}) _, curRev, _ := s.store.Range(nil, nil, 0, 0) for w := range s.unsynced { - var end []byte - if w.prefix { - end = make([]byte, len(w.key)) - copy(end, w.key) - end[len(w.key)-1]++ - } - limit := cap(w.ch) - len(w.ch) - // the channel is full, try it in the next round - if limit == 0 { + if w.cur > 0 && w.cur <= s.store.compactMainRev { + // TODO: handle this ErrCompacted error + log.Printf("storage: %v", ErrCompacted) + delete(s.unsynced, w) continue } - revbs, kvs, nextRev, err := s.store.RangeHistory(w.key, end, int64(limit), w.cur) - if err != nil { - // TODO: send error event to watching + if w.cur > s.store.currentRev.main { + // TODO: handle this ErrFutureRev error + log.Printf("storage: %v", ErrFutureRev) delete(s.unsynced, w) continue } - // push events to the channel - for i, kv := range kvs { - var evt storagepb.Event_EventType - switch { - case 
isTombstone(revbs[i]):
-				evt = storagepb.DELETE
-			default:
-				evt = storagepb.PUT
+		k := string(w.key)
+
+		if w.cur > curRev {
+			if err := unsafeAddWatching(&s.synced, k, w); err != nil {
+				log.Panicf("error unsafeAddWatching (%v) for key %s", err, k)
 			}
+			delete(s.unsynced, w)
+			continue
+		}
+
+		if minRev >= w.cur {
+			minRev = w.cur
+		}
+		if maxRev <= w.cur {
+			maxRev = w.cur
+		}
+
+		if v, ok := keyToWatchings[k]; !ok {
+			keyToWatchings[k] = make(map[*watching]struct{})
+			keyToWatchings[k][w] = struct{}{}
+		} else {
+			v[w] = struct{}{}
+		}
+	}
+
+	// to prevent returning nothing
+	if minRev == maxRev {
+		maxRev++
+	}
+
+	min, max := newRevBytes(), newRevBytes()
+	revToBytes(revision{main: minRev}, min)
+	revToBytes(revision{main: maxRev, sub: maxRev}, max)
+
+	s.store.mu.Lock()
+	defer s.store.mu.Unlock()
+
+	// UnsafeRange returns keys and values. And in boltdb, keys are revisions.
+	tx := s.store.b.BatchTx()
+	tx.Lock()
+	ks, vs := tx.UnsafeRange(keyBucketName, min, max, 0)
+	tx.Unlock()
+
+	revbs := [][]byte{}
+	kvs := []storagepb.KeyValue{}
+	for i, v := range vs {
+		var kv storagepb.KeyValue
+		if err := kv.Unmarshal(v); err != nil {
+			return nil, nil, 0, nil, fmt.Errorf("storage: cannot unmarshal event: %v", err)
+		}
+		if _, ok := keyToWatchings[string(kv.Key)]; !ok {
+			continue
+		}
+		revbs = append(revbs, ks[i])
+		kvs = append(kvs, kv)
+	}
+
+	return revbs, kvs, s.store.currentRev.main + 1, keyToWatchings, nil
+}
+
+// syncWatchings syncs the watchings in the unsynced map.
+func (s *watchableStore) syncWatchings() { + + if len(s.unsynced) == 0 { + return + } + + revbs, kvs, _, keyToWatchings, err := s.rangeUnsynced() + if err != nil { + log.Panicf("error s.rangeUnsynced: %v", err) + } + // push events to the channel + for i, kv := range kvs { + var evt storagepb.Event_EventType + switch { + case isTombstone(revbs[i]): + evt = storagepb.DELETE + default: + evt = storagepb.PUT + } + + k := string(kv.Key) + + wm := make(map[*watching]struct{}) + if v, ok := keyToWatchings[k]; ok { + wm = v + } else { + continue // in synced + } + + for w := range wm { w.ch <- storagepb.Event{ Type: evt, Kv: &kv, WatchID: w.id, } - pendingEventsGauge.Inc() - } - // switch to tracking future events if needed - if nextRev > curRev { - k := string(w.key) if err := unsafeAddWatching(&s.synced, k, w); err != nil { log.Panicf("error unsafeAddWatching (%v) for key %s", err, k) } delete(s.unsynced, w) - continue + + pendingEventsGauge.Inc() } - // put it back to try it in the next round - w.cur = nextRev } + slowWatchingGauge.Set(float64(len(s.unsynced))) } diff --git a/storage/watchable_store_bench_test.go b/storage/watchable_store_bench_test.go index 92b8273a4ff0..9d82ed8627ab 100644 --- a/storage/watchable_store_bench_test.go +++ b/storage/watchable_store_bench_test.go @@ -20,6 +20,43 @@ import ( "testing" ) +// Benchmarks on cancel function performance for synced watchers. 
+func BenchmarkWatchableStoreSyncedCancel(b *testing.B) { + // arbitrary number of watchers + watcherN := 1000000 + + s := newWatchableStore(tmpPath) + defer func() { + s.store.Close() + os.Remove(tmpPath) + }() + + // Put a key so that we can spawn watchers on that key + testKey := []byte("foo") + testValue := []byte("bar") + s.Put(testKey, testValue) + + w := s.NewWatcher() + + cancels := make([]CancelFunc, watcherN) + for i := 0; i < watcherN; i++ { + // 0 for startRev to keep watchers in synced + _, cancel := w.Watch(testKey, true, 0) + cancels[i] = cancel + } + + // randomly cancel watchers to make it not biased towards + // data structures with an order, such as slice. + ix := rand.Perm(watcherN) + + b.ResetTimer() + b.ReportAllocs() + + for _, idx := range ix { + cancels[idx]() + } +} + // Benchmarks on cancel function performance for unsynced watchers // in a WatchableStore. It creates k*N watchers to populate unsynced // with a reasonably large number of watchers. And measures the time it @@ -30,7 +67,8 @@ import ( func BenchmarkWatchableStoreUnsyncedCancel(b *testing.B) { const k int = 2 benchSampleSize := b.N - watcherSize := k * benchSampleSize + watcherN := k * benchSampleSize + // manually create watchableStore instead of newWatchableStore // because newWatchableStore periodically calls syncWatchersLoop // method to sync watchers in unsynced map. We want to keep watchers @@ -62,8 +100,8 @@ func BenchmarkWatchableStoreUnsyncedCancel(b *testing.B) { w := s.NewWatcher() - cancels := make([]CancelFunc, watcherSize) - for i := 0; i < watcherSize; i++ { + cancels := make([]CancelFunc, watcherN) + for i := 0; i < watcherN; i++ { // non-0 value to keep watchers in unsynced _, cancel := w.Watch(testKey, true, 1) cancels[i] = cancel @@ -71,7 +109,7 @@ func BenchmarkWatchableStoreUnsyncedCancel(b *testing.B) { // random-cancel N watchers to make it not biased towards // data structures with an order, such as slice. 
- ix := rand.Perm(watcherSize) + ix := rand.Perm(watcherN) b.ResetTimer() b.ReportAllocs() @@ -81,39 +119,3 @@ func BenchmarkWatchableStoreUnsyncedCancel(b *testing.B) { cancels[idx]() } } - -func BenchmarkWatchableStoreSyncedCancel(b *testing.B) { - s := newWatchableStore(tmpPath) - defer func() { - s.store.Close() - os.Remove(tmpPath) - }() - - // Put a key so that we can spawn watchers on that key - testKey := []byte("foo") - testValue := []byte("bar") - s.Put(testKey, testValue) - - w := s.NewWatcher() - - // put 1 million watchers on the same key - const watcherSize = 1000000 - - cancels := make([]CancelFunc, watcherSize) - for i := 0; i < watcherSize; i++ { - // 0 for startRev to keep watchers in synced - _, cancel := w.Watch(testKey, true, 0) - cancels[i] = cancel - } - - // randomly cancel watchers to make it not biased towards - // data structures with an order, such as slice. - ix := rand.Perm(watcherSize) - - b.ResetTimer() - b.ReportAllocs() - - for _, idx := range ix { - cancels[idx]() - } -} diff --git a/storage/watchable_store_test.go b/storage/watchable_store_test.go index 9fe9600dc63c..fb9ed2b98b2e 100644 --- a/storage/watchable_store_test.go +++ b/storage/watchable_store_test.go @@ -15,6 +15,8 @@ package storage import ( + "bytes" + "fmt" "os" "testing" ) @@ -61,6 +63,9 @@ func TestNewWatcherCancel(t *testing.T) { // TestCancelUnsynced tests if running CancelFunc removes watchings from unsynced. func TestCancelUnsynced(t *testing.T) { + // arbitrary number of watchers + watcherN := 150 + // manually create watchableStore instead of newWatchableStore // because newWatchableStore automatically calls syncWatchers // method to sync watchers in unsynced map. 
We want to keep watchers
@@ -89,9 +94,6 @@ func TestCancelUnsynced(t *testing.T) {
 
 	w := s.NewWatcher()
 
-	// arbitrary number for watcher
-	watcherN := 100
-
 	// create watcherN of CancelFunc of
 	// synced and unsynced
 	cancels := make([]CancelFunc, watcherN)
@@ -114,11 +116,75 @@ func TestCancelUnsynced(t *testing.T) {
 	}
 }
 
+// TestRangeUnsynced populates unsynced watchings to test if it
+// returns correct key-value pairs and nextRev.
+func TestRangeUnsynced(t *testing.T) {
+	// arbitrary number of watchers
+	watcherN := 150
+
+	s := &watchableStore{
+		store:    newStore(tmpPath),
+		unsynced: make(map[*watching]struct{}),
+		synced:   make(map[string]map[*watching]struct{}),
+	}
+
+	defer func() {
+		s.store.Close()
+		os.Remove(tmpPath)
+	}()
+
+	keys := make([][]byte, watcherN)
+	vals := make([][]byte, watcherN)
+	for i := 0; i < watcherN; i++ {
+		keys[i] = []byte(fmt.Sprintf("%d_Foo", i+1))
+		vals[i] = []byte(fmt.Sprintf("%d_Bar", i+1))
+	}
+
+	for i := 1; i <= watcherN; i++ {
+		s.Put(keys[i-1], vals[i-1])
+		w := s.NewWatcher()
+		// use non-0 to keep watchers in unsynced
+		w.Watch(keys[i-1], false, int64(i))
+	}
+
+	revbs, kvs, nextRev, keyToWatchings, err := s.rangeUnsynced()
+	if err != nil {
+		t.Error(err)
+	}
+
+	if len(revbs) != len(kvs) {
+		t.Errorf("len(revbs) = %d, len(kvs) = %d, want %d == %d", len(revbs), len(kvs), len(revbs), len(kvs))
+	}
+	if len(revbs) != watcherN {
+		t.Errorf("len(revbs) = %d, want = %d", len(revbs), watcherN)
+	}
+	if len(kvs) != watcherN {
+		t.Errorf("len(kvs) = %d, want = %d", len(kvs), watcherN)
+	}
+	if len(keyToWatchings) != watcherN {
+		t.Errorf("len(keyToWatchings) = %d, want = %d", len(keyToWatchings), watcherN)
+	}
+	if nextRev != int64(watcherN+1) {
+		t.Errorf("nextRev = %d, want = %d", nextRev, watcherN+1)
+	}
+	for i, v := range kvs {
+		if !bytes.Equal(keys[i], v.Key) {
+			t.Errorf("v.Key = %s, want = %s", v.Key, keys[i])
+		}
+		if !bytes.Equal(vals[i], v.Value) {
+			t.Errorf("v.Value = %s, want = %s", v.Value, vals[i])
+		}
+	}
+}
+
// TestSyncWatchings populates unsynced watching map and // tests syncWatchings method to see if it correctly sends // events to channel of unsynced watchings and moves these // watchings to synced. func TestSyncWatchings(t *testing.T) { + // arbitrary number of watchers + watcherN := 150 + s := &watchableStore{ store: newStore(tmpPath), unsynced: make(map[*watching]struct{}), @@ -136,9 +202,6 @@ func TestSyncWatchings(t *testing.T) { w := s.NewWatcher() - // arbitrary number for watcher - watcherN := 100 - for i := 0; i < watcherN; i++ { // use 1 to keep watchers in unsynced w.Watch(testKey, true, 1) @@ -149,16 +212,15 @@ func TestSyncWatchings(t *testing.T) { // synced should be empty // because we manually populate unsynced only if len(s.synced[string(testKey)]) != 0 { - t.Fatalf("synced[string(testKey)] size = %d, want 0", len(s.synced[string(testKey)])) + t.Fatalf("[BEFORE s.syncWatchings()] synced[string(testKey)] size = %d, want 0", len(s.synced[string(testKey)])) } // unsynced should not be empty // because we manually populated unsynced only if len(s.unsynced) == 0 { - t.Errorf("unsynced size = %d, want %d", len(s.unsynced), watcherN) + t.Errorf("[BEFORE s.syncWatchings()] unsynced size = %d, want %d", len(s.unsynced), watcherN) } - // this should move all unsynced watchings - // to synced ones + // this should move all unsynced watchings to synced ones s.syncWatchings() // After running s.syncWatchings() @@ -167,21 +229,21 @@ func TestSyncWatchings(t *testing.T) { // because syncWatchings populates synced // in this test case if len(s.synced[string(testKey)]) == 0 { - t.Errorf("synced[string(testKey)] size = 0, want %d", len(s.synced[string(testKey)])) + t.Errorf("[AFTER s.syncWatchings()] synced[string(testKey)] size = %d, want %d", len(s.synced[string(testKey)]), watcherN) } // unsynced should be empty // because syncWatchings is expected to move // all watchings from unsynced to synced // in this test case if len(s.unsynced) != 0 { - t.Errorf("unsynced 
size = %d, want 0", len(s.unsynced)) + t.Errorf("[AFTER s.syncWatchings()] unsynced size = %d, want 0", len(s.unsynced)) } // All of the watchings actually share one channel // so we only need to check one shared channel // (See watcher.go for more detail). if len(w.(*watcher).ch) != watcherN { - t.Errorf("watched event size = %d, want %d", len(w.(*watcher).ch), watcherN) + t.Errorf("[AFTER s.syncWatchings()] watched event size = %d, want %d", len(w.(*watcher).ch), watcherN) } }