storage: range all unsynced at once
This is for etcd-io#3848 to batch RangeHistory for all watchings at once.
gyuho committed Dec 24, 2015
1 parent b79dae2 commit f01f143
Showing 3 changed files with 225 additions and 83 deletions.
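
Before the per-file diff, a rough sketch of the idea in the commit message: instead of calling RangeHistory once per unsynced watching, collect the minimum and maximum revision across all of them, range over the revision-ordered backend once, and fan the results out by key. The types and helper below (watching, event, rangeAllOnce) are simplified stand-ins invented for illustration only, not the etcd storage API:

package main

import (
	"fmt"
	"math"
)

// watching is a simplified stand-in for the storage package's watching type.
type watching struct {
	key string
	cur int64 // next revision this watching still needs to see
}

// event is a simplified stand-in for storagepb.Event.
type event struct {
	key string
	rev int64
}

// rangeAllOnce scans a revision-ordered log once over the union of the
// watchings' revision windows and groups matching events by key, instead
// of issuing one range query per watching.
func rangeAllOnce(unsynced []*watching, log []event) map[string][]event {
	minRev, maxRev := int64(math.MaxInt64), int64(math.MinInt64)
	watchedKeys := make(map[string]struct{})
	for _, w := range unsynced {
		if w.cur < minRev {
			minRev = w.cur
		}
		if w.cur > maxRev {
			maxRev = w.cur
		}
		watchedKeys[w.key] = struct{}{}
	}

	out := make(map[string][]event)
	for _, ev := range log { // single pass instead of len(unsynced) queries
		if ev.rev < minRev || ev.rev > maxRev {
			continue
		}
		if _, ok := watchedKeys[ev.key]; ok {
			out[ev.key] = append(out[ev.key], ev)
		}
	}
	return out
}

func main() {
	unsynced := []*watching{{key: "foo", cur: 2}, {key: "bar", cur: 3}}
	log := []event{{"foo", 1}, {"foo", 2}, {"bar", 3}, {"baz", 3}}
	fmt.Println(rangeAllOnce(unsynced, log))
}

The trade-off is reading some revisions that no single watching needs, in exchange for turning many backend range calls into one pass.
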
138 changes: 108 additions & 30 deletions storage/watchable_store.go
@@ -17,6 +17,7 @@ package storage
import (
	"fmt"
	"log"
	"math"
	"sync"
	"time"

@@ -174,6 +175,7 @@ func (s *watchableStore) NewWatcher() Watcher {
}

func (s *watchableStore) watch(key []byte, prefix bool, startRev, id int64, ch chan<- storagepb.Event) (*watching, CancelFunc) {

	s.mu.Lock()
	defer s.mu.Unlock()

@@ -241,57 +243,133 @@ func (s *watchableStore) syncWatchingsLoop() {
	}
}

// rangeUnsynced ranges on all unsynced watchings and returns all
// revision bytes, key-value pairs and next revision.
func (s *watchableStore) rangeUnsynced() ([][]byte, []storagepb.KeyValue, int64, map[string]map[*watching]struct{}, error) {
	// get minimum and maximum revision to get all key-value pairs in unsynced
	minRev, maxRev := int64(math.MaxInt64), int64(math.MinInt64)
	keyToWatchings := make(map[string]map[*watching]struct{})
	_, curRev, _ := s.store.Range(nil, nil, 0, 0)
	for w := range s.unsynced {
		if w.cur > 0 && w.cur <= s.store.compactMainRev {
			// TODO: handle this ErrCompacted error
			log.Printf("storage: %v", ErrCompacted)
			delete(s.unsynced, w)
			continue
		}
		if w.cur > s.store.currentRev.main {
			// TODO: handle this ErrFutureRev error
			log.Printf("storage: %v", ErrFutureRev)
			delete(s.unsynced, w)
			continue
		}

		k := string(w.key)

		if w.cur > curRev {
			if err := unsafeAddWatching(&s.synced, k, w); err != nil {
				log.Panicf("error unsafeAddWatching (%v) for key %s", err, k)
			}
			delete(s.unsynced, w)
			continue
		}

		if minRev >= w.cur {
			minRev = w.cur
		}
		if maxRev <= w.cur {
			maxRev = w.cur
		}

		if v, ok := keyToWatchings[k]; !ok {
			keyToWatchings[k] = make(map[*watching]struct{})
			keyToWatchings[k][w] = struct{}{}
		} else {
			v[w] = struct{}{}
		}
	}

	// to prevent returning nothing
	if minRev == maxRev {
		maxRev++
	}

	min, max := newRevBytes(), newRevBytes()
	revToBytes(revision{main: minRev}, min)
	revToBytes(revision{main: maxRev, sub: maxRev}, max)

	s.store.mu.Lock()
	defer s.store.mu.Unlock()

	// UnsafeRange returns keys and values. And in boltdb, keys are revisions.
	tx := s.store.b.BatchTx()
	tx.Lock()
	ks, vs := tx.UnsafeRange(keyBucketName, min, max, 0)
	tx.Unlock()

	revbs := [][]byte{}
	kvs := []storagepb.KeyValue{}
	for i, v := range vs {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(v); err != nil {
			return nil, nil, 0, nil, fmt.Errorf("storage: cannot unmarshal event: %v", err)
		}
		if _, ok := keyToWatchings[string(kv.Key)]; !ok {
			continue
		}
		revbs = append(revbs, ks[i])
		kvs = append(kvs, kv)
	}

	return revbs, kvs, s.store.currentRev.main + 1, keyToWatchings, nil
}

// syncWatchings syncs the watchings in the unsyncd map.
func (s *watchableStore) syncWatchings() {
	if len(s.unsynced) == 0 {
		return
	}

	revbs, kvs, _, keyToWatchings, err := s.rangeUnsynced()
	if err != nil {
		log.Panicf("error s.rangeUnsynced: %v", err)
	}

	// push events to the channel
	for i, kv := range kvs {
		var evt storagepb.Event_EventType
		switch {
		case isTombstone(revbs[i]):
			evt = storagepb.DELETE
		default:
			evt = storagepb.PUT
		}

		k := string(kv.Key)

		wm := make(map[*watching]struct{})
		if v, ok := keyToWatchings[k]; ok {
			wm = v
		} else {
			continue // in synced
		}

		for w := range wm {
			w.ch <- storagepb.Event{
				Type:    evt,
				Kv:      &kv,
				WatchID: w.id,
			}
			pendingEventsGauge.Inc()
		}
	}

	slowWatchingGauge.Set(float64(len(s.unsynced)))
}
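
The DELETE/PUT decision in syncWatchings above comes from isTombstone(revbs[i]), i.e. the event type is recovered from the revision key itself. The sketch below is an illustrative re-creation, assuming a big-endian revision encoding with an optional trailing marker byte; it shows why revision keys sort in revision order (so one UnsafeRange over [min, max] covers the whole window) and how a tombstone can be told apart, but the exact byte layout in the storage package may differ:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// revToBytes is an illustrative encoding: 8-byte big-endian main revision,
// a separator byte, 8-byte big-endian sub revision. Lexicographic byte
// order then matches revision order.
func revToBytes(main, sub int64) []byte {
	b := make([]byte, 17)
	binary.BigEndian.PutUint64(b[0:8], uint64(main))
	b[8] = '_'
	binary.BigEndian.PutUint64(b[9:17], uint64(sub))
	return b
}

// markTombstone appends a marker so a delete is recoverable from the
// revision key alone, without inspecting the stored value.
func markTombstone(rev []byte) []byte { return append(rev, 't') }

func isTombstone(rev []byte) bool {
	return len(rev) == 18 && rev[17] == 't'
}

func main() {
	put := revToBytes(5, 0)
	del := markTombstone(revToBytes(6, 0))
	fmt.Println(bytes.Compare(put, del) < 0)        // true: byte order == revision order
	fmt.Println(isTombstone(put), isTombstone(del)) // false true
}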

82 changes: 42 additions & 40 deletions storage/watchable_store_bench_test.go
@@ -20,6 +20,43 @@ import (
	"testing"
)

// Benchmarks on cancel function performance for synced watchers.
func BenchmarkWatchableStoreSyncedCancel(b *testing.B) {
	// arbitrary number of watchers
	watcherN := 1000000

	s := newWatchableStore(tmpPath)
	defer func() {
		s.store.Close()
		os.Remove(tmpPath)
	}()

	// Put a key so that we can spawn watchers on that key
	testKey := []byte("foo")
	testValue := []byte("bar")
	s.Put(testKey, testValue)

	w := s.NewWatcher()

	cancels := make([]CancelFunc, watcherN)
	for i := 0; i < watcherN; i++ {
		// 0 for startRev to keep watchers in synced
		_, cancel := w.Watch(testKey, true, 0)
		cancels[i] = cancel
	}

	// randomly cancel watchers to make it not biased towards
	// data structures with an order, such as slice.
	ix := rand.Perm(watcherN)

	b.ResetTimer()
	b.ReportAllocs()

	for _, idx := range ix {
		cancels[idx]()
	}
}

// Benchmarks on cancel function performance for unsynced watchers
// in a WatchableStore. It creates k*N watchers to populate unsynced
// with a reasonably large number of watchers. And measures the time it
@@ -30,7 +67,8 @@ import (
func BenchmarkWatchableStoreUnsyncedCancel(b *testing.B) {
	const k int = 2
	benchSampleSize := b.N
	watcherN := k * benchSampleSize

	// manually create watchableStore instead of newWatchableStore
	// because newWatchableStore periodically calls syncWatchersLoop
	// method to sync watchers in unsynced map. We want to keep watchers
@@ -62,100 +100,16 @@ func BenchmarkWatchableStoreUnsyncedCancel(b *testing.B) {

	w := s.NewWatcher()

	cancels := make([]CancelFunc, watcherN)
	for i := 0; i < watcherN; i++ {
		// non-0 value to keep watchers in unsynced
		_, cancel := w.Watch(testKey, true, 1)
		cancels[i] = cancel
	}

	// random-cancel N watchers to make it not biased towards
	// data structures with an order, such as slice.
	ix := rand.Perm(watcherN)

	b.ResetTimer()
	b.ReportAllocs()
@@ -81,39 +119,3 @@ func BenchmarkWatchableStoreUnsyncedCancel(b *testing.B) {
		cancels[idx]()
	}
}
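
For reference, benchmarks such as the two above are run with the standard Go tooling; the regex and package path below are assumptions about a local checkout, not part of the commit:

go test -run NONE -bench 'WatchableStore.*Cancel' ./storage

b.ReportAllocs() inside the benchmark bodies already turns on allocation reporting, so no extra flag is needed for that.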