
pubsub: dynamically choose the number of messages for ReceiveBatch #1200

Merged · 6 commits · Jan 31, 2019
Changes from 2 commits
80 changes: 80 additions & 0 deletions pubsub/averager.go
@@ -0,0 +1,80 @@
// Copyright 2019 The Go Cloud Development Kit Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pubsub

import (
    "sync"
    "time"
)

// An averager keeps track of an average value over a time interval.
Contributor:

Have you considered using an existing package like ewma for this?

Contributor Author:

Well, I didn't know about it. (How did you find it?) It looks much more sophisticated than what I have, which is nice. But it says

> Current implementations assume an implicit time interval of 1.0 between every sample added. That is, the passage of time is treated as though it's the same as the arrival of samples. If you need time-based decay when samples are not arriving precisely at set intervals, then this package will not support your needs at present.

which means we can't use it.

But you raise the question whether time-based decay is what we want. I thought about implementing it, but decided the complexity wasn't worth it. I'm not sure it's going to matter much whether the contribution of 1-minute-old points is reduced or not. It's more important that their contribution eventually goes to zero, which my implementation achieves rather abruptly.

Contributor:

> which means we can't use it.

Is that true? I am convinced by your argument for using the predicted time-to-process of the queue to determine the batch size, but I'm not sure that implies we need time-based bucketing for the estimate of how long each message takes to process.

I'm not sure that library will do better than what you have here, but it's worth thinking about. Are there scenarios where exponential decay will do the wrong thing? (Probably.)

What about an app that gets bursts of messages periodically (e.g., every 15m it gets 1000 messages)? IIUC, your impl will start at a constant (currently 1) for each burst and ramp up (because after 1m it forgets all about previous history). The decaying moving average wouldn't forget anything during the idle 15m. Which is better? (Not obvious, TBH.)
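
To make the burst scenario concrete with this PR's averager (an illustrative fragment; addInternal is used here the way the tests use it, to simulate elapsed time):

```go
a := newAverager(time.Minute, 4)
start := time.Now()
a.addInternal(0.1, start) // burst 1: messages took ~100ms each
// 15 idle minutes pass, much longer than the 1m averaging window.
a.addInternal(0.5, start.Add(15*time.Minute)) // burst 2 begins
avg := a.average() // exactly 0.5: the 100ms history has been rotated out
// A decaying moving average would still blend in the old 100ms points.
_ = avg
```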

Contributor Author:

I see your point. It seemed obvious to me that we wanted the number to decay in proportion to elapsed time, but you're right, there's no strong reason for that (aside from a hand-wavy argument about things generally having some time locality).

ewma will forget in proportion to the number of messages processed, rather than time. Maybe that's fine. If it took you 100ms to process a message three days ago, why wouldn't it take the same time now? On the other hand, system behavior tends to be spiky. You want to ride the spikes when they happen, then quickly forget about them. But I'm really just speculating here.
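
For concreteness, the kind of per-sample decay ewma applies looks roughly like this (a minimal sketch under my reading of that approach, not the library's actual API; expAverager and alpha are hypothetical names):

```go
// expAverager is a minimal exponentially weighted moving average.
// Each new sample shrinks the weight of all older samples by (1-alpha),
// regardless of how much wall-clock time separates them.
type expAverager struct {
    alpha float64 // in (0, 1); larger values forget history faster
    value float64
    set   bool
}

func (e *expAverager) add(x float64) {
    if !e.set {
        e.value, e.set = x, true
        return
    }
    e.value = e.alpha*x + (1-e.alpha)*e.value
}
```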

I'm fine doing any of the following:

  1. Keeping my code.
  2. Switching to ewma.
  3. Doing a better job of a time-based moving average. I think doing it exactly requires saving every point, but we could combine bucketing with a decay factor.

In any case, we should add this discussion to the issue, or a new issue.

Contributor:

Let's discuss at stand-up tomorrow.

// It does so by tracking the average over several sub-intervals.
type averager struct {
    mu             sync.Mutex
    buckets        []bucket // always nBuckets of these, last is most recent
    bucketInterval time.Duration
    end            time.Time // latest time we can currently record (end of the last bucket's interval)
}

type bucket struct {
    total float64 // total of points in the bucket
    count float64 // number of points in the bucket
}

// newAverager returns an averager that will average a value over dur,
// by splitting it into nBuckets sub-intervals.
func newAverager(dur time.Duration, nBuckets int) *averager {
    bi := dur / time.Duration(nBuckets)
    return &averager{
        buckets:        make([]bucket, nBuckets),
        bucketInterval: bi,
        end:            time.Now().Add(bi),
    }
}

// average returns the average value.
// It returns NaN if there are no recorded points.
func (a *averager) average() float64 {
Contributor:

Nit: consider making this Average since it's an "external" function? (even though the whole struct isn't). Not sure what typical Go style for this is, but I think it can be useful to be clear about what the expected API of internal objects is even if it's not enforced.

This code could also be moved into the internal/batcher package.

Contributor Author:

Removed the whole thing.

    a.mu.Lock()
    defer a.mu.Unlock()
    var total, n float64
    for _, b := range a.buckets {
        total += b.total
        n += b.count
    }
    return total / n // when no points have been recorded, n == 0 and 0/0 is NaN
}

// add adds a point to the average at the present time.
func (a *averager) add(x float64) { a.addInternal(x, time.Now()) }

// addInternal adds a point x to the average, at time t.
// t should be time.Now() except during testing.
func (a *averager) addInternal(x float64, t time.Time) {
    a.mu.Lock()
    defer a.mu.Unlock()
    // Add new buckets and discard old ones until we can accommodate time t.
    for t.After(a.end) {
        a.buckets = append(a.buckets[1:], bucket{})
        a.end = a.end.Add(a.bucketInterval)
    }
    // We only support adding to the most recent bucket.
    if t.Before(a.end.Add(-a.bucketInterval)) {
        panic("time too early")
    }
    b := &a.buckets[len(a.buckets)-1]
    b.total += x
    b.count++
}
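
For reference, typical use of this type looks like the following sketch (the test file below exercises the same calls):

```go
a := newAverager(time.Minute, 4) // a 1m window split into four 15s buckets
avg := a.average()               // NaN: no points recorded yet
a.add(0.1)                       // record a point, e.g. 100ms of process time
avg = a.average()                // now 0.1
_ = avg
```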
57 changes: 57 additions & 0 deletions pubsub/averager_test.go
@@ -0,0 +1,57 @@
// Copyright 2019 The Go Cloud Development Kit Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pubsub

import (
    "testing"
    "time"
)

func TestAverager(t *testing.T) {
    a := newAverager(time.Minute, 4)
    start := time.Now()
    for i := 0; i < 10; i++ {
        a.addInternal(5, start)
    }
    if got, want := a.average(), 5.0; got != want {
        t.Errorf("got %f, want %f", got, want)
    }

    a = newAverager(time.Minute, 4)
    start = time.Now()
    n := 60
    var total float64
    for i := 0; i < n; i++ {
        total += float64(i)
        a.addInternal(float64(i), start.Add(time.Duration(i)*time.Second))
    }
    // All the points are within one minute of each other, so they should all be counted.
    got := a.average()
    want := total / float64(n)
    if got != want {
        t.Errorf("got %f, want %f", got, want)
    }

    // Values older than the duration are dropped.
    a = newAverager(time.Minute, 4)
    a.add(10)
    if got, want := a.average(), 10.0; got != want {
        t.Errorf("got %f, want %f", got, want)
    }
    a.addInternal(3, a.end.Add(2*time.Minute))
    if got, want := a.average(), 3.0; got != want {
        t.Errorf("got %f, want %f", got, want)
    }
}
Binary file modified pubsub/gcppubsub/testdata/TestConformance/TestSendReceive.replay
71 changes: 60 additions & 11 deletions pubsub/pubsub.go
@@ -36,8 +36,10 @@ package pubsub // import "gocloud.dev/pubsub"
import (
    "context"
    "fmt"
+    "math"
    "reflect"
    "sync"
+    "time"

    gax "github.com/googleapis/gax-go"
    "gocloud.dev/gcerrors"
@@ -211,12 +213,33 @@ type Subscription struct {
    ackBatcher driver.Batcher
    cancel     func() // for canceling all SendAcks calls

-    mu    sync.Mutex    // protects everything below
-    q     []*Message    // local queue of messages downloaded from server
-    err   error         // permanent error
-    waitc chan struct{} // for goroutines waiting on ReceiveBatch
+    mu                  sync.Mutex    // protects everything below
+    q                   []*Message    // local queue of messages downloaded from server
+    err                 error         // permanent error
+    waitc               chan struct{} // for goroutines waiting on ReceiveBatch
+    lastReceiveReturn   time.Time     // time that the last call to Receive returned
+    processTimeAverager *averager     // keeps a running average of the seconds to process a message
}

+const (
+    // The desired length of a subscription's queue of messages (the messages pulled
+    // and waiting in memory to be doled out to Receive callers). The length is
+    // expressed in time; the relationship to number of messages is
+    //
+    //    lengthInMessages = desiredQueueLength / averageProcessTimePerMessage
+    //
+    // In other words, if it takes 100ms to process a message on average, and we want
+    // 2s worth of queued messages, then we need 2/.1 = 20 messages.
+    //
+    // If desiredQueueLength is too small, then there won't be a large enough buffer
+    // of messages to handle fluctuations in processing time, and the queue is likely
+    // to become empty, reducing throughput. If desiredQueueLength is too large, then
+    // messages will wait in memory for a long time, possibly timing out (that is,
+    // their ack deadline will be exceeded). Those messages could have been handled
+    // by another process receiving from the same subscription.
+    desiredQueueLength = 2 * time.Second
Contributor:

Queue lengths are not defined in units of time. They are counts, so this should have a different name. Would desiredTimeInQueuePerMessage be more accurate?

Contributor Author:

Are you okay with desiredQueueDuration?

Contributor:

Sure, as long as the comment explains what it means. I think it's how long we want a message to spend in the queue, although I find this idea a bit strange since I don't have a preference about how long messages stay in the queue so long as it's a lot less than the ack deadline.

Contributor Author:

> although I find this idea a bit strange

Exactly, that's why I don't think that's the right way to think about it. It's more like, we want to keep a few messages around in case Receive speeds up and starts chewing through them faster than it has been. In other words, a buffer. How many messages do we want to keep in the buffer? No, that's the wrong question: how much runway (in time) do we want before we run out of messages?

Yes, if Receive always takes the same time, then a message will spend all that time in the queue. But that's a side effect, the price we pay for having messages available on demand to improve throughput.

Contributor:

I see, so to write it in Javanese, it would be more like desiredAmountOfTimeBeforeQueueProbablyRunsOut. Makes sense.

+)
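
Spelling out that comment's arithmetic as code (an illustrative fragment; avgProcessTime here stands in for a hypothetical measured average, and this mirrors the computation Receive performs below):

```go
avgProcessTime := 0.1 // hypothetical: messages take 100ms each, in seconds
n := math.Ceil(desiredQueueLength.Seconds() / avgProcessTime)
// n == math.Ceil(2 / 0.1) == 20 messages of runway
```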

// Receive receives and returns the next message from the Subscription's queue,
// blocking and polling if none are available. This method can be called
// concurrently from multiple goroutines. The Ack() method of the returned
@@ -228,6 +251,12 @@ func (s *Subscription) Receive(ctx context.Context) (_ *Message, err error) {

    s.mu.Lock()
    defer s.mu.Unlock()
+    defer func() { s.lastReceiveReturn = time.Now() }()
+
+    if !s.lastReceiveReturn.IsZero() {
+        s.processTimeAverager.add(float64(time.Since(s.lastReceiveReturn).Seconds()))
+    }

    for {
        // The lock is always held here, at the top of the loop.
        if s.err != nil {
@@ -238,10 +267,12 @@
            // At least one message is available. Return it.
            m := s.q[0]
            s.q = s.q[1:]
+            // TODO(jba): pre-fetch more messages if the queue gets too small.
            return m, nil
        }
        if s.waitc != nil {
            // A call to ReceiveBatch is in flight. Wait for it.
+            // TODO(jba): support multiple calls in flight simultaneously.
            waitc := s.waitc
            s.mu.Unlock()
            select {
@@ -255,12 +286,31 @@
        }
        // No messages are available and there are no calls to ReceiveBatch in flight.
        // Make a call.
+        //
+        // Ask for a number of messages that will give us the desired queue length.
+        // Unless we don't have information about process time (at the beginning), in
+        // which case just get one message.
+        nMessages := 1
+        avgProcessTime := s.processTimeAverager.average()
+        if !math.IsNaN(avgProcessTime) {
+            // Using Ceil guarantees at least one message.
+            n := math.Ceil(desiredQueueLength.Seconds() / avgProcessTime)
+            // Cap nMessages at some non-ridiculous value.
+            // Slight hack: we should be using a larger cap, like MaxInt32. But
+            // that messes up replay: since the tests take very little time to ack,
+            // n is very large, and since our averaging process is time-sensitive,
+            // values can differ slightly from run to run. The current cap happens
+            // to work, but we should come up with a more robust solution.
+            // (Currently it doesn't matter for performance, because gcppubsub
+            // caps maxMessages to 1000 anyway.)
+            nMessages = int(math.Min(n, 1000))
+        }
        s.waitc = make(chan struct{})
        s.mu.Unlock()
        // Even though the mutex is unlocked, only one goroutine can be here.
        // The only way here is if s.waitc was nil. This goroutine just set
        // s.waitc to non-nil while holding the lock.
-        msgs, err := s.getNextBatch(ctx)
+        msgs, err := s.getNextBatch(ctx, nMessages)
Contributor:

Test is failing:

 drivertest.go:271: pubsub (code=Unknown): replayer: request not found: subscription:"projects/go-cloud-test-216917/subscriptions/TestConformance_TestSendReceiveTwo-subscription-1" max_messages:936

That looks likely to be flaky....

Contributor Author:

It's actually 9364. The Mac tests run a bit slower. I changed the cap to 1000 for now.

Contributor:

9364? Why? (why isn't this flaky?) Seems like it's dependent on timing, no?

Contributor Author:

I got the number 9364 from the actual travis log. I think you dropped a digit when you copied it. The relevant point is that it's > 1000.

It is time-dependent. But with a cap of 1000, it would have to take longer than 1ms between calls to Receive on average, and considering that we do nothing but call Ack, that's extremely unlikely.

I agree, though, that this solution isn't great.

        s.mu.Lock()
        close(s.waitc)
        s.waitc = nil
Expand All @@ -276,14 +326,12 @@ func (s *Subscription) Receive(ctx context.Context) (_ *Message, err error) {
}

// getNextBatch gets the next batch of messages from the server and returns it.
-func (s *Subscription) getNextBatch(ctx context.Context) ([]*Message, error) {
+func (s *Subscription) getNextBatch(ctx context.Context, nMessages int) ([]*Message, error) {
    var msgs []*driver.Message
    for len(msgs) == 0 {
        err := retry.Call(ctx, gax.Backoff{}, s.driver.IsRetryable, func() error {
            var err error
-            // TODO(#691): dynamically adjust maxMessages
-            const maxMessages = 10
-            msgs, err = s.driver.ReceiveBatch(ctx, maxMessages)
+            msgs, err = s.driver.ReceiveBatch(ctx, nMessages)
            return err
        })
        if err != nil {
@@ -342,8 +390,9 @@ var NewSubscription = newSubscription
func newSubscription(d driver.Subscription, newAckBatcher func(context.Context, *Subscription) driver.Batcher) *Subscription {
    ctx, cancel := context.WithCancel(context.Background())
    s := &Subscription{
-        driver: d,
-        cancel: cancel,
+        driver:              d,
+        cancel:              cancel,
+        processTimeAverager: newAverager(time.Minute, 4),
    }
    if newAckBatcher == nil {
        newAckBatcher = defaultAckBatcher