Merge pull request #17596 from foobar/optimize-sorted-merge-iterator

improvement(query): performance improvement for sorted merge iterator
influxdata · Jun 23, 2020 · 78a05d1 · 78a05d1
2 parents 1e7a2e2 + af8e66c
commit 78a05d1
Show file tree

Hide file tree

Showing 3 changed files with 602 additions and 1 deletion.
diff --git a/query/iterator.gen.go b/query/iterator.gen.go
@@ -11,6 +11,7 @@ import (
 	"context"
 	"io"
 	"sort"
+	"strings"
 	"sync"
 	"time"
 
@@ -374,6 +375,7 @@ func (itr *floatSortedMergeIterator) pop() (*FloatPoint, error) {
 			}
 			itr.heap.items = append(itr.heap.items, item)
 		}
+		itr.heap.detectFast()
 		heap.Init(itr.heap)
 		itr.init = true
 	}
@@ -411,11 +413,57 @@ func (itr *floatSortedMergeIterator) pop() (*FloatPoint, error) {
 type floatSortedMergeHeap struct {
 	opt   IteratorOptions
 	items []*floatSortedMergeHeapItem
+	// fast enables a shortcut that is valid when every input carries exactly one,
+	// distinct time series. Detecting this adds some overhead, but it yields a
+	// significant performance improvement for queries like SELECT * FROM m GROUP BY *.
+	fast bool
+}
+// detectFast sets h.fast and gives every item a fastIdx when each input reports SeriesN == 1 and no two items' points share a name and tag ID.
+func (h *floatSortedMergeHeap) detectFast() {
+	for _, item := range h.items {
+		if item.itr.Stats().SeriesN != 1 { // an input not reporting exactly one series rules out the shortcut
+			return
+		}
+	}
+
+	hasDup := false // set by less when two points tie on both name and tag ID
+	s := make([]*floatSortedMergeHeapItem, len(h.items))
+	copy(s, h.items) // sort a copy of the pointers so the order of h.items is left untouched
+
+	less := func(i, j int) bool {
+		x, y := s[i].point, s[j].point
+		ret := strings.Compare(x.Name, y.Name)
+		if ret == 0 {
+			ret = strings.Compare(x.Tags.ID(), y.Tags.ID()) // names tie: fall back to the tag ID
+		}
+		if ret != 0 {
+			// truth table for the return expression below (strings.Compare yields -1, 0 or +1):
+			// ret | == -1 | h.opt.Ascending | result
+			//  1  | false |  false          | true
+			// -1  | true  |  false          | false
+			//  1  | false |  true           | false
+			// -1  | true  |  true           | true
+			return ret == -1 == h.opt.Ascending
+		}
+		hasDup = true
+		return false
+	}
+	sort.Slice(s, less)
+	if !hasDup {
+		h.fast = true
+		for i, item := range s {
+			item.fastIdx = i // position in the sorted order; s and h.items point at the same items
+		}
+	}
 }
 
 func (h *floatSortedMergeHeap) Len() int      { return len(h.items) }
 func (h *floatSortedMergeHeap) Swap(i, j int) { h.items[i], h.items[j] = h.items[j], h.items[i] }
 func (h *floatSortedMergeHeap) Less(i, j int) bool {
+	if h.fast {
+		return h.items[i].fastIdx < h.items[j].fastIdx
+	}
+
 	x, y := h.items[i].point, h.items[j].point
 
 	if h.opt.Ascending {
@@ -489,6 +537,8 @@ type floatSortedMergeHeapItem struct {
 	point *FloatPoint
 	err   error
 	itr   FloatIterator
+	// index for fast shortcut
+	fastIdx int
 }
 
 // floatIteratorScanner scans the results of a FloatIterator into a map.
@@ -3038,6 +3088,7 @@ func (itr *integerSortedMergeIterator) pop() (*IntegerPoint, error) {
 			}
 			itr.heap.items = append(itr.heap.items, item)
 		}
+		itr.heap.detectFast()
 		heap.Init(itr.heap)
 		itr.init = true
 	}
@@ -3075,11 +3126,57 @@ func (itr *integerSortedMergeIterator) pop() (*IntegerPoint, error) {
 type integerSortedMergeHeap struct {
 	opt   IteratorOptions
 	items []*integerSortedMergeHeapItem
+	// fast enables a shortcut that is valid when every input carries exactly one,
+	// distinct time series. Detecting this adds some overhead, but it yields a
+	// significant performance improvement for queries like SELECT * FROM m GROUP BY *.
+	fast bool
+}
+// detectFast sets h.fast and gives every item a fastIdx when each input reports SeriesN == 1 and no two items' points share a name and tag ID.
+func (h *integerSortedMergeHeap) detectFast() {
+	for _, item := range h.items {
+		if item.itr.Stats().SeriesN != 1 { // an input not reporting exactly one series rules out the shortcut
+			return
+		}
+	}
+
+	hasDup := false // set by less when two points tie on both name and tag ID
+	s := make([]*integerSortedMergeHeapItem, len(h.items))
+	copy(s, h.items) // sort a copy of the pointers so the order of h.items is left untouched
+
+	less := func(i, j int) bool {
+		x, y := s[i].point, s[j].point
+		ret := strings.Compare(x.Name, y.Name)
+		if ret == 0 {
+			ret = strings.Compare(x.Tags.ID(), y.Tags.ID()) // names tie: fall back to the tag ID
+		}
+		if ret != 0 {
+			// truth table for the return expression below (strings.Compare yields -1, 0 or +1):
+			// ret | == -1 | h.opt.Ascending | result
+			//  1  | false |  false          | true
+			// -1  | true  |  false          | false
+			//  1  | false |  true           | false
+			// -1  | true  |  true           | true
+			return ret == -1 == h.opt.Ascending
+		}
+		hasDup = true
+		return false
+	}
+	sort.Slice(s, less)
+	if !hasDup {
+		h.fast = true
+		for i, item := range s {
+			item.fastIdx = i // position in the sorted order; s and h.items point at the same items
+		}
+	}
 }
 
 func (h *integerSortedMergeHeap) Len() int      { return len(h.items) }
 func (h *integerSortedMergeHeap) Swap(i, j int) { h.items[i], h.items[j] = h.items[j], h.items[i] }
 func (h *integerSortedMergeHeap) Less(i, j int) bool {
+	if h.fast {
+		return h.items[i].fastIdx < h.items[j].fastIdx
+	}
+
 	x, y := h.items[i].point, h.items[j].point
 
 	if h.opt.Ascending {
@@ -3153,6 +3250,8 @@ type integerSortedMergeHeapItem struct {
 	point *IntegerPoint
 	err   error
 	itr   IntegerIterator
+	// index for fast shortcut
+	fastIdx int
 }
 
 // integerIteratorScanner scans the results of a IntegerIterator into a map.
@@ -5702,6 +5801,7 @@ func (itr *unsignedSortedMergeIterator) pop() (*UnsignedPoint, error) {
 			}
 			itr.heap.items = append(itr.heap.items, item)
 		}
+		itr.heap.detectFast()
 		heap.Init(itr.heap)
 		itr.init = true
 	}
@@ -5739,11 +5839,57 @@ func (itr *unsignedSortedMergeIterator) pop() (*UnsignedPoint, error) {
 type unsignedSortedMergeHeap struct {
 	opt   IteratorOptions
 	items []*unsignedSortedMergeHeapItem
+	// fast enables a shortcut that is valid when every input carries exactly one,
+	// distinct time series. Detecting this adds some overhead, but it yields a
+	// significant performance improvement for queries like SELECT * FROM m GROUP BY *.
+	fast bool
+}
+// detectFast sets h.fast and gives every item a fastIdx when each input reports SeriesN == 1 and no two items' points share a name and tag ID.
+func (h *unsignedSortedMergeHeap) detectFast() {
+	for _, item := range h.items {
+		if item.itr.Stats().SeriesN != 1 { // an input not reporting exactly one series rules out the shortcut
+			return
+		}
+	}
+
+	hasDup := false // set by less when two points tie on both name and tag ID
+	s := make([]*unsignedSortedMergeHeapItem, len(h.items))
+	copy(s, h.items) // sort a copy of the pointers so the order of h.items is left untouched
+
+	less := func(i, j int) bool {
+		x, y := s[i].point, s[j].point
+		ret := strings.Compare(x.Name, y.Name)
+		if ret == 0 {
+			ret = strings.Compare(x.Tags.ID(), y.Tags.ID()) // names tie: fall back to the tag ID
+		}
+		if ret != 0 {
+			// truth table for the return expression below (strings.Compare yields -1, 0 or +1):
+			// ret | == -1 | h.opt.Ascending | result
+			//  1  | false |  false          | true
+			// -1  | true  |  false          | false
+			//  1  | false |  true           | false
+			// -1  | true  |  true           | true
+			return ret == -1 == h.opt.Ascending
+		}
+		hasDup = true
+		return false
+	}
+	sort.Slice(s, less)
+	if !hasDup {
+		h.fast = true
+		for i, item := range s {
+			item.fastIdx = i // position in the sorted order; s and h.items point at the same items
+		}
+	}
 }
 
 func (h *unsignedSortedMergeHeap) Len() int      { return len(h.items) }
 func (h *unsignedSortedMergeHeap) Swap(i, j int) { h.items[i], h.items[j] = h.items[j], h.items[i] }
 func (h *unsignedSortedMergeHeap) Less(i, j int) bool {
+	if h.fast {
+		return h.items[i].fastIdx < h.items[j].fastIdx
+	}
+
 	x, y := h.items[i].point, h.items[j].point
 
 	if h.opt.Ascending {
@@ -5817,6 +5963,8 @@ type unsignedSortedMergeHeapItem struct {
 	point *UnsignedPoint
 	err   error
 	itr   UnsignedIterator
+	// index for fast shortcut
+	fastIdx int
 }
 
 // unsignedIteratorScanner scans the results of a UnsignedIterator into a map.
@@ -8366,6 +8514,7 @@ func (itr *stringSortedMergeIterator) pop() (*StringPoint, error) {
 			}
 			itr.heap.items = append(itr.heap.items, item)
 		}
+		itr.heap.detectFast()
 		heap.Init(itr.heap)
 		itr.init = true
 	}
@@ -8403,11 +8552,57 @@ func (itr *stringSortedMergeIterator) pop() (*StringPoint, error) {
 type stringSortedMergeHeap struct {
 	opt   IteratorOptions
 	items []*stringSortedMergeHeapItem
+	// fast enables a shortcut that is valid when every input carries exactly one,
+	// distinct time series. Detecting this adds some overhead, but it yields a
+	// significant performance improvement for queries like SELECT * FROM m GROUP BY *.
+	fast bool
+}
+// detectFast sets h.fast and gives every item a fastIdx when each input reports SeriesN == 1 and no two items' points share a name and tag ID.
+func (h *stringSortedMergeHeap) detectFast() {
+	for _, item := range h.items {
+		if item.itr.Stats().SeriesN != 1 { // an input not reporting exactly one series rules out the shortcut
+			return
+		}
+	}
+
+	hasDup := false // set by less when two points tie on both name and tag ID
+	s := make([]*stringSortedMergeHeapItem, len(h.items))
+	copy(s, h.items) // sort a copy of the pointers so the order of h.items is left untouched
+
+	less := func(i, j int) bool {
+		x, y := s[i].point, s[j].point
+		ret := strings.Compare(x.Name, y.Name)
+		if ret == 0 {
+			ret = strings.Compare(x.Tags.ID(), y.Tags.ID()) // names tie: fall back to the tag ID
+		}
+		if ret != 0 {
+			// truth table for the return expression below (strings.Compare yields -1, 0 or +1):
+			// ret | == -1 | h.opt.Ascending | result
+			//  1  | false |  false          | true
+			// -1  | true  |  false          | false
+			//  1  | false |  true           | false
+			// -1  | true  |  true           | true
+			return ret == -1 == h.opt.Ascending
+		}
+		hasDup = true
+		return false
+	}
+	sort.Slice(s, less)
+	if !hasDup {
+		h.fast = true
+		for i, item := range s {
+			item.fastIdx = i // position in the sorted order; s and h.items point at the same items
+		}
+	}
 }
 
 func (h *stringSortedMergeHeap) Len() int      { return len(h.items) }
 func (h *stringSortedMergeHeap) Swap(i, j int) { h.items[i], h.items[j] = h.items[j], h.items[i] }
 func (h *stringSortedMergeHeap) Less(i, j int) bool {
+	if h.fast {
+		return h.items[i].fastIdx < h.items[j].fastIdx
+	}
+
 	x, y := h.items[i].point, h.items[j].point
 
 	if h.opt.Ascending {
@@ -8481,6 +8676,8 @@ type stringSortedMergeHeapItem struct {
 	point *StringPoint
 	err   error
 	itr   StringIterator
+	// index for fast shortcut
+	fastIdx int
 }
 
 // stringIteratorScanner scans the results of a StringIterator into a map.
@@ -11016,6 +11213,7 @@ func (itr *booleanSortedMergeIterator) pop() (*BooleanPoint, error) {
 			}
 			itr.heap.items = append(itr.heap.items, item)
 		}
+		itr.heap.detectFast()
 		heap.Init(itr.heap)
 		itr.init = true
 	}
@@ -11053,11 +11251,57 @@ func (itr *booleanSortedMergeIterator) pop() (*BooleanPoint, error) {
 type booleanSortedMergeHeap struct {
 	opt   IteratorOptions
 	items []*booleanSortedMergeHeapItem
+	// fast enables a shortcut that is valid when every input carries exactly one,
+	// distinct time series. Detecting this adds some overhead, but it yields a
+	// significant performance improvement for queries like SELECT * FROM m GROUP BY *.
+	fast bool
+}
+// detectFast sets h.fast and gives every item a fastIdx when each input reports SeriesN == 1 and no two items' points share a name and tag ID.
+func (h *booleanSortedMergeHeap) detectFast() {
+	for _, item := range h.items {
+		if item.itr.Stats().SeriesN != 1 { // an input not reporting exactly one series rules out the shortcut
+			return
+		}
+	}
+
+	hasDup := false // set by less when two points tie on both name and tag ID
+	s := make([]*booleanSortedMergeHeapItem, len(h.items))
+	copy(s, h.items) // sort a copy of the pointers so the order of h.items is left untouched
+
+	less := func(i, j int) bool {
+		x, y := s[i].point, s[j].point
+		ret := strings.Compare(x.Name, y.Name)
+		if ret == 0 {
+			ret = strings.Compare(x.Tags.ID(), y.Tags.ID()) // names tie: fall back to the tag ID
+		}
+		if ret != 0 {
+			// truth table for the return expression below (strings.Compare yields -1, 0 or +1):
+			// ret | == -1 | h.opt.Ascending | result
+			//  1  | false |  false          | true
+			// -1  | true  |  false          | false
+			//  1  | false |  true           | false
+			// -1  | true  |  true           | true
+			return ret == -1 == h.opt.Ascending
+		}
+		hasDup = true
+		return false
+	}
+	sort.Slice(s, less)
+	if !hasDup {
+		h.fast = true
+		for i, item := range s {
+			item.fastIdx = i // position in the sorted order; s and h.items point at the same items
+		}
+	}
 }
 
 func (h *booleanSortedMergeHeap) Len() int      { return len(h.items) }
 func (h *booleanSortedMergeHeap) Swap(i, j int) { h.items[i], h.items[j] = h.items[j], h.items[i] }
 func (h *booleanSortedMergeHeap) Less(i, j int) bool {
+	if h.fast {
+		return h.items[i].fastIdx < h.items[j].fastIdx
+	}
+
 	x, y := h.items[i].point, h.items[j].point
 
 	if h.opt.Ascending {
@@ -11131,6 +11375,8 @@ type booleanSortedMergeHeapItem struct {
 	point *BooleanPoint
 	err   error
 	itr   BooleanIterator
+	// index for fast shortcut
+	fastIdx int
 }
 
 // booleanIteratorScanner scans the results of a BooleanIterator into a map.