From 4a443cd329044e7ac9550e6c0b17a4f5a24ee161 Mon Sep 17 00:00:00 2001 From: Daylon Wilkins Date: Fri, 5 Nov 2021 09:27:33 -0700 Subject: [PATCH] Reduced index range complexity, removed range overlapping --- memory/index.go | 135 ++++++++---------- sql/analyzer/indexes.go | 24 ++-- sql/index.go | 13 +- sql/index_builder.go | 41 ++++-- sql/range.go | 291 +++++++++++++++++++++++++++++++-------- sql/range_column_expr.go | 107 +++++++++++++- sql/range_cut.go | 32 ++++- sql/range_test.go | 245 ++++++++++++++++++++++++++++++++ 8 files changed, 724 insertions(+), 164 deletions(-) create mode 100644 sql/range_test.go diff --git a/memory/index.go b/memory/index.go index a0b17a8543..9b70ff43e3 100644 --- a/memory/index.go +++ b/memory/index.go @@ -85,89 +85,76 @@ func (i *Index) NewLookup(ctx *sql.Context, ranges ...sql.Range) (sql.IndexLooku exprs = mergeableIndex.ColumnExpressions() } - var completeExpr sql.Expression + var rangeCollectionExpr sql.Expression for _, rang := range ranges { - var filterExpr sql.Expression - for i, rangeColumn := range rang { - var rangeExpr sql.Expression - for _, rangeColumnExpr := range rangeColumn { - switch rangeColumnExpr.Type() { - // Both Empty and All may seem like strange inclusions, but if only one range is given we need some - // expression to evaluate, otherwise our expression would be a nil expression which would panic. 
- case sql.RangeType_Empty: - rangeExpr = or(rangeExpr, expression.NewEquals(expression.NewLiteral(1, sql.Int8), expression.NewLiteral(2, sql.Int8))) - case sql.RangeType_All: - rangeExpr = or(rangeExpr, expression.NewEquals(expression.NewLiteral(1, sql.Int8), expression.NewLiteral(1, sql.Int8))) - case sql.RangeType_GreaterThan: - lit, typ := getType(sql.GetRangeCutKey(rangeColumnExpr.LowerBound)) - rangeExpr = or(rangeExpr, expression.NewNullSafeGreaterThan(exprs[i], expression.NewLiteral(lit, typ))) - case sql.RangeType_GreaterOrEqual: - lit, typ := getType(sql.GetRangeCutKey(rangeColumnExpr.LowerBound)) - rangeExpr = or(rangeExpr, expression.NewNullSafeGreaterThanOrEqual(exprs[i], expression.NewLiteral(lit, typ))) - case sql.RangeType_LessThan: - lit, typ := getType(sql.GetRangeCutKey(rangeColumnExpr.UpperBound)) - rangeExpr = or(rangeExpr, expression.NewNullSafeLessThan(exprs[i], expression.NewLiteral(lit, typ))) - case sql.RangeType_LessOrEqual: - lit, typ := getType(sql.GetRangeCutKey(rangeColumnExpr.UpperBound)) - rangeExpr = or(rangeExpr, expression.NewNullSafeLessThanOrEqual(exprs[i], expression.NewLiteral(lit, typ))) - case sql.RangeType_ClosedClosed: - if ok, err := rangeColumnExpr.RepresentsEquals(); err != nil { - return nil, err - } else if ok { - lit, typ := getType(sql.GetRangeCutKey(rangeColumnExpr.LowerBound)) - if typ == sql.Null { - rangeExpr = or(rangeExpr, expression.NewIsNull(exprs[i])) - } else { - rangeExpr = or(rangeExpr, expression.NewNullSafeEquals(exprs[i], expression.NewLiteral(lit, typ))) - } + var rangeExpr sql.Expression + for i, rce := range rang { + var rangeColumnExpr sql.Expression + switch rce.Type() { + // Both Empty and All may seem like strange inclusions, but if only one range is given we need some + // expression to evaluate, otherwise our expression would be a nil expression which would panic. 
+ case sql.RangeType_Empty: + rangeColumnExpr = expression.NewEquals(expression.NewLiteral(1, sql.Int8), expression.NewLiteral(2, sql.Int8)) + case sql.RangeType_All: + rangeColumnExpr = expression.NewEquals(expression.NewLiteral(1, sql.Int8), expression.NewLiteral(1, sql.Int8)) + case sql.RangeType_GreaterThan: + lit, typ := getType(sql.GetRangeCutKey(rce.LowerBound)) + rangeColumnExpr = expression.NewNullSafeGreaterThan(exprs[i], expression.NewLiteral(lit, typ)) + case sql.RangeType_GreaterOrEqual: + lit, typ := getType(sql.GetRangeCutKey(rce.LowerBound)) + rangeColumnExpr = expression.NewNullSafeGreaterThanOrEqual(exprs[i], expression.NewLiteral(lit, typ)) + case sql.RangeType_LessThan: + lit, typ := getType(sql.GetRangeCutKey(rce.UpperBound)) + rangeColumnExpr = expression.NewNullSafeLessThan(exprs[i], expression.NewLiteral(lit, typ)) + case sql.RangeType_LessOrEqual: + lit, typ := getType(sql.GetRangeCutKey(rce.UpperBound)) + rangeColumnExpr = expression.NewNullSafeLessThanOrEqual(exprs[i], expression.NewLiteral(lit, typ)) + case sql.RangeType_ClosedClosed: + if ok, err := rce.RepresentsEquals(); err != nil { + return nil, err + } else if ok { + lit, typ := getType(sql.GetRangeCutKey(rce.LowerBound)) + if typ == sql.Null { + rangeColumnExpr = expression.NewIsNull(exprs[i]) } else { - lowLit, lowTyp := getType(sql.GetRangeCutKey(rangeColumnExpr.LowerBound)) - upLit, upTyp := getType(sql.GetRangeCutKey(rangeColumnExpr.UpperBound)) - rangeExpr = or(rangeExpr, - and( - expression.NewNullSafeGreaterThanOrEqual(exprs[i], expression.NewLiteral(lowLit, lowTyp)), - expression.NewNullSafeLessThanOrEqual(exprs[i], expression.NewLiteral(upLit, upTyp)), - ), - ) + rangeColumnExpr = expression.NewNullSafeEquals(exprs[i], expression.NewLiteral(lit, typ)) } - case sql.RangeType_OpenOpen: - lowLit, lowTyp := getType(sql.GetRangeCutKey(rangeColumnExpr.LowerBound)) - upLit, upTyp := getType(sql.GetRangeCutKey(rangeColumnExpr.UpperBound)) - rangeExpr = or(rangeExpr, - and( - 
expression.NewNullSafeGreaterThan(exprs[i], expression.NewLiteral(lowLit, lowTyp)), - expression.NewNullSafeLessThan(exprs[i], expression.NewLiteral(upLit, upTyp)), - ), - ) - case sql.RangeType_OpenClosed: - lowLit, lowTyp := getType(sql.GetRangeCutKey(rangeColumnExpr.LowerBound)) - upLit, upTyp := getType(sql.GetRangeCutKey(rangeColumnExpr.UpperBound)) - rangeExpr = or(rangeExpr, - and( - expression.NewNullSafeGreaterThan(exprs[i], expression.NewLiteral(lowLit, lowTyp)), - expression.NewNullSafeLessThanOrEqual(exprs[i], expression.NewLiteral(upLit, upTyp)), - ), - ) - case sql.RangeType_ClosedOpen: - lowLit, lowTyp := getType(sql.GetRangeCutKey(rangeColumnExpr.LowerBound)) - upLit, upTyp := getType(sql.GetRangeCutKey(rangeColumnExpr.UpperBound)) - rangeExpr = or(rangeExpr, - and( - expression.NewNullSafeGreaterThanOrEqual(exprs[i], expression.NewLiteral(lowLit, lowTyp)), - expression.NewNullSafeLessThan(exprs[i], expression.NewLiteral(upLit, upTyp)), - ), + } else { + lowLit, lowTyp := getType(sql.GetRangeCutKey(rce.LowerBound)) + upLit, upTyp := getType(sql.GetRangeCutKey(rce.UpperBound)) + rangeColumnExpr = and( + expression.NewNullSafeGreaterThanOrEqual(exprs[i], expression.NewLiteral(lowLit, lowTyp)), + expression.NewNullSafeLessThanOrEqual(exprs[i], expression.NewLiteral(upLit, upTyp)), ) } + case sql.RangeType_OpenOpen: + lowLit, lowTyp := getType(sql.GetRangeCutKey(rce.LowerBound)) + upLit, upTyp := getType(sql.GetRangeCutKey(rce.UpperBound)) + rangeColumnExpr = and( + expression.NewNullSafeGreaterThan(exprs[i], expression.NewLiteral(lowLit, lowTyp)), + expression.NewNullSafeLessThan(exprs[i], expression.NewLiteral(upLit, upTyp)), + ) + case sql.RangeType_OpenClosed: + lowLit, lowTyp := getType(sql.GetRangeCutKey(rce.LowerBound)) + upLit, upTyp := getType(sql.GetRangeCutKey(rce.UpperBound)) + rangeColumnExpr = and( + expression.NewNullSafeGreaterThan(exprs[i], expression.NewLiteral(lowLit, lowTyp)), + expression.NewNullSafeLessThanOrEqual(exprs[i], 
expression.NewLiteral(upLit, upTyp)), + ) + case sql.RangeType_ClosedOpen: + lowLit, lowTyp := getType(sql.GetRangeCutKey(rce.LowerBound)) + upLit, upTyp := getType(sql.GetRangeCutKey(rce.UpperBound)) + rangeColumnExpr = and( + expression.NewNullSafeGreaterThanOrEqual(exprs[i], expression.NewLiteral(lowLit, lowTyp)), + expression.NewNullSafeLessThan(exprs[i], expression.NewLiteral(upLit, upTyp)), + ) } - if rangeExpr == nil { - continue - } - filterExpr = and(filterExpr, rangeExpr) + rangeExpr = and(rangeExpr, rangeColumnExpr) } - completeExpr = or(completeExpr, filterExpr) + rangeCollectionExpr = or(rangeCollectionExpr, rangeExpr) } - return NewIndexLookup(ctx, mergeableIndex, completeExpr, ranges...), nil + return NewIndexLookup(ctx, mergeableIndex, rangeCollectionExpr, ranges...), nil } // ColumnExpressionTypes implements the interface sql.Index. diff --git a/sql/analyzer/indexes.go b/sql/analyzer/indexes.go index e85cfebb16..701038d561 100644 --- a/sql/analyzer/indexes.go +++ b/sql/analyzer/indexes.go @@ -128,10 +128,10 @@ func getIndexes( foundRightIdx := false if rightIdx, ok := rightIndexes[table]; ok { if canMergeIndexes(leftIdx.lookup, rightIdx.lookup) { - var allRanges []sql.Range - allRanges = append([]sql.Range{}, leftIdx.lookup.Ranges()...) + var allRanges sql.RangeCollection + allRanges = append(sql.RangeCollection{}, leftIdx.lookup.Ranges()...) allRanges = append(allRanges, rightIdx.lookup.Ranges()...) - newRanges, err := sql.SimplifyRanges(allRanges...) + newRanges, err := sql.RemoveOverlappingRanges(allRanges...) 
if err != nil { return nil, nil } @@ -184,15 +184,15 @@ func getIndexes( return nil, errInvalidInRightEvaluation.New(value) } - var toUnion []sql.Range + var toUnion sql.RangeCollection for _, val := range values { - ranges := sql.NewIndexBuilder(ctx, idx).Equals(ctx, colExprs[0].String(), val).Range() - if ranges == nil { + ranges := sql.NewIndexBuilder(ctx, idx).Equals(ctx, colExprs[0].String(), val).Ranges() + if len(ranges) == 0 { return nil, nil } - toUnion = append(toUnion, ranges) + toUnion = append(toUnion, ranges...) } - allRanges, err := sql.SimplifyRanges(toUnion...) + allRanges, err := sql.RemoveOverlappingRanges(toUnion...) if err != nil { return nil, err } @@ -503,13 +503,13 @@ func getNegatedIndexes( return nil, errInvalidInRightEvaluation.New(value) } - var toIntersect []sql.Range + var toIntersect sql.RangeCollection for _, val := range values { - ranges := sql.NewIndexBuilder(ctx, idx).NotEquals(ctx, normalizedExpressions[0].String(), val).Range() - if ranges == nil { + ranges := sql.NewIndexBuilder(ctx, idx).NotEquals(ctx, normalizedExpressions[0].String(), val).Ranges() + if len(ranges) == 0 { return nil, nil } - toIntersect = append(toIntersect, ranges) + toIntersect = append(toIntersect, ranges...) } allRanges := sql.IntersectRanges(toIntersect...) if allRanges == nil { diff --git a/sql/index.go b/sql/index.go index 72e6f12fc4..216b2412f8 100644 --- a/sql/index.go +++ b/sql/index.go @@ -39,13 +39,12 @@ type Index interface { // IsGenerated returns whether this index was generated. Generated indexes // are used for index access, but are not displayed (such as with SHOW INDEXES). IsGenerated() bool - // NewLookup returns a new IndexLookup for the ranges given. Ranges represent filters over columns (RangeColumns), - // and it is possible for ranges to overlap (however they will not be subsets of one another). 
Each Range - // is ordered by the column expressions (as returned by Expressions) with the RangeColumn representing the - // searchable area for each column expression. Multiple RangeColumnExprs per RangeColumn are equivalent to the union - // of those RangeColumnExprs, e.g. (x < 1 OR x > 10) will return two RangeColumnExprs, where matching either is - // valid for the column "x". If an integrator is unable to process the given ranges, then a nil may be returned. An - // error should be returned only in the event that an error occurred. + // NewLookup returns a new IndexLookup for the ranges given. Ranges represent filters over columns. Each Range + // is ordered by the column expressions (as returned by Expressions) with the RangeColumnExpr representing the + // searchable area for each column expression. Each Range given will not overlap with any other ranges. Additionally, + // all ranges will have the same length, and may represent a partial index (matching a prefix rather than the entire + // index). If an integrator is unable to process the given ranges, then a nil may be returned. An error should be + // returned only in the event that an error occurred. NewLookup(ctx *Context, ranges ...Range) (IndexLookup, error) // ColumnExpressionTypes returns each expression and its associated Type. Each expression string should exactly // match the string returned from Index.Expressions(). diff --git a/sql/index_builder.go b/sql/index_builder.go index 01b2b03b33..a6cd890816 100644 --- a/sql/index_builder.go +++ b/sql/index_builder.go @@ -31,7 +31,7 @@ type IndexBuilder struct { isInvalid bool err error colExprTypes map[string]Type - ranges map[string]RangeColumn + ranges map[string][]RangeColumnExpr } // NewIndexBuilder returns a new IndexBuilder. 
Used internally to construct a range that will later be passed to @@ -46,7 +46,7 @@ func NewIndexBuilder(ctx *Context, idx Index) *IndexBuilder { isInvalid: false, err: nil, colExprTypes: colExprTypes, - ranges: make(map[string]RangeColumn), + ranges: make(map[string][]RangeColumnExpr), } } @@ -154,12 +154,12 @@ func (b *IndexBuilder) LessOrEqual(ctx *Context, colExpr string, key interface{} return b } -// Range returns the range for this index builder. If the builder is invalid for any reason then this returns nil. -func (b *IndexBuilder) Range() Range { +// Ranges returns all ranges for this index builder. If the builder is invalid for any reason then this returns nil. +func (b *IndexBuilder) Ranges() RangeCollection { if b.err != nil || b.isInvalid { return nil } - var rangeCollection Range + var allColumns [][]RangeColumnExpr for _, colExpr := range b.idx.Expressions() { ranges, ok := b.ranges[colExpr] if !ok { @@ -167,9 +167,34 @@ func (b *IndexBuilder) Range() Range { // not have an entry for then we've hit all the ranges. break } - rangeCollection = append(rangeCollection, ranges) + allColumns = append(allColumns, ranges) + } + + // In the builder ranges map we store multiple column expressions per column, however we want all permutations to + // be their own range, so here we're creating a new range for every permutation. 
+ colCounts := make([]int, len(allColumns)) + permutation := make([]int, len(allColumns)) + for i, rangeColumn := range allColumns { + colCounts[i] = len(rangeColumn) + } + var ranges []Range + exit := false + for !exit { + exit = true + currentRange := make(Range, len(allColumns)) + for colIdx, exprCount := range colCounts { + permutation[colIdx] = (permutation[colIdx] + 1) % exprCount + if permutation[colIdx] != 0 { + exit = false + break + } + } + for colIdx, exprIdx := range permutation { + currentRange[colIdx] = allColumns[colIdx][exprIdx] + } + ranges = append(ranges, currentRange) } - return rangeCollection + return ranges } // Build constructs a new IndexLookup based on the ranges that have been built internally by this builder. @@ -179,7 +204,7 @@ func (b *IndexBuilder) Build(ctx *Context) (IndexLookup, error) { } else if b.isInvalid { return nil, nil } else { - return b.idx.NewLookup(ctx, b.Range()) + return b.idx.NewLookup(ctx, b.Ranges()...) } } diff --git a/sql/range.go b/sql/range.go index 519b930e73..d76966a251 100644 --- a/sql/range.go +++ b/sql/range.go @@ -14,15 +14,14 @@ package sql +import "strings" + // RangeCollection is a collection of ranges that represent different (non-overlapping) filter expressions. type RangeCollection []Range // Range is a collection of RangeColumns that are ordered by the column expressions as returned by their parent // index. A single range represents a set of values intended for iteration by an integrator's index. -type Range []RangeColumn - -// RangeColumn is a slice of RangeColumnExprs meant to represent a set of discrete, non-overlapping RangeColumnExprs. -type RangeColumn []RangeColumnExpr +type Range []RangeColumnExpr // Intersect attempts to intersect the given RangeCollection with the calling RangeCollection. 
This ensures that each // Range belonging to the same collection is treated as a union with respect to that same collection, rather than @@ -40,7 +39,7 @@ func (ranges RangeCollection) Intersect(otherRanges RangeCollection) (RangeColle } } } - newRanges, err := SimplifyRanges(newRanges...) + newRanges, err := RemoveOverlappingRanges(newRanges...) if err != nil { return nil, err } @@ -50,48 +49,138 @@ func (ranges RangeCollection) Intersect(otherRanges RangeCollection) (RangeColle return newRanges, nil } +// String returns this RangeCollection as a string for display purposes. +func (ranges RangeCollection) String() string { + sb := strings.Builder{} + sb.WriteByte('[') + for i, rang := range ranges { + if i != 0 { + sb.WriteString(", ") + } + sb.WriteString(rang.String()) + } + sb.WriteByte(']') + return sb.String() +} + +// DebugString returns this RangeCollection as a string for debugging purposes. +func (ranges RangeCollection) DebugString() string { + sb := strings.Builder{} + sb.WriteByte('[') + for i, rang := range ranges { + if i != 0 { + sb.WriteString(", ") + } + sb.WriteString(rang.DebugString()) + } + sb.WriteByte(']') + return sb.String() +} + // AsEmpty returns a Range full of empty RangeColumns with the same types as the calling Range. func (rang Range) AsEmpty() Range { emptyRange := make(Range, len(rang)) for i := range rang { - emptyRange[i] = RangeColumn{EmptyRangeColumnExpr(rang[i][0].typ)} + emptyRange[i] = EmptyRangeColumnExpr(rang[i].typ) } return emptyRange } -// RangesByColumnExpression returns the RangeColumn that belongs to the given column expression. If an index does not -// contain the given column expression then a nil is returned. -func (rang Range) RangesByColumnExpression(idx Index, colExpr string) RangeColumn { +// Copy returns a duplicate of this Range. 
+func (rang Range) Copy() Range { + newRange := make(Range, len(rang)) + for i, colExpr := range rang { + newRange[i] = colExpr // RangeColumnExpr and all of its members are non-pointer types, so they're copied + } + return newRange +} + +// ExpressionByColumnName returns the RangeColumnExpr that belongs to the given column expression. If an index does not +// contain the column expression then false is returned. +func (rang Range) ExpressionByColumnName(idx Index, colExpr string) (RangeColumnExpr, bool) { for i, idxColExpr := range idx.Expressions() { if idxColExpr == colExpr { if i < len(rang) { - return rang[i] + return rang[i], true } break } } - return nil + return RangeColumnExpr{}, false } -// Intersect attempts to intersect the given Range with the calling Range. +// Equals evaluates whether the calling Range is equivalent to the given Range. +func (rang Range) Equals(otherRange Range) (bool, error) { + if len(rang) != len(otherRange) { + return false, nil + } + for i := range rang { + if ok, err := rang[i].Equals(otherRange[i]); err != nil || !ok { + return false, err + } + } + return true, nil +} + +// Intersect intersects the given Range with the calling Range. func (rang Range) Intersect(otherRange Range) (Range, error) { if len(rang) != len(otherRange) { return nil, nil } newRangeCollection := make(Range, len(rang)) for i := range rang { - intersectedRanges, ok, err := rang[i].TryIntersect(otherRange[i]) + intersectedRange, ok, err := rang[i].TryIntersect(otherRange[i]) if err != nil { return nil, err } if !ok { return rang.AsEmpty(), nil } - newRangeCollection[i] = intersectedRanges + newRangeCollection[i] = intersectedRange } return newRangeCollection, nil } +// TryUnion attempts to union the given Range with the calling Range. This can only do a union if one Range is a subset +// of the other, or if all columns except for one are equivalent. Returns true if the union was successful. 
+func (rang Range) TryUnion(otherRange Range) (Range, bool, error) { + if len(rang) != len(otherRange) { + return nil, false, nil + } + if ok, err := rang.IsSupersetOf(otherRange); err != nil { + return nil, false, err + } else if ok { + return rang, true, nil + } + if ok, err := otherRange.IsSupersetOf(rang); err != nil { + return nil, false, err + } else if ok { + return otherRange, true, nil + } + + indexToUnion := -1 + // The superset checks will cover if every column expr is equivalent + for i := 0; i < len(rang); i++ { + if ok, err := rang[i].Equals(otherRange[i]); err != nil { + return nil, false, err + } else if !ok { + // Only one column may not equal another + if indexToUnion == -1 { + indexToUnion = i + } else { + return nil, false, nil + } + } + } + unionedLastExpr, ok, err := rang[indexToUnion].TryUnion(otherRange[indexToUnion]) + if err != nil || !ok { + return nil, false, err + } + unionedRange := rang.Copy() + unionedRange[indexToUnion] = unionedLastExpr + return unionedRange, true, nil +} + // IsSubsetOf evaluates whether the calling Range is fully encompassed by the given Range. func (rang Range) IsSubsetOf(otherRange Range) (bool, error) { if len(rang) != len(otherRange) { @@ -111,64 +200,116 @@ func (rang Range) IsSupersetOf(otherRange Range) (bool, error) { return otherRange.IsSubsetOf(rang) } -// Equals returns whether the calling RangeColumn is equal to the given RangeColumn. -func (rc RangeColumn) Equals(other RangeColumn) (bool, error) { - if len(rc) != len(other) { +// Overlaps returns whether the calling Range and given Range have overlapping values, which would result in the same +// values being returned from some subset of both ranges. 
+func (rang Range) Overlaps(otherRange Range) (bool, error) { + if len(rang) != len(otherRange) { return false, nil } - for i := range rc { - if ok, err := rc[i].Equals(other[i]); err != nil || !ok { + for i := range rang { + _, ok, err := rang[i].Overlaps(otherRange[i]) + if err != nil || !ok { return false, err } } return true, nil } -// TryIntersect attempts to intersect the calling RangeColumn with the given RangeColumn. If the intersection fails or -// results in an empty RangeColumn then nil and false are returned. -func (rc RangeColumn) TryIntersect(other RangeColumn) (RangeColumn, bool, error) { - var newRangeColumn []RangeColumnExpr - var err error - for _, rr := range rc { - for _, or := range other { - newRange, ok, err := rr.TryIntersect(or) - if err != nil { - return nil, false, err - } - if ok { - newRangeColumn = append(newRangeColumn, newRange) - } - } +// RemoveOverlap removes any overlap that the given Range may have with the calling Range. If the two ranges do not +// overlap then they're both returned. If one is a subset of the other/may be unioned then only one Range is returned. +// Otherwise, this returns a set of ranges that do not overlap with each other. +// +// If the two ranges do not overlap then false is returned, otherwise returns true. +func (rang Range) RemoveOverlap(otherRange Range) (RangeCollection, bool, error) { + // If the ranges do not overlap then they would never return the same value at any point. + if ok, err := rang.Overlaps(otherRange); err != nil || !ok { + return []Range{rang, otherRange}, false, err } - newRangeColumn, err = SimplifyRangeColumn(newRangeColumn...) - if err != nil || len(newRangeColumn) == 0 { + // If the two ranges may be unioned then we just do that and return. + // Also allows us to not have to worry about the case where every column is equivalent. 
+ if unionedRange, ok, err := rang.TryUnion(otherRange); err != nil { return nil, false, err + } else if ok { + return []Range{unionedRange}, true, nil } - return newRangeColumn, true, nil + + var ranges []Range + for i := range rang { + if ok, err := rang[i].Equals(otherRange[i]); err != nil { + return nil, false, err + } else if ok { + continue + } + // Get the RangeColumnExpr that overlaps both RangeColumnExprs + overlapExpr, _, err := rang[i].Overlaps(otherRange[i]) + if err != nil { + return nil, false, err + } + // Subtract the overlapping range from each existing range. + // This will give us a collection of ranges that do not have any overlap. + range1Subtracted, err := rang[i].Subtract(overlapExpr) + if err != nil { + return nil, false, err + } + for _, newColExpr := range range1Subtracted { + ranges = append(ranges, rang.replace(i, newColExpr)) + } + range2Subtracted, err := otherRange[i].Subtract(overlapExpr) + if err != nil { + return nil, false, err + } + for _, newColExpr := range range2Subtracted { + ranges = append(ranges, otherRange.replace(i, newColExpr)) + } + // Create two ranges that replace each respective RangeColumnExpr with the overlapping one, giving us two + // ranges that are guaranteed to overlap (and are a subset of the originals). We can then recursively call this + // function on the new overlapping ranges which will eventually return a set of non-overlapping ranges. + newRanges, _, err := rang.replace(i, overlapExpr).RemoveOverlap(otherRange.replace(i, overlapExpr)) + if err != nil { + return nil, false, err + } + ranges = append(ranges, newRanges...) + break + } + + return ranges, true, nil } -// IsSubsetOf evaluates whether the calling RangeColumn is fully encompassed by the given RangeColumn. 
-func (rc RangeColumn) IsSubsetOf(other RangeColumn) (bool, error) { - for _, rce := range rc { - isSubset := false - for _, otherRCE := range other { - if ok, err := rce.IsSubsetOf(otherRCE); err != nil { - return false, err - } else if ok { - isSubset = true - break - } +// String returns this Range as a string for display purposes. +func (rang Range) String() string { + sb := strings.Builder{} + sb.WriteByte('{') + for i, colExpr := range rang { + if i != 0 { + sb.WriteString(", ") } - if !isSubset { - return false, nil + sb.WriteString(colExpr.String()) + } + sb.WriteByte('}') + return sb.String() +} + +// DebugString returns this Range as a string for debugging purposes. +func (rang Range) DebugString() string { + sb := strings.Builder{} + sb.WriteByte('{') + for i, colExpr := range rang { + if i != 0 { + sb.WriteString(", ") } + sb.WriteString(colExpr.DebugString()) } - return true, nil + sb.WriteByte('}') + return sb.String() } -// IsSupersetOf evaluates whether the calling RangeColumn fully encompasses the given RangeColumn. -func (rc RangeColumn) IsSupersetOf(other RangeColumn) (bool, error) { - return other.IsSubsetOf(rc) +// replace returns a new Range with the column at the given index replaced by the given RangeColumnExpr. Does NOT +// perform any validation checks such as the index being within the bounds of the Range or the RangeColumnExpr having +// the same type as the other columns, so use with caution. +func (rang Range) replace(i int, colExpr RangeColumnExpr) Range { + newRange := rang.Copy() + newRange[i] = colExpr + return newRange } // IntersectRanges intersects each Range for each column expression. If a RangeColumnExpr ends up with no valid ranges @@ -208,6 +349,38 @@ func IntersectRanges(ranges ...Range) Range { return rang } +// RemoveOverlappingRanges removes all overlap between all ranges. 
+func RemoveOverlappingRanges(ranges ...Range) (RangeCollection, error) { + if len(ranges) == 0 { + return nil, nil + } + + // There are more efficient ways to do these comparisons, but this is just a simple implementation for now + var newRanges RangeCollection + for i := 0; i < len(ranges); i++ { + hadOverlap := false + for nri := 0; nri < len(newRanges); nri++ { + if resultingRanges, ok, err := ranges[i].RemoveOverlap(newRanges[nri]); err != nil { + return nil, err + } else if ok { + hadOverlap = true + // Remove the overlapping Range from newRanges + nrLast := len(newRanges) - 1 + newRanges[nri], newRanges[nrLast] = newRanges[nrLast], newRanges[nri] + newRanges = newRanges[:nrLast] + // Add the new ranges to the end of the given slice allowing us to compare those against everything else. + ranges = append(ranges, resultingRanges...) + break + } + } + if !hadOverlap { + newRanges = append(newRanges, ranges[i]) + } + } + + return newRanges, nil +} + // SimplifyRanges operates differently depending on whether the given RangeCollection represent a single RangeColumn or // multiple RangeColumns, as they have different rules. If the collections contain a single RangeColumn then they are // all unioned together. If the collections contain multiple RangeColumns then all RangeColumns that are a subset of @@ -224,12 +397,12 @@ func SimplifyRanges(ranges ...Range) (RangeCollection, error) { if len(ranges[0]) == 1 { var allRangeColExprs []RangeColumnExpr returnAllRange := true - for _, rangeCollection := range ranges { - if len(rangeCollection) != 1 { + for _, rang := range ranges { + if len(rang) != 1 { returnAllRange = false break } - allRangeColExprs = append(allRangeColExprs, rangeCollection[0]...) 
+ allRangeColExprs = append(allRangeColExprs, rang[0]) } if returnAllRange { var err error @@ -238,9 +411,9 @@ func SimplifyRanges(ranges ...Range) (RangeCollection, error) { return nil, err } if len(allRangeColExprs) == 0 { - return []Range{{RangeColumn{EmptyRangeColumnExpr(ranges[0][0][0].typ)}}}, nil + return []Range{{EmptyRangeColumnExpr(ranges[0][0].typ)}}, nil } - return []Range{{allRangeColExprs}}, nil + return []Range{allRangeColExprs}, nil } } diff --git a/sql/range_column_expr.go b/sql/range_column_expr.go index f7a6f4f29f..60dad14c02 100644 --- a/sql/range_column_expr.go +++ b/sql/range_column_expr.go @@ -17,6 +17,7 @@ package sql import ( "fmt" "sort" + "strings" ) // RangeType returns what a RangeColumnExpr represents, such as a GreaterThan on some column, or a column set between @@ -185,6 +186,83 @@ func (r RangeColumnExpr) IsConnected(other RangeColumnExpr) (bool, error) { return comp <= 0, nil } +// Overlaps evaluates whether the given RangeColumnExpr overlaps the calling RangeColumnExpr. If they do, returns the +// overlapping region as a RangeColumnExpr. 
+func (r RangeColumnExpr) Overlaps(other RangeColumnExpr) (RangeColumnExpr, bool, error) { + if r.typ.String() != other.typ.String() { + return EmptyRangeColumnExpr(r.typ), false, nil + } + comp, err := r.LowerBound.Compare(other.UpperBound, r.typ) + if err != nil || comp >= 0 { + return EmptyRangeColumnExpr(r.typ), false, err + } + comp, err = other.LowerBound.Compare(r.UpperBound, r.typ) + if err != nil || comp >= 0 { + return EmptyRangeColumnExpr(r.typ), false, err + } + lowerbound, err := GetRangeCutMax(r.typ, r.LowerBound, other.LowerBound) + if err != nil { + return EmptyRangeColumnExpr(r.typ), false, err + } + upperbound, err := GetRangeCutMin(r.typ, r.UpperBound, other.UpperBound) + if err != nil { + return EmptyRangeColumnExpr(r.typ), false, err + } + return RangeColumnExpr{ + LowerBound: lowerbound, + UpperBound: upperbound, + typ: r.typ, + }, true, nil +} + +// Subtract removes the given RangeColumnExpr from the calling RangeColumnExpr. In the event that the given +// RangeColumnExpr lies strictly inside the calling one (sharing neither bound), two RangeColumnExprs are returned. If the +// given RangeColumnExpr does not overlap the calling RangeColumnExpr, then the calling RangeColumnExpr is returned. +// If the calling RangeColumnExpr is a strict subset (or equivalent) of the given RangeColumnExpr, then an empty slice +// is returned. In all other cases, a slice with a single RangeColumnExpr will be returned.
+func (r RangeColumnExpr) Subtract(other RangeColumnExpr) ([]RangeColumnExpr, error) { + _, overlaps, err := r.Overlaps(other) + if err != nil { + return nil, err + } + if !overlaps { + return []RangeColumnExpr{r}, nil + } + lComp, err := r.LowerBound.Compare(other.LowerBound, r.typ) + if err != nil { + return nil, err + } + uComp, err := r.UpperBound.Compare(other.UpperBound, r.typ) + if err != nil { + return nil, err + } + switch (3 * (lComp + 1)) + (uComp + 1) { + case 0: // lComp == -1 && uComp == -1 + return []RangeColumnExpr{{r.LowerBound, other.LowerBound, r.typ}}, nil + case 1: // lComp == -1 && uComp == 0 + return []RangeColumnExpr{{r.LowerBound, other.LowerBound, r.typ}}, nil + case 2: // lComp == -1 && uComp == 1 + return []RangeColumnExpr{ + {r.LowerBound, other.LowerBound, r.typ}, + {other.UpperBound, r.UpperBound, r.typ}, + }, nil + case 3: // lComp == 0 && uComp == -1 + return nil, nil + case 4: // lComp == 0 && uComp == 0 + return nil, nil + case 5: // lComp == 0 && uComp == 1 + return []RangeColumnExpr{{other.UpperBound, r.UpperBound, r.typ}}, nil + case 6: // lComp == 1 && uComp == -1 + return nil, nil + case 7: // lComp == 1 && uComp == 0 + return nil, nil + case 8: // lComp == 1 && uComp == 1 + return []RangeColumnExpr{{other.UpperBound, r.UpperBound, r.typ}}, nil + default: // should never be hit + panic(fmt.Errorf("unknown RangeColumnExpr subtraction case: %d", (3*(lComp+1))+(uComp+1))) + } +} + // IsSubsetOf evaluates whether the calling RangeColumnExpr is fully encompassed by the given RangeColumnExpr. func (r RangeColumnExpr) IsSubsetOf(other RangeColumnExpr) (bool, error) { if r.typ.String() != other.typ.String() { @@ -206,9 +284,32 @@ func (r RangeColumnExpr) IsSupersetOf(other RangeColumnExpr) (bool, error) { return other.IsSubsetOf(r) } -// String returns this RangeColumnExpr as a string for debugging purposes. Will panic on errors. +// String returns this RangeColumnExpr as a string for display purposes. 
func (r RangeColumnExpr) String() string { - return fmt.Sprintf("RangeColumnExpr(%s, %s)", r.LowerBound.String(), r.UpperBound.String()) + return fmt.Sprintf("(%s, %s)", r.LowerBound.String(), r.UpperBound.String()) +} + +// DebugString returns this RangeColumnExpr as a string for debugging purposes. +func (r RangeColumnExpr) DebugString() string { + sb := strings.Builder{} + switch r.LowerBound.(type) { + case Above: + sb.WriteString("(" + fmt.Sprint(GetRangeCutKey(r.LowerBound))) + case Below: + sb.WriteString("[" + fmt.Sprint(GetRangeCutKey(r.LowerBound))) + case AboveAll, BelowAll: + sb.WriteString("(-∞") + } + sb.WriteString(", ") + switch r.UpperBound.(type) { + case Above: + sb.WriteString(fmt.Sprint(GetRangeCutKey(r.UpperBound)) + "]") + case Below: + sb.WriteString(fmt.Sprint(GetRangeCutKey(r.UpperBound)) + ")") + case AboveAll, BelowAll: + sb.WriteString("∞)") + } + return sb.String() } // TryIntersect attempts to intersect the given RangeColumnExpr with the calling RangeColumnExpr. Returns true if the @@ -358,7 +459,7 @@ func (r *rangeColumnExprSlice) Less(i, j int) bool { } // SimplifyRangeColumn combines all RangeColumnExprs that are connected and returns a new slice. -func SimplifyRangeColumn(rces ...RangeColumnExpr) (RangeColumn, error) { +func SimplifyRangeColumn(rces ...RangeColumnExpr) ([]RangeColumnExpr, error) { if len(rces) == 0 { return rces, nil } diff --git a/sql/range_cut.go b/sql/range_cut.go index 8c71f13543..09e2522643 100644 --- a/sql/range_cut.go +++ b/sql/range_cut.go @@ -22,7 +22,7 @@ import ( type RangeCut interface { // Compare returns an integer stating the relative position of the calling RangeCut to the given RangeCut. Compare(RangeCut, Type) (int, error) - // String returns the RangeCut as a string for debugging purposes. Will panic on errors. + // String returns the RangeCut as a string for display purposes. String() string // TypeAsLowerBound returns the bound type if the calling RangeCut is the lower bound of a range. 
TypeAsLowerBound() RangeBoundType @@ -57,6 +57,36 @@ func GetRangeCutKey(c RangeCut) interface{} { } } +// GetRangeCutMax returns the RangeCut with the highest value. +func GetRangeCutMax(typ Type, cuts ...RangeCut) (RangeCut, error) { + maxCut := cuts[0] + for i := 1; i < len(cuts); i++ { + comp, err := maxCut.Compare(cuts[i], typ) + if err != nil { + return maxCut, err + } + if comp == -1 { + maxCut = cuts[i] + } + } + return maxCut, nil +} + +// GetRangeCutMin returns the RangeCut with the lowest value. +func GetRangeCutMin(typ Type, cuts ...RangeCut) (RangeCut, error) { + minCut := cuts[0] + for i := 1; i < len(cuts); i++ { + comp, err := minCut.Compare(cuts[i], typ) + if err != nil { + return minCut, err + } + if comp == 1 { + minCut = cuts[i] + } + } + return minCut, nil +} + // Above represents the position immediately above the contained key. type Above struct { key interface{} diff --git a/sql/range_test.go b/sql/range_test.go new file mode 100644 index 0000000000..284b414c05 --- /dev/null +++ b/sql/range_test.go @@ -0,0 +1,245 @@ +// Copyright 2021 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package sql_test + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/expression" +) + +var rangeType = sql.Uint8 + +func TestRangeOverlapTwoColumns(t *testing.T) { + ctx := sql.NewEmptyContext() + x, y, _, values2, _ := setup() + + tests := []struct { + reference sql.Expression + ranges sql.RangeCollection + }{ + { + or( + and(lt(x, 2), gt(y, 5)), + and(gt(x, 8), gt(y, 5)), + and(gt(x, 5), gt(y, 8)), + ), + sql.RangeCollection{ + r(rlt(2), rgt(5)), + r(rgt(8), rgt(5)), + r(rgt(5), rgt(8)), + }, + }, + { + or( + and(lt(x, 2), gt(y, 5)), + and(gt(x, 8), gt(y, 5)), + and(gt(x, 5), lt(y, 8)), + ), + sql.RangeCollection{ + r(rlt(2), rgt(5)), + r(rgt(8), rgt(5)), + r(rgt(5), rlt(8)), + }, + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("Expr: %s\nRange: %s", test.reference.String(), test.ranges.DebugString()), func(t *testing.T) { + discreteRanges, err := sql.RemoveOverlappingRanges(test.ranges...) 
+ require.NoError(t, err) + for _, row := range values2 { + referenceBool, err := test.reference.Eval(ctx, row) + require.NoError(t, err) + rangeBool := evalRanges(t, discreteRanges, row) + assert.Equal(t, referenceBool, rangeBool, fmt.Sprintf("%v: DiscreteRanges: %s", row, discreteRanges.DebugString())) + } + }) + } +} + +func TestRangeOverlapThreeColumns(t *testing.T) { + ctx := sql.NewEmptyContext() + x, y, z, _, values3 := setup() + + tests := []struct { + reference sql.Expression + ranges sql.RangeCollection + }{ + { + or( + and(gt(x, 2), gt(y, 2), gt(z, 2)), + and(lt(x, 8), lt(y, 8), lt(z, 8)), + ), + sql.RangeCollection{ + r(rgt(2), rgt(2), rgt(2)), + r(rlt(8), rlt(8), rlt(8)), + }, + }, + { + or( + and(gte(x, 3), gte(y, 4), gt(z, 5)), + and(lte(x, 6), lt(y, 7), lte(z, 8)), + ), + sql.RangeCollection{ + r(rgte(3), rgte(4), rgt(5)), + r(rlte(6), rlt(7), rlte(8)), + }, + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("Expr: %s\nRange: %s", test.reference.String(), test.ranges.DebugString()), func(t *testing.T) { + discreteRanges, err := sql.RemoveOverlappingRanges(test.ranges...) 
+ require.NoError(t, err) + for _, row := range values3 { + referenceBool, err := test.reference.Eval(ctx, row) + require.NoError(t, err) + rangeBool := evalRanges(t, discreteRanges, row) + assert.Equal(t, referenceBool, rangeBool, fmt.Sprintf("%v: DiscreteRanges: %s", row, discreteRanges.DebugString())) + } + }) + } +} + +func setup() (x, y, z sql.Expression, values2, values3 [][]interface{}) { + values2 = make([][]interface{}, 0, 100) + values3 = make([][]interface{}, 0, 1000) + for i := byte(1); i <= 10; i++ { + for j := byte(1); j <= 10; j++ { + for k := byte(1); k <= 10; k++ { + values3 = append(values3, []interface{}{i, j, k}) + } + values2 = append(values2, []interface{}{i, j}) + } + } + x = expression.NewGetField(0, rangeType, "x", true) + y = expression.NewGetField(1, rangeType, "y", true) + z = expression.NewGetField(2, rangeType, "z", true) + return +} + +func evalRanges(t *testing.T, ranges []sql.Range, row []interface{}) bool { + found := false + for _, rang := range ranges { + if evalRange(t, rang, row) { + if !found { + found = true + } else { + assert.FailNow(t, "overlap in ranges") + } + } + } + return found +} + +func evalRange(t *testing.T, rang sql.Range, row []interface{}) bool { + rowRange := make(sql.Range, len(rang)) + for i, val := range row { + rowRange[i] = sql.ClosedRangeColumnExpr(val, val, rangeType) + } + ok, err := rang.IsSupersetOf(rowRange) + require.NoError(t, err) + return ok +} + +func eq(field sql.Expression, val uint8) sql.Expression { + return expression.NewNullSafeEquals(field, expression.NewLiteral(val, rangeType)) +} + +func lt(field sql.Expression, val uint8) sql.Expression { + return expression.NewNullSafeLessThan(field, expression.NewLiteral(val, rangeType)) +} + +func lte(field sql.Expression, val uint8) sql.Expression { + return expression.NewNullSafeLessThanOrEqual(field, expression.NewLiteral(val, rangeType)) +} + +func gt(field sql.Expression, val uint8) sql.Expression { + return 
expression.NewNullSafeGreaterThan(field, expression.NewLiteral(val, rangeType)) +} + +func gte(field sql.Expression, val uint8) sql.Expression { + return expression.NewNullSafeGreaterThanOrEqual(field, expression.NewLiteral(val, rangeType)) +} + +func not(field sql.Expression, val uint8) sql.Expression { + return expression.NewNot(eq(field, val)) +} + +func r(colExprs ...sql.RangeColumnExpr) sql.Range { + return colExprs +} + +func req(val byte) sql.RangeColumnExpr { + return sql.ClosedRangeColumnExpr(val, val, rangeType) +} + +func rlt(val byte) sql.RangeColumnExpr { + return sql.LessThanRangeColumnExpr(val, rangeType) +} + +func rlte(val byte) sql.RangeColumnExpr { + return sql.LessOrEqualRangeColumnExpr(val, rangeType) +} + +func rgt(val byte) sql.RangeColumnExpr { + return sql.GreaterThanRangeColumnExpr(val, rangeType) +} + +func rgte(val byte) sql.RangeColumnExpr { + return sql.GreaterOrEqualRangeColumnExpr(val, rangeType) +} + +func rcc(lowerbound, upperbound byte) sql.RangeColumnExpr { + return sql.CustomRangeColumnExpr(lowerbound, upperbound, sql.Closed, sql.Closed, rangeType) +} + +func rco(lowerbound, upperbound byte) sql.RangeColumnExpr { + return sql.CustomRangeColumnExpr(lowerbound, upperbound, sql.Closed, sql.Open, rangeType) +} + +func roc(lowerbound, upperbound byte) sql.RangeColumnExpr { + return sql.CustomRangeColumnExpr(lowerbound, upperbound, sql.Open, sql.Closed, rangeType) +} + +func roo(lowerbound, upperbound byte) sql.RangeColumnExpr { + return sql.CustomRangeColumnExpr(lowerbound, upperbound, sql.Open, sql.Open, rangeType) +} + +func or(expressions ...sql.Expression) sql.Expression { + if len(expressions) == 1 { + return expressions[0] + } + if expressions[0] == nil { + return or(expressions[1:]...) 
+ } + return expression.NewOr(expressions[0], or(expressions[1:]...)) +} + +func and(expressions ...sql.Expression) sql.Expression { + if len(expressions) == 1 { + return expressions[0] + } + if expressions[0] == nil { + return and(expressions[1:]...) + } + return expression.NewAnd(expressions[0], and(expressions[1:]...)) +}