This repository has been archived by the owner on Feb 16, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 106
/
distinct.go
80 lines (71 loc) · 2.34 KB
/
distinct.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
// Copyright 2019 eBay Inc.
// Primary authors: Simon Fell, Diego Ongaro,
// Raymond Kroeker, and Sathish Kandasamy.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package exec
import (
"context"
"fmt"
"github.com/ebay/akutan/query/planner/plandef"
"github.com/ebay/akutan/util/parallel"
)
// newDistinctOp returns a new operator for the distinct operation. When
// executed it will remove duplicate rows from the output results. It
// requires exactly one input operator and panics otherwise (a planner bug).
func newDistinctOp(op *plandef.DistinctOp, inputs []queryOperator) operator {
	if len(inputs) != 1 {
		panic(fmt.Sprintf("DistinctOp operation with unexpected inputs: %v", len(inputs)))
	}
	d := &distinctOp{
		def:   op,
		input: inputs[0],
	}
	return d
}
// distinctOp is the executable form of plandef.DistinctOp. It wraps a
// single input operator and filters duplicate rows out of its output.
type distinctOp struct {
	// def is the plan node this operator was built from.
	def *plandef.DistinctOp
	// input is the sole upstream operator whose rows are de-duplicated.
	input queryOperator
}
// columns returns the output columns of this operator. Distinct only drops
// rows; it never changes their shape, so the columns are exactly the input's.
func (op *distinctOp) columns() Columns {
	return op.input.columns()
}
// operator returns the plan definition node that this operator executes.
func (op *distinctOp) operator() plandef.Operator {
	return op.def
}
// execute runs the distinct operation: it streams result chunks from the
// input operator and forwards each row to res only the first time that row's
// identity key is seen, silently dropping later duplicates. It returns the
// error (if any) from running the input operator. Bulk binding is not
// supported; a binder with more than one row indicates a planner bug and
// triggers a panic.
func (op *distinctOp) execute(ctx context.Context, binder valueBinder, res results) error {
	if binder.len() != 1 {
		panic(fmt.Sprintf("distinctOp operator %v unexpectedly bulk bound to %d rows",
			op.def, binder.len()))
	}
	input := make(chan ResultChunk, 4)
	join := parallel.Go(func() {
		// Distinct doesn't care about the variable names, just populate column
		// indexes based on the number of input columns.
		cols := make([]int, len(op.input.columns()))
		for c := range cols {
			cols[c] = c
		}
		emitted := make(map[string]struct{})
		for chunk := range input {
			res.setFinalStatistics(chunk.FinalStatistics)
			for row := range chunk.offsets {
				key := string(chunk.identityKeysOf(row, cols))
				if _, dup := emitted[key]; !dup {
					emitted[key] = struct{}{}
					res.add(ctx, chunk.offsets[row], chunk.Facts[row], chunk.Row(row))
				}
			}
		}
	})
	err := op.input.run(ctx, binder, input)
	join()
	return err
}