-
Notifications
You must be signed in to change notification settings - Fork 4.2k
/
distinct.go
39 lines (33 loc) · 1.32 KB
/
distinct.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package filter
import (
"github.com/apache/beam/sdks/go/pkg/beam"
)
// Distinct returns a PCollection<T> holding each element of the input
// PCollection<T> once, where two elements count as duplicates when they are
// equal under coder equality.
//
// Each element is first turned into an (element, 1) pair by mapFn. The pairs
// are then combined per key with mergeFn, and finally the values are dropped
// so that only the keys remain.
func Distinct(s beam.Scope, col beam.PCollection) beam.PCollection {
	s = s.Scope("filter.Distinct")

	// Key every element by itself; the attached int is only a placeholder.
	keyed := beam.ParDo(s, mapFn, col)

	// Combine the placeholders per key, then strip them off again.
	return beam.DropValue(s, beam.CombinePerKey(s, mergeFn, keyed))
}
// mapFn returns elm unchanged together with the constant 1. Distinct passes
// it to beam.ParDo so that every element becomes the first component of a
// pair; the int is a placeholder whose value mergeFn ignores.
func mapFn(elm beam.T) (beam.T, int) {
	const placeholder = 1
	return elm, placeholder
}
// mergeFn ignores both of its arguments and always returns 1. Distinct passes
// it to beam.CombinePerKey as the function that merges the placeholder ints
// produced by mapFn.
func mergeFn(_, _ int) int {
	const merged = 1
	return merged
}