+
+ Distinct |
+
+ {{< highlight "java" >}}
+DataSet dataSet = // [...]
+dataSet.distinct();
+ {{< /highlight >}}
+ |
+
+ {{< highlight "java" >}}
+DataStream dataStream = // [...]
+dataStream.keyBy(value -> value)
+ .reduce((value1, value2) -> value1);
+ {{< /highlight >}}
+ |
+
+
+ Hash-Partition |
+
+ {{< highlight "java" >}}
+DataSet<Tuple2<String, Integer>> dataSet = // [...]
+dataSet.partitionByHash(value -> value.f0);
+ {{< /highlight >}}
+ |
+
+ {{< highlight "java" >}}
+DataStream<Tuple2<String, Integer>> dataStream = // [...]
+// partition by the hashcode of key
+dataStream.partitionCustom(
+ (key, numSubpartition) -> Math.floorMod(key.hashCode(), numSubpartition),
+ value -> value.f0);
+ {{< /highlight >}}
+ |
+
+
+ Reduce on Full DataSet |
+
+ {{< highlight "java" >}}
+DataSet dataSet = // [...]
+dataSet.reduce(new ReduceFunction<>(){
+ // implement user-defined reduce logic
+ });
+ {{< /highlight >}}
+ |
+
+ {{< highlight "java" >}}
+DataStream dataStream = // [...]
+dataStream.windowAll(EndOfStreamWindows.get())
+ .reduce(new ReduceFunction<>(){
+ // implement user-defined reduce logic
+ });
+ {{< /highlight >}}
+ |
+
+
+ Aggregate on Full DataSet |
+
+ {{< highlight "java" >}}
+DataSet<Tuple2<String, Integer>> dataSet = // [...]
+// compute sum of the second field
+dataSet.aggregate(SUM, 1);
+// compute min of the second field
+dataSet.aggregate(MIN, 1);
+// compute max of the second field
+dataSet.aggregate(MAX, 1);
+ {{< /highlight >}}
+ |
+
+ {{< highlight "java" >}}
+DataStream<Tuple2<String, Integer>> dataStream = // [...]
+// compute sum of the second field
+dataStream.windowAll(EndOfStreamWindows.get())
+ .sum(1);
+// compute min of the second field
+dataStream.windowAll(EndOfStreamWindows.get())
+ .min(1);
+// compute max of the second field
+dataStream.windowAll(EndOfStreamWindows.get())
+ .max(1);
+ {{< /highlight >}}
+ |
+
+
+ GroupReduce on Full DataSet |
+
+ {{< highlight "java" >}}
+DataSet dataSet = // [...]
+dataSet.reduceGroup(new GroupReduceFunction<>(){
+ // implement user-defined group reduce logic
+ });
+ {{< /highlight >}}
+ |
+
+ {{< highlight "java" >}}
+DataStream dataStream = // [...]
+dataStream.windowAll(EndOfStreamWindows.get())
+ .apply(new AllWindowFunction<>(){
+ // implement user-defined group reduce logic
+ });
+ {{< /highlight >}}
+ |
+
+
+ GroupReduce on Grouped DataSet |
+
+ {{< highlight "java" >}}
+DataSet<Tuple2<String, Integer>> dataSet = // [...]
+dataSet.groupBy(value -> value.f0)
+ .reduceGroup(new GroupReduceFunction<>(){
+ // implement user-defined group reduce logic
+ });
+ {{< /highlight >}}
+ |
+
+ {{< highlight "java" >}}
+DataStream<Tuple2<String, Integer>> dataStream = // [...]
+dataStream.keyBy(value -> value.f0)
+ .window(EndOfStreamWindows.get())
+ .apply(new WindowFunction<>(){
+ // implement user-defined group reduce logic
+ });
+ {{< /highlight >}}
+ |
+
+
+ First-n |
+
+ {{< highlight "java" >}}
+dataSet.first(n)
+ {{< /highlight >}}
+ |
+
+ {{< highlight "java" >}}
+dataStream.windowAll(EndOfStreamWindows.get())
+ .apply(new AllWindowFunction<>(){
+ // implement first-n logic
+ });
+ {{< /highlight >}}
+ |
+
+
+ Join |
+
+ {{< highlight "java" >}}
+DataSet<Tuple2<String, Integer>> dataSet1 = // [...]
+DataSet<Tuple2<String, Integer>> dataSet2 = // [...]
+dataSet1.join(dataSet2)
+ .where(value -> value.f0)
+ .equalTo(value -> value.f0)
+ .with(new JoinFunction<>(){
+ // implement user-defined join logic
+ });
+ {{< /highlight >}}
+ |
+
+ {{< highlight "java" >}}
+DataStream<Tuple2<String, Integer>> dataStream1 = // [...]
+DataStream<Tuple2<String, Integer>> dataStream2 = // [...]
+dataStream1.join(dataStream2)
+ .where(value -> value.f0)
+ .equalTo(value -> value.f0)
+ .window(EndOfStreamWindows.get())
+ .apply(new JoinFunction<>(){
+ // implement user-defined join logic
+ });
+ {{< /highlight >}}
+ |
+
+
+ CoGroup |
+
+ {{< highlight "java" >}}
+DataSet<Tuple2<String, Integer>> dataSet1 = // [...]
+DataSet<Tuple2<String, Integer>> dataSet2 = // [...]
+dataSet1.coGroup(dataSet2)
+ .where(value -> value.f0)
+ .equalTo(value -> value.f0)
+ .with(new CoGroupFunction<>(){
+ // implement user-defined co group logic
+ });
+ {{< /highlight >}}
+ |
+
+ {{< highlight "java" >}}
+DataStream<Tuple2<String, Integer>> dataStream1 = // [...]
+DataStream<Tuple2<String, Integer>> dataStream2 = // [...]
+dataStream1.coGroup(dataStream2)
+ .where(value -> value.f0)
+ .equalTo(value -> value.f0)
+ .window(EndOfStreamWindows.get())
+ .apply(new CoGroupFunction<>(){
+ // implement user-defined co group logic
+ });
+ {{< /highlight >}}
+ |
+
+
+ OuterJoin |
+
+ {{< highlight "java" >}}
+DataSet<Tuple2<String, Integer>> dataSet1 = // [...]
+DataSet<Tuple2<String, Integer>> dataSet2 = // [...]
+// left outer join
+dataSet1.leftOuterJoin(dataSet2)
+ .where(value -> value.f0)
+ .equalTo(value -> value.f0)
+ .with(new JoinFunction<>(){
+ // implement user-defined left outer join logic
+ });
+// right outer join
+dataSet1.rightOuterJoin(dataSet2)
+ .where(value -> value.f0)
+ .equalTo(value -> value.f0)
+ .with(new JoinFunction<>(){
+ // implement user-defined right outer join logic
+ });
+ {{< /highlight >}}
+ |
+
+ {{< highlight "java" >}}
+ DataStream<Tuple2<String, Integer>> dataStream1 = // [...]
+ DataStream<Tuple2<String, Integer>> dataStream2 = // [...]
+ // left outer join
+ dataStream1.coGroup(dataStream2)
+ .where(value -> value.f0)
+ .equalTo(value -> value.f0)
+ .window(EndOfStreamWindows.get())
+ .apply((leftIterable, rightIterable, collector) -> {
+ if(!rightIterable.iterator().hasNext()){
+ // implement user-defined left outer join logic
+ }
+ });
+ // right outer join
+ dataStream1.coGroup(dataStream2)
+ .where(value -> value.f0)
+ .equalTo(value -> value.f0)
+ .window(EndOfStreamWindows.get())
+ .apply((leftIterable, rightIterable, collector) -> {
+ if(!leftIterable.iterator().hasNext()){
+ // implement user-defined right outer join logic
+ }
+ });
+ {{< /highlight >}}
+ |
+
+
+