Expand Up @@ -133,14 +133,8 @@ public void evaluate(GroupByKey<K, V> transform, EvaluationContext context) {
WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());

// --- group by key only.
Long bundleSize =
context.getSerializableOptions().get().as(SparkPipelineOptions.class).getBundleSize();
Partitioner partitioner =
(bundleSize > 0)
? new HashPartitioner(context.getSparkContext().defaultParallelism())
: null;
JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> groupedByKey =
GroupCombineFunctions.groupByKeyOnly(inRDD, keyCoder, wvCoder, partitioner);
GroupCombineFunctions.groupByKeyOnly(inRDD, keyCoder, wvCoder, getPartitioner(context));

// --- now group also by window.
// for batch, GroupAlsoByWindow uses an in-memory StateInternals.
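The hunk above replaces the inline bundle-size check with a call to the new getPartitioner(context) helper (added further down in this file). The reason an explicit Partitioner is worth threading through at all: Spark records the partitioner on the shuffled RDD, so a later stage that presents an equal partitioner can reuse the existing partitioning instead of shuffling again. A minimal, self-contained sketch of that Spark behavior (plain Spark API, not Beam runner code; the class name and sample data are illustrative):

    import java.util.Arrays;
    import org.apache.spark.HashPartitioner;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import scala.Tuple2;

    public class ExplicitPartitionerDemo {
      public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("explicit-partitioner");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
          JavaPairRDD<String, Integer> pairs =
              sc.parallelizePairs(
                  Arrays.asList(new Tuple2<>("a", 1), new Tuple2<>("b", 2), new Tuple2<>("a", 3)));
          HashPartitioner partitioner = new HashPartitioner(sc.defaultParallelism());
          // groupByKey(partitioner) pins the shuffle to this exact partitioner; the
          // result advertises it, so a downstream stage using an equal partitioner
          // (same class, same partition count) triggers no second shuffle.
          JavaPairRDD<String, Iterable<Integer>> grouped = pairs.groupByKey(partitioner);
          System.out.println(grouped.partitioner().isPresent()); // true
        }
      }
    }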
@@ -377,6 +371,7 @@ public void evaluate(
                 (KvCoder) context.getInput(transform).getCoder(),
                 windowingStrategy.getWindowFn().windowCoder(),
                 (JavaRDD) inRDD,
+                getPartitioner(context),
                 (MultiDoFnFunction) multiDoFnFunction);
         } else {
           all = inRDD.mapPartitionsToPair(multiDoFnFunction);
@@ -420,14 +415,15 @@ private static <K, V, OutputT> JavaPairRDD<TupleTag<?>, WindowedValue<?>> statef
       KvCoder<K, V> kvCoder,
       Coder<? extends BoundedWindow> windowCoder,
       JavaRDD<WindowedValue<KV<K, V>>> kvInRDD,
+      Partitioner partitioner,
       MultiDoFnFunction<KV<K, V>, OutputT> doFnFunction) {
     Coder<K> keyCoder = kvCoder.getKeyCoder();

     final WindowedValue.WindowedValueCoder<V> wvCoder =
         WindowedValue.FullWindowedValueCoder.of(kvCoder.getValueCoder(), windowCoder);

     JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> groupRDD =
-        GroupCombineFunctions.groupByKeyOnly(kvInRDD, keyCoder, wvCoder, null);
+        GroupCombineFunctions.groupByKeyOnly(kvInRDD, keyCoder, wvCoder, partitioner);

     return groupRDD
         .map(
@@ -550,6 +546,14 @@ public String toNativeString() {
     };
   }

+  private static Partitioner getPartitioner(EvaluationContext context) {
+    Long bundleSize =
+        context.getSerializableOptions().get().as(SparkPipelineOptions.class).getBundleSize();
+    return (bundleSize > 0)
+        ? null
+        : new HashPartitioner(context.getSparkContext().defaultParallelism());
+  }
+
   private static final Map<String, TransformEvaluator<?>> EVALUATORS = new HashMap<>();

   static {
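The new helper consolidates the decision the first hunk previously made inline, and inverts it: when bundleSize > 0 it now returns null (letting Spark choose the partitioning, presumably so an explicitly configured bundle size keeps its own partition layout), and otherwise it returns a HashPartitioner sized to defaultParallelism, so every groupByKeyOnly call in this file shuffles the same way. For reference, a hedged sketch of how that option gets set; SparkPipelineOptions and getBundleSize() appear in the diff, while the wiring below is illustrative rather than part of this PR:

    import org.apache.beam.runners.spark.SparkPipelineOptions;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;

    public class BundleSizeWiring {
      public static void main(String[] args) {
        SparkPipelineOptions options = PipelineOptionsFactory.as(SparkPipelineOptions.class);
        // Any value > 0 makes getPartitioner(context) return null; leaving it unset
        // keeps the shared HashPartitioner path. The setter is assumed to pair with
        // the getBundleSize() getter shown in the diff, as Beam options interfaces do.
        options.setBundleSize(10_000_000L);
        System.out.println(options.getBundleSize());
      }
    }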
The second file in the PR (the streaming side of the translator) picks up the matching change:
@@ -83,6 +83,7 @@
 import org.apache.beam.sdk.values.WindowingStrategy;
 import org.apache.beam.vendor.guava.v20_0.com.google.common.collect.ImmutableMap;
 import org.apache.spark.Accumulator;
+import org.apache.spark.HashPartitioner;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.JavaSparkContext$;
@@ -305,7 +306,11 @@ public void evaluate(GroupByKey<K, V> transform, EvaluationContext context) {
         JavaDStream<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> groupedByKeyStream =
             dStream.transform(
                 rdd ->
-                    GroupCombineFunctions.groupByKeyOnly(rdd, coder.getKeyCoder(), wvCoder, null));
+                    GroupCombineFunctions.groupByKeyOnly(
+                        rdd,
+                        coder.getKeyCoder(),
+                        wvCoder,
+                        new HashPartitioner(rdd.rdd().sparkContext().defaultParallelism())));

         // --- now group also by window.
         JavaDStream<WindowedValue<KV<K, Iterable<V>>>> outStream =
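Unlike the batch path, the streaming path gets no bundle-size escape hatch: every micro-batch is grouped with a HashPartitioner. The partitioner is also built inside the transform closure from the batch's own RDD, with rdd.rdd().sparkContext() unwrapping the Java wrapper to reach the underlying SparkContext, rather than from the EvaluationContext as in the batch file. A small sketch of that unwrapping (plain Spark API; the local context setup is illustrative):

    import java.util.Arrays;
    import org.apache.spark.HashPartitioner;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    public class UnwrapDemo {
      public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("unwrap-demo");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
          JavaRDD<String> rdd = sc.parallelize(Arrays.asList("a", "b", "c"));
          // JavaRDD.rdd() exposes the underlying Scala RDD, whose sparkContext()
          // yields the driver's SparkContext; defaultParallelism sizes the partitioner.
          int parallelism = rdd.rdd().sparkContext().defaultParallelism();
          HashPartitioner partitioner = new HashPartitioner(parallelism);
          System.out.println(partitioner.numPartitions()); // == defaultParallelism
        }
      }
    }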