-
Notifications
You must be signed in to change notification settings - Fork 4.6k
[BEAM-115] Unify Java and Python WindowingStrategy representations. #3222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,19 +17,24 @@ | |
| */ | ||
| package org.apache.beam.runners.core.construction; | ||
|
|
||
| import static com.google.common.base.Preconditions.checkArgument; | ||
|
|
||
| import com.google.protobuf.Any; | ||
| import com.google.protobuf.ByteString; | ||
| import com.google.protobuf.BytesValue; | ||
| import com.google.protobuf.InvalidProtocolBufferException; | ||
| import com.google.protobuf.util.Durations; | ||
| import com.google.protobuf.util.Timestamps; | ||
| import java.io.IOException; | ||
| import java.io.Serializable; | ||
| import org.apache.beam.sdk.common.runner.v1.RunnerApi; | ||
| import org.apache.beam.sdk.common.runner.v1.RunnerApi.Components; | ||
| import org.apache.beam.sdk.common.runner.v1.RunnerApi.FunctionSpec; | ||
| import org.apache.beam.sdk.common.runner.v1.RunnerApi.OutputTime; | ||
| import org.apache.beam.sdk.common.runner.v1.RunnerApi.SdkFunctionSpec; | ||
| import org.apache.beam.sdk.common.runner.v1.StandardWindowFns; | ||
| import org.apache.beam.sdk.transforms.windowing.FixedWindows; | ||
| import org.apache.beam.sdk.transforms.windowing.GlobalWindows; | ||
| import org.apache.beam.sdk.transforms.windowing.Sessions; | ||
| import org.apache.beam.sdk.transforms.windowing.SlidingWindows; | ||
| import org.apache.beam.sdk.transforms.windowing.TimestampCombiner; | ||
| import org.apache.beam.sdk.transforms.windowing.Trigger; | ||
| import org.apache.beam.sdk.transforms.windowing.Window.ClosingBehavior; | ||
|
|
@@ -153,9 +158,16 @@ public static TimestampCombiner timestampCombinerFromProto(RunnerApi.OutputTime | |
| } | ||
| } | ||
|
|
||
| // This URN says that the WindowFn is just a UDF blob the indicated SDK understands | ||
| public static final String GLOBAL_WINDOWS_FN = "beam:windowfn:global_windows:v0.1"; | ||
| public static final String FIXED_WINDOWS_FN = "beam:windowfn:fixed_windows:v0.1"; | ||
| public static final String SLIDING_WINDOWS_FN = "beam:windowfn:sliding_windows:v0.1"; | ||
| public static final String SESSION_WINDOWS_FN = "beam:windowfn:session_windows:v0.1"; | ||
| // This URN says that the WindowFn is just a UDF blob the Java SDK understands | ||
| // TODO: standardize such things | ||
| public static final String CUSTOM_WINDOWFN_URN = "urn:beam:windowfn:javasdk:0.1"; | ||
| public static final String SERIALIZED_JAVA_WINDOWFN_URN = "beam:windowfn:javasdk:v0.1"; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just curious - does the `v` in `v0.1` add anything?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes it clearer that this number is a version field.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd say drop it
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We use 'v' elsewhere (e.g. in the package name itself). Otherwise it looks like a ratio or something.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is in the package name to make it a valid package name. Not a huge deal. If you feel strongly about it I'm fine with it. |
||
| public static final String OLD_SERIALIZED_JAVA_WINDOWFN_URN = "urn:beam:windowfn:javasdk:0.1"; | ||
| // Remove this once the dataflow worker understands all the above formats. | ||
| private static final boolean USE_OLD_SERIALIZED_JAVA_WINDOWFN_URN = true; | ||
|
|
||
| /** | ||
| * Converts a {@link WindowFn} into a {@link RunnerApi.MessageWithComponents} where {@link | ||
|
|
@@ -164,19 +176,80 @@ public static TimestampCombiner timestampCombinerFromProto(RunnerApi.OutputTime | |
| */ | ||
| public static SdkFunctionSpec toProto( | ||
| WindowFn<?, ?> windowFn, @SuppressWarnings("unused") SdkComponents components) { | ||
| return SdkFunctionSpec.newBuilder() | ||
| // TODO: Set environment ID | ||
| .setSpec( | ||
| FunctionSpec.newBuilder() | ||
| .setUrn(CUSTOM_WINDOWFN_URN) | ||
| .setParameter( | ||
| Any.pack( | ||
| BytesValue.newBuilder() | ||
| .setValue( | ||
| ByteString.copyFrom( | ||
| SerializableUtils.serializeToByteArray(windowFn))) | ||
| .build()))) | ||
| .build(); | ||
| // TODO: Set environment IDs | ||
| if (USE_OLD_SERIALIZED_JAVA_WINDOWFN_URN) { | ||
| return SdkFunctionSpec.newBuilder() | ||
| .setSpec( | ||
| FunctionSpec.newBuilder() | ||
| .setUrn(OLD_SERIALIZED_JAVA_WINDOWFN_URN) | ||
| .setParameter( | ||
| Any.pack( | ||
| BytesValue.newBuilder() | ||
| .setValue( | ||
| ByteString.copyFrom( | ||
| SerializableUtils.serializeToByteArray(windowFn))) | ||
| .build()))) | ||
| .build(); | ||
| } else if (windowFn instanceof GlobalWindows) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should have some sort of registration scheme for WindowFns in the medium-long term, even if only to simplify this method. File a JIRA?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| return SdkFunctionSpec.newBuilder() | ||
| .setSpec(FunctionSpec.newBuilder().setUrn(GLOBAL_WINDOWS_FN)) | ||
| .build(); | ||
| } else if (windowFn instanceof FixedWindows) { | ||
| return SdkFunctionSpec.newBuilder() | ||
| .setSpec( | ||
| FunctionSpec.newBuilder() | ||
| .setUrn(FIXED_WINDOWS_FN) | ||
| .setParameter( | ||
| Any.pack( | ||
| StandardWindowFns.FixedWindowsPayload.newBuilder() | ||
| .setSize(Durations.fromMillis( | ||
| ((FixedWindows) windowFn).getSize().getMillis())) | ||
| .setOffset(Timestamps.fromMillis( | ||
| ((FixedWindows) windowFn).getOffset().getMillis())) | ||
| .build()))) | ||
| .build(); | ||
| } else if (windowFn instanceof SlidingWindows) { | ||
| return SdkFunctionSpec.newBuilder() | ||
| .setSpec( | ||
| FunctionSpec.newBuilder() | ||
| .setUrn(SLIDING_WINDOWS_FN) | ||
| .setParameter( | ||
| Any.pack( | ||
| StandardWindowFns.SlidingWindowsPayload.newBuilder() | ||
| .setSize(Durations.fromMillis( | ||
| ((SlidingWindows) windowFn).getSize().getMillis())) | ||
| .setOffset(Timestamps.fromMillis( | ||
| ((SlidingWindows) windowFn).getOffset().getMillis())) | ||
| .setPeriod(Durations.fromMillis( | ||
| ((SlidingWindows) windowFn).getPeriod().getMillis())) | ||
| .build()))) | ||
| .build(); | ||
| } else if (windowFn instanceof Sessions) { | ||
| return SdkFunctionSpec.newBuilder() | ||
| .setSpec( | ||
| FunctionSpec.newBuilder() | ||
| .setUrn(SESSION_WINDOWS_FN) | ||
| .setParameter( | ||
| Any.pack( | ||
| StandardWindowFns.SessionsPayload.newBuilder() | ||
| .setGapSize(Durations.fromMillis( | ||
| ((Sessions) windowFn).getGapDuration().getMillis())) | ||
| .build()))) | ||
| .build(); | ||
| } else { | ||
| return SdkFunctionSpec.newBuilder() | ||
| .setSpec( | ||
| FunctionSpec.newBuilder() | ||
| .setUrn(SERIALIZED_JAVA_WINDOWFN_URN) | ||
| .setParameter( | ||
| Any.pack( | ||
| BytesValue.newBuilder() | ||
| .setValue( | ||
| ByteString.copyFrom( | ||
| SerializableUtils.serializeToByteArray(windowFn))) | ||
| .build()))) | ||
| .build(); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -261,18 +334,38 @@ public static RunnerApi.WindowingStrategy toProto( | |
|
|
||
| public static WindowFn<?, ?> windowFnFromProto(SdkFunctionSpec windowFnSpec) | ||
| throws InvalidProtocolBufferException { | ||
| checkArgument( | ||
| windowFnSpec.getSpec().getUrn().equals(CUSTOM_WINDOWFN_URN), | ||
| "Only Java-serialized %s instances are supported, with URN %s. But found URN %s", | ||
| WindowFn.class.getSimpleName(), | ||
| CUSTOM_WINDOWFN_URN, | ||
| windowFnSpec.getSpec().getUrn()); | ||
|
|
||
| Object deserializedWindowFn = | ||
| SerializableUtils.deserializeFromByteArray( | ||
| switch (windowFnSpec.getSpec().getUrn()) { | ||
| case GLOBAL_WINDOWS_FN: | ||
| return new GlobalWindows(); | ||
| case FIXED_WINDOWS_FN: | ||
| StandardWindowFns.FixedWindowsPayload fixedParams = | ||
| windowFnSpec.getSpec().getParameter().unpack( | ||
| StandardWindowFns.FixedWindowsPayload.class); | ||
| return FixedWindows.of( | ||
| Duration.millis(Durations.toMillis(fixedParams.getSize()))) | ||
| .withOffset(Duration.millis(Timestamps.toMillis(fixedParams.getOffset()))); | ||
| case SLIDING_WINDOWS_FN: | ||
| StandardWindowFns.SlidingWindowsPayload slidingParams = | ||
| windowFnSpec.getSpec().getParameter().unpack( | ||
| StandardWindowFns.SlidingWindowsPayload.class); | ||
| return SlidingWindows.of( | ||
| Duration.millis(Durations.toMillis(slidingParams.getSize()))) | ||
| .every(Duration.millis(Durations.toMillis(slidingParams.getPeriod()))) | ||
| .withOffset(Duration.millis(Timestamps.toMillis(slidingParams.getOffset()))); | ||
| case SESSION_WINDOWS_FN: | ||
| StandardWindowFns.SessionsPayload sessionParams = | ||
| windowFnSpec.getSpec().getParameter().unpack( | ||
| StandardWindowFns.SessionsPayload.class); | ||
| return Sessions.withGapDuration( | ||
| Duration.millis(Durations.toMillis(sessionParams.getGapSize()))); | ||
| case SERIALIZED_JAVA_WINDOWFN_URN: | ||
| case OLD_SERIALIZED_JAVA_WINDOWFN_URN: | ||
| return (WindowFn<?, ?>) SerializableUtils.deserializeFromByteArray( | ||
| windowFnSpec.getSpec().getParameter().unpack(BytesValue.class).getValue().toByteArray(), | ||
| "WindowFn"); | ||
|
|
||
| return (WindowFn<?, ?>) deserializedWindowFn; | ||
| default: | ||
| throw new IllegalArgumentException( | ||
| "Unknown or unsupported WindowFn: " + windowFnSpec.getSpec().getUrn()); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: use |
||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| /* | ||
| * Protocol Buffers describing the Runner API, which is the runner-independent, | ||
| * SDK-independent definition of the Beam model. | ||
| */ | ||
|
|
||
| syntax = "proto3"; | ||
|
|
||
| package org.apache.beam.runner_api.v1; | ||
|
|
||
| option java_package = "org.apache.beam.sdk.common.runner.v1"; | ||
| option java_outer_classname = "StandardWindowFns"; | ||
|
|
||
| import "google/protobuf/duration.proto"; | ||
| import "google/protobuf/timestamp.proto"; | ||
|
|
||
| // beam:windowfn:global_windows:v0.1 | ||
| // empty payload | ||
|
|
||
| // beam:windowfn:fixed_windows:v0.1 | ||
| message FixedWindowsPayload { | ||
| google.protobuf.Duration size = 1; | ||
| google.protobuf.Timestamp offset = 2; | ||
| } | ||
|
|
||
| // beam:windowfn:sliding_windows:v0.1 | ||
| message SlidingWindowsPayload { | ||
| google.protobuf.Duration size = 1; | ||
| google.protobuf.Timestamp offset = 2; | ||
| google.protobuf.Duration period = 3; | ||
| } | ||
|
|
||
| // beam:windowfn:session_windows:v0.1 | ||
| message SessionsPayload { | ||
| google.protobuf.Duration gap_size = 1; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW the `urn:` at the beginning is the URI scheme that makes it a URN. The official syntax is `"urn:" Namespace ":" Namespace-specific format` (https://tools.ietf.org/html/rfc8141#section-2).
I agree the scheme is mostly pointless but wanted to just note the departure.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(in particular, top level schemes and the remainder of the URI have exactly the same relationship as URN namespaces and the NSS, as far as I can tell)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't know URNs were an official thing with a spec... We can prefix everything with `urn:` if you want.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, I just wanted to note it. I find the prefixing a bit tedious. I've been doing it because following internet standards is my first instinct, but I don't think it is worthwhile. We should lay out the namespaces in a section of the runner guide on the website.