From dddbe671a0d663045110b89ad9bb85ce9a7e7051 Mon Sep 17 00:00:00 2001 From: Richard Deurwaarder Date: Sat, 26 May 2018 14:59:32 +0200 Subject: [PATCH 01/26] [FLINK-9311] [pubsub] Add PubSubSource without checkpointing --- .../flink-connector-pubsub/pom.xml | 75 ++++++++++++ .../connectors/pubsub/PubSubSource.java | 109 ++++++++++++++++++ .../connectors/pubsub/SubscriberFactory.java | 66 +++++++++++ .../SerializableCredentialsProvider.java | 55 +++++++++ .../connectors/pubsub/PubSubSourceTest.java | 87 ++++++++++++++ .../pubsub/SubscriberFactoryTest.java | 58 ++++++++++ flink-connectors/pom.xml | 1 + 7 files changed, 451 insertions(+) create mode 100644 flink-connectors/flink-connector-pubsub/pom.xml create mode 100644 flink-connectors/flink-connector-pubsub/src/main/java/org/apache/flink/streaming/connectors/pubsub/PubSubSource.java create mode 100644 flink-connectors/flink-connector-pubsub/src/main/java/org/apache/flink/streaming/connectors/pubsub/SubscriberFactory.java create mode 100644 flink-connectors/flink-connector-pubsub/src/main/java/org/apache/flink/streaming/connectors/pubsub/common/SerializableCredentialsProvider.java create mode 100644 flink-connectors/flink-connector-pubsub/src/test/java/org/apache/flink/streaming/connectors/pubsub/PubSubSourceTest.java create mode 100644 flink-connectors/flink-connector-pubsub/src/test/java/org/apache/flink/streaming/connectors/pubsub/SubscriberFactoryTest.java diff --git a/flink-connectors/flink-connector-pubsub/pom.xml b/flink-connectors/flink-connector-pubsub/pom.xml new file mode 100644 index 0000000000000..0a8013e9934d1 --- /dev/null +++ b/flink-connectors/flink-connector-pubsub/pom.xml @@ -0,0 +1,75 @@ + + + + + 4.0.0 + + + org.apache.flink + flink-connectors + 1.6-SNAPSHOT + .. + + + flink-connector-pubsub_${scala.binary.version} + flink-connector-pubsub + + jar + + + 0.42.1-beta + + + + + + org.apache.flink + flink-streaming-java_${scala.binary.version} + ${project.version} + provided + + + + com.google.cloud + google-cloud-pubsub + ${pubsub.version} + + + + org.apache.flink + flink-streaming-java_${scala.binary.version} + ${project.version} + test + test-jar + + + + org.apache.flink + flink-runtime_${scala.binary.version} + ${project.version} + test-jar + test + + + + + diff --git a/flink-connectors/flink-connector-pubsub/src/main/java/org/apache/flink/streaming/connectors/pubsub/PubSubSource.java b/flink-connectors/flink-connector-pubsub/src/main/java/org/apache/flink/streaming/connectors/pubsub/PubSubSource.java new file mode 100644 index 0000000000000..4651f98fad7e3 --- /dev/null +++ b/flink-connectors/flink-connector-pubsub/src/main/java/org/apache/flink/streaming/connectors/pubsub/PubSubSource.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.pubsub; + +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; +import org.apache.flink.streaming.connectors.pubsub.common.SerializableCredentialsProvider; + +import com.google.cloud.pubsub.v1.AckReplyConsumer; +import com.google.cloud.pubsub.v1.MessageReceiver; +import com.google.pubsub.v1.ProjectSubscriptionName; +import com.google.pubsub.v1.PubsubMessage; + +import java.io.IOException; + +import static org.apache.flink.streaming.connectors.pubsub.common.SerializableCredentialsProvider.credentialsProviderFromEnvironmentVariables; + + +/** + * PubSub Source, this Source will consume PubSub messages from a subscription and Acknowledge them as soon as they have been received. + */ +public class PubSubSource extends RichParallelSourceFunction implements MessageReceiver { + private final DeserializationSchema deserializationSchema; + private final SubscriberFactory subscriberFactory; + + private transient SourceContext sourceContext = null; + + PubSubSource(SubscriberFactory subscriberFactory, DeserializationSchema deserializationSchema) { + this.deserializationSchema = deserializationSchema; + this.subscriberFactory = subscriberFactory; + } + + /** + * Convenience factory method to return a PubSubSource with default application credentials based on environment variables. ({@link org.apache.flink.streaming.connectors.pubsub.common.SerializableCredentialsProvider}) + * @param projectSubscriptionName The google project and subscription to read from + * @param deserializationSchema Schema to deserialize the {@link PubsubMessage} + * @param The type of messages that will be read + * @return Returns a RichParallelSourceFunction which reads from a PubSub subscription + * @throws Exception exception is thrown when no default application credentials can be found + */ + public static PubSubSource withDefaultApplicationCredentials(ProjectSubscriptionName projectSubscriptionName, DeserializationSchema deserializationSchema) throws Exception { + return withCustomApplicationCredentials(projectSubscriptionName, deserializationSchema, credentialsProviderFromEnvironmentVariables()); + } + + /** + * Factory method to return a PubSubSource. + * @param projectSubscriptionName The google project and subscription to read from + * @param deserializationSchema Schema to deserialize the {@link PubsubMessage} + * @param serializableCredentialsProvider CredentialsProvider used to give the correct permissions to read from PubSub + * @param The type of messages that will be read + * @return Returns a RichParallelSourceFunction which reads from a PubSub subscription + */ + public static PubSubSource withCustomApplicationCredentials(ProjectSubscriptionName projectSubscriptionName, DeserializationSchema deserializationSchema, SerializableCredentialsProvider serializableCredentialsProvider) { + return new PubSubSource<>(new SubscriberFactory(serializableCredentialsProvider, projectSubscriptionName), deserializationSchema); + } + + @Override + public void open(Configuration configuration) throws Exception { + super.open(configuration); + subscriberFactory.initialize(this); + } + + @Override + public void run(SourceContext sourceContext) { + this.sourceContext = sourceContext; + subscriberFactory.startBlocking(); + } + + @Override + public void receiveMessage(PubsubMessage message, AckReplyConsumer consumer) { + if (sourceContext == null) { + consumer.nack(); + return; + } + + sourceContext.collect(uncheckedExceptionDeserialize(message.getData().toByteArray())); + consumer.ack(); + } + + @Override + public void cancel() { + subscriberFactory.stop(); + } + + private OUT uncheckedExceptionDeserialize(byte[] bytes) { + try { + return deserializationSchema.deserialize(bytes); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/flink-connectors/flink-connector-pubsub/src/main/java/org/apache/flink/streaming/connectors/pubsub/SubscriberFactory.java b/flink-connectors/flink-connector-pubsub/src/main/java/org/apache/flink/streaming/connectors/pubsub/SubscriberFactory.java new file mode 100644 index 0000000000000..e115d5b356250 --- /dev/null +++ b/flink-connectors/flink-connector-pubsub/src/main/java/org/apache/flink/streaming/connectors/pubsub/SubscriberFactory.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.pubsub; + +import org.apache.flink.streaming.connectors.pubsub.common.SerializableCredentialsProvider; + +import com.google.api.core.ApiService; +import com.google.cloud.pubsub.v1.MessageReceiver; +import com.google.cloud.pubsub.v1.Subscriber; +import com.google.pubsub.v1.ProjectSubscriptionName; + +import java.io.Serializable; + +class SubscriberFactory implements Serializable { + private final SerializableCredentialsProvider serializableCredentialsProvider; + private final String projectId; + private final String subscriptionId; + + private transient Subscriber subscriber; + + SubscriberFactory(SerializableCredentialsProvider serializableCredentialsProvider, ProjectSubscriptionName projectSubscriptionName) { + this.serializableCredentialsProvider = serializableCredentialsProvider; + this.projectId = projectSubscriptionName.getProject(); + this.subscriptionId = projectSubscriptionName.getSubscription(); + } + + void initialize(MessageReceiver messageReceiver) { + this.subscriber = Subscriber.newBuilder(ProjectSubscriptionName.of(projectId, subscriptionId), messageReceiver) + .setCredentialsProvider(serializableCredentialsProvider) + .build(); + } + + void startBlocking() { + ApiService apiService = subscriber.startAsync(); + apiService.awaitRunning(); + + if (apiService.state() != ApiService.State.RUNNING) { + throw new IllegalStateException("Could not start PubSubSubscriber, ApiService.State: " + apiService.state()); + } + apiService.awaitTerminated(); + } + + void stop() { + subscriber.stopAsync().awaitTerminated(); + } + + Subscriber getSubscriber() { + return subscriber; + } + +} diff --git a/flink-connectors/flink-connector-pubsub/src/main/java/org/apache/flink/streaming/connectors/pubsub/common/SerializableCredentialsProvider.java b/flink-connectors/flink-connector-pubsub/src/main/java/org/apache/flink/streaming/connectors/pubsub/common/SerializableCredentialsProvider.java new file mode 100644 index 0000000000000..d7657d4e7884e --- /dev/null +++ b/flink-connectors/flink-connector-pubsub/src/main/java/org/apache/flink/streaming/connectors/pubsub/common/SerializableCredentialsProvider.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.pubsub.common; + +import com.google.api.gax.core.CredentialsProvider; +import com.google.auth.Credentials; + +import java.io.IOException; +import java.io.Serializable; + +import static com.google.cloud.pubsub.v1.SubscriptionAdminSettings.defaultCredentialsProviderBuilder; + +/** + * Wrapper class for CredentialsProvider to make it Serializable. This can be used to pass on Credentials to SourceFunctions + */ +public class SerializableCredentialsProvider implements CredentialsProvider, Serializable { + private final Credentials credentials; + + /** + * @param credentials + */ + public SerializableCredentialsProvider(Credentials credentials) { + this.credentials = credentials; + } + + /** + * Creates a SerializableCredentialsProvider for a PubSubSubscription based on environment variables. {@link com.google.cloud.pubsub.v1.SubscriptionAdminSettings} + * @return serializableCredentialsProvider + * @throws IOException thrown by {@link Credentials} + */ + public static SerializableCredentialsProvider credentialsProviderFromEnvironmentVariables() throws IOException { + Credentials credentials = defaultCredentialsProviderBuilder().build().getCredentials(); + return new SerializableCredentialsProvider(credentials); + } + + @Override + public Credentials getCredentials() { + return credentials; + } +} diff --git a/flink-connectors/flink-connector-pubsub/src/test/java/org/apache/flink/streaming/connectors/pubsub/PubSubSourceTest.java b/flink-connectors/flink-connector-pubsub/src/test/java/org/apache/flink/streaming/connectors/pubsub/PubSubSourceTest.java new file mode 100644 index 0000000000000..393d142508c9e --- /dev/null +++ b/flink-connectors/flink-connector-pubsub/src/test/java/org/apache/flink/streaming/connectors/pubsub/PubSubSourceTest.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.pubsub; + +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.streaming.api.functions.source.SourceFunction; + +import com.google.cloud.pubsub.v1.AckReplyConsumer; +import com.google.protobuf.ByteString; +import com.google.pubsub.v1.PubsubMessage; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.runners.MockitoJUnitRunner; + +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + + +/** + * Test for {@link PubSubSource}. + */ +@RunWith(MockitoJUnitRunner.class) +public class PubSubSourceTest { + private static final String MESSAGE = "Message"; + private static final byte[] SERIALIZED_MESSAGE = MESSAGE.getBytes(); + @Mock + private SubscriberFactory subscriberFactory; + @Mock + private SourceFunction.SourceContext sourceContext; + @Mock + private DeserializationSchema deserializationSchema; + @Mock + private AckReplyConsumer ackReplyConsumer; + + @Test + public void testOpen() throws Exception { + PubSubSource pubSubSource = new PubSubSource<>(subscriberFactory, deserializationSchema); + pubSubSource.open(null); + + verify(subscriberFactory, times(1)).initialize(pubSubSource); + } + + @Test + public void testRun() { + PubSubSource pubSubSource = new PubSubSource<>(subscriberFactory, deserializationSchema); + pubSubSource.run(sourceContext); + + verify(subscriberFactory, times(1)).startBlocking(); + } + + @Test + public void testWithoutCheckpoints() throws Exception { + when(deserializationSchema.deserialize(SERIALIZED_MESSAGE)).thenReturn(MESSAGE); + + PubSubSource pubSubSource = new PubSubSource<>(subscriberFactory, deserializationSchema); + pubSubSource.open(null); + pubSubSource.run(sourceContext); + + pubSubSource.receiveMessage(pubSubMessage(), ackReplyConsumer); + + verify(sourceContext, times(1)).collect(MESSAGE); + verify(ackReplyConsumer, times(1)).ack(); + } + + private PubsubMessage pubSubMessage() { + return PubsubMessage.newBuilder() + .setData(ByteString.copyFrom(SERIALIZED_MESSAGE)) + .build(); + } +} diff --git a/flink-connectors/flink-connector-pubsub/src/test/java/org/apache/flink/streaming/connectors/pubsub/SubscriberFactoryTest.java b/flink-connectors/flink-connector-pubsub/src/test/java/org/apache/flink/streaming/connectors/pubsub/SubscriberFactoryTest.java new file mode 100644 index 0000000000000..59725f1ea99e1 --- /dev/null +++ b/flink-connectors/flink-connector-pubsub/src/test/java/org/apache/flink/streaming/connectors/pubsub/SubscriberFactoryTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.pubsub; + +import org.apache.flink.streaming.connectors.pubsub.common.SerializableCredentialsProvider; + +import com.google.cloud.pubsub.v1.MessageReceiver; +import com.google.pubsub.v1.ProjectSubscriptionName; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.runners.MockitoJUnitRunner; + +import static org.apache.flink.api.java.ClosureCleaner.ensureSerializable; +import static org.apache.flink.streaming.connectors.pubsub.common.SerializableCredentialsProvider.credentialsProviderFromEnvironmentVariables; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.is; + +/** + * Tests for {@link SubscriberFactory}. + */ +@RunWith(MockitoJUnitRunner.class) +public class SubscriberFactoryTest { + @Mock + private SerializableCredentialsProvider credentialsProvider; + + @Mock + private MessageReceiver messageReceiver; + + @Test + public void testSerializedSubscriberBuilder() throws Exception { + SubscriberFactory factory = new SubscriberFactory(credentialsProviderFromEnvironmentVariables(), ProjectSubscriptionName.of("projectId", "subscriptionId")); + ensureSerializable(factory); + } + + @Test + public void testInitialisation() { + SubscriberFactory factory = new SubscriberFactory(credentialsProvider, ProjectSubscriptionName.of("projectId", "subscriptionId")); + factory.initialize(messageReceiver); + + assertThat(factory.getSubscriber().getSubscriptionNameString(), is(ProjectSubscriptionName.format("projectId", "subscriptionId"))); + } +} diff --git a/flink-connectors/pom.xml b/flink-connectors/pom.xml index 782d2beb66c46..0d93cc05c5dc1 100644 --- a/flink-connectors/pom.xml +++ b/flink-connectors/pom.xml @@ -55,6 +55,7 @@ under the License. flink-connector-nifi flink-connector-cassandra flink-connector-filesystem + flink-connector-pubsub