Skip to content

Commit

Permalink
[BEAM-6207] Refactored SyntheticDataPubSubPublisher into SyntheticDataPublisher
Browse files Browse the repository at this point in the history

- Publisher can now publish to both sinks simultaneously
- Cleaned up code publishing data to sinks
  • Loading branch information
Michal Walenia committed Jan 30, 2019
1 parent b2b0b28 commit 07bc1c6
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 38 deletions.
2 changes: 0 additions & 2 deletions sdks/java/io/synthetic/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,11 @@ dependencies {
shadow library.java.jackson_annotations
shadow library.java.jackson_databind
shadow library.java.guava
shadow library.java.kafka_clients

shadowTest library.java.vendored_guava_20_0
testCompile library.java.junit
testCompile library.java.hamcrest_core
testCompile library.java.hamcrest_library
shadow project(path: ":beam-sdks-java-core", configuration: "shadow")
shadow project(path: ":beam-runners-direct-java", configuration: "shadow")
shadow project(path: ":beam-sdks-java-io-kafka", configuration: "shadow")
}
3 changes: 3 additions & 0 deletions sdks/java/testing/load-tests/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,14 @@ configurations {
}

dependencies {
shadow library.java.kafka_clients

shadow project(path: ":beam-sdks-java-core", configuration: "shadow")
shadow project(path: ":beam-runners-direct-java", configuration: "shadow")
shadow project(path: ":beam-sdks-java-io-synthetic", configuration: "shadow")
shadow project(path: ":beam-sdks-java-test-utils", configuration: "shadow")
shadow project(path: ":beam-sdks-java-io-google-cloud-platform", configuration: "shadow")
shadow project(path: ":beam-sdks-java-io-kafka", configuration: "shadow")

gradleRun project(path: project.path, configuration: "shadow")
gradleRun project(path: runnerDependency, configuration: "shadow")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@
import org.apache.beam.sdk.io.synthetic.SyntheticOptions;
import org.apache.beam.sdk.io.synthetic.SyntheticSourceOptions;
import org.apache.beam.sdk.options.ApplicationNameOptions;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.Validation;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.kafka.common.serialization.StringSerializer;

/**
Expand All @@ -53,20 +53,23 @@
*
* <pre>
* ./gradlew :beam-sdks-java-load-tests:run -PloadTest.args='
 * --pubSubTopic=TOPIC_NAME
 * --kafkaBootstrapServerAddress=SERVER_ADDRESS
 * --kafkaTopic=KAFKA_TOPIC_NAME
 * --sourceOptions={"numRecords":1000,...}'
 * -PloadTest.mainClass="org.apache.beam.sdk.loadtests.SyntheticDataPublisher"
* </pre>
*
 * <p>If the parameters related to a specific sink (Kafka or PubSub) are provided, the pipeline
 * writes to that sink. Writing to both sinks simultaneously is also supported.
*/
public class SyntheticDataPublisher {

  // Coder for the KV<byte[], byte[]> records produced by the synthetic source.
  // NOTE(review): RECORD_CODER is not referenced in the visible code — confirm it is
  // still needed before removing.
  private static final KvCoder<byte[], byte[]> RECORD_CODER =
      KvCoder.of(ByteArrayCoder.of(), ByteArrayCoder.of());

  // Parsed pipeline options; assigned exactly once in main() and read by the
  // writeToKafka/writeToPubSub helpers.
  private static Options options;

/** Options for the pipeline. */
public interface Options extends PipelineOptions, ApplicationNameOptions {

Expand All @@ -77,58 +80,63 @@ public interface Options extends PipelineOptions, ApplicationNameOptions {
void setSourceOptions(String sourceOptions);

@Description("PubSub topic to publish to")
@Validation.Required
String getInsertionPipelineTopic();
String getPubSubTopic();

void setInsertionPipelineTopic(String topic);
void setPubSubTopic(String topic);

@Description("Kafka server address")
@Default.String("")
String getKafkaBootstrapServerAddress();

void setKafkaBootstrapServerAddress(String address);

@Description("Kafka topic")
String getKafkaTopic();

void setKafkaTopic(String topic);
}

public static void main(String[] args) throws IOException {
Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

SyntheticSourceOptions sourceOptions =
SyntheticOptions.fromJsonString(options.getSourceOptions(), SyntheticSourceOptions.class);

Pipeline pipeline = Pipeline.create(options);
PCollection<KV<byte[], byte[]>> syntheticData =
pipeline.apply("Read synthetic data", Read.from(new SyntheticBoundedSource(sourceOptions)));

if (!options.getKafkaBootstrapServerAddress().isEmpty()) {
pipeline
.apply("Read synthetic data", Read.from(new SyntheticBoundedSource(sourceOptions)))
.apply("Map to Kafka messages", MapElements.via(new MapKVToString()))
.apply(
"Write to Kafka",
KafkaIO.<Void, String>write()
.withBootstrapServers(options.getKafkaBootstrapServerAddress())
.withTopic(options.getInsertionPipelineTopic())
.withValueSerializer(StringSerializer.class)
.values());
} else {
pipeline
.apply("Read synthetic data", Read.from(new SyntheticBoundedSource(sourceOptions)))
.apply("Map to PubSub messages", MapElements.via(new MapBytesToPubSubMessage()))
.apply(
"Write to PubSub", PubsubIO.writeMessages().to(options.getInsertionPipelineTopic()));
if (options.getKafkaBootstrapServerAddress() != null && options.getKafkaTopic() != null) {
writeToKafka(syntheticData);
}
if (options.getPubSubTopic() != null) {
writeToPubSub(syntheticData);
}
pipeline.run().waitUntilFinish();
}

private static void writeToPubSub(PCollection<KV<byte[], byte[]>> collection) {
collection
.apply("Map to PubSub messages", MapElements.via(new MapBytesToPubSubMessage()))
.apply("Write to PubSub", PubsubIO.writeMessages().to(options.getPubSubTopic()));
}

private static void writeToKafka(PCollection<KV<byte[], byte[]>> collection) {
collection
.apply("Map to Kafka messages", MapElements.via(new MapKVToString()))
.apply(
"Write to Kafka",
KafkaIO.<Void, String>write()
.withBootstrapServers(options.getKafkaBootstrapServerAddress())
.withTopic(options.getKafkaTopic())
.withValueSerializer(StringSerializer.class)
.values());
}

private static class MapKVToString extends SimpleFunction<KV<byte[], byte[]>, String> {
@Override
public String apply(KV<byte[], byte[]> input) {
StringBuilder stringBuilder =
new StringBuilder()
.append("{")
.append(Arrays.toString(input.getKey()))
.append(",")
.append(Arrays.toString(input.getValue()))
.append("}");
return stringBuilder.toString();
return String.format(
"{%s,%s}", Arrays.toString(input.getKey()), Arrays.toString(input.getValue()));
}
}

Expand Down

0 comments on commit 07bc1c6

Please sign in to comment.