Skip to content

Commit

Permalink
Merge bb60ce8 into 21e1772
Browse files Browse the repository at this point in the history
  • Loading branch information
wzorgdrager committed Apr 2, 2019
2 parents 21e1772 + bb60ce8 commit 445bc8e
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 5 deletions.
Expand Up @@ -18,9 +18,11 @@
*/
package org.codefeedr.pipeline

import org.apache.flink.api.common.restartstrategy.RestartStrategies.RestartStrategyConfiguration
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.configuration.{ConfigConstants, Configuration}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.runtime.state.StateBackend
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala._
import org.codefeedr.Properties
import org.codefeedr.buffer.BufferType.BufferType
Expand All @@ -33,11 +35,17 @@ import org.codefeedr.pipeline.RuntimeType.RuntimeType
* @param bufferProperties The properties of the Buffer.
* @param keyManager The key manager which provide API call management at stage-level.
* @param streamTimeCharacteristic The TimeCharacteristic of the whole pipeline. Event, Ingestion or Processing.
* @param restartStrategy The RestartStrategy of the whole pipeline.
* @param checkpointing Captures if checkpointing is enabled and if so, what the interval is.
*/
case class PipelineProperties(bufferType: BufferType,
bufferProperties: Properties,
keyManager: KeyManager,
streamTimeCharacteristic: TimeCharacteristic)
streamTimeCharacteristic: TimeCharacteristic,
restartStrategy: RestartStrategyConfiguration,
checkpointing: Option[Long],
checkpointingMode: CheckpointingMode,
stateBackend: StateBackend)

/** The Pipeline holds all the data and logic to execute a CodeFeedr job.
* It stores all stages (Flink jobs) and connects them by setting up buffers (like Kafka).
Expand All @@ -63,6 +71,13 @@ case class Pipeline(var name: String,
_environment = StreamExecutionEnvironment.getExecutionEnvironment
_environment.setStreamTimeCharacteristic(
pipelineProperties.streamTimeCharacteristic)
_environment.setRestartStrategy(pipelineProperties.restartStrategy)
_environment.setStateBackend(pipelineProperties.stateBackend)

if (pipelineProperties.checkpointing.isDefined) {
_environment.enableCheckpointing(pipelineProperties.checkpointing.get,
pipelineProperties.checkpointingMode)
}
}

_environment
Expand Down
Expand Up @@ -18,7 +18,12 @@
*/
package org.codefeedr.pipeline

import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.api.common.restartstrategy.RestartStrategies
import org.apache.flink.api.common.restartstrategy.RestartStrategies.RestartStrategyConfiguration
import org.apache.flink.runtime.executiongraph.restart.RestartStrategy
import org.apache.flink.runtime.state.StateBackend
import org.apache.flink.runtime.state.memory.MemoryStateBackend
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.{
DataStream,
StreamExecutionEnvironment
Expand Down Expand Up @@ -80,6 +85,19 @@ class PipelineBuilder extends Logging {
/** The name of the pipeline, "CodeFeedr pipeline" by default. */
protected var name = "CodeFeedr pipeline"

/** The RestartStrategy. Default: [[RestartStrategies.noRestart()]] */
protected var restartStrategy = RestartStrategies.noRestart()

/** The Checkpointing interval. Default: None (No checkpointing). */
protected var checkpointing: Option[Long] = None

/** The StateBackend. Default: [[org.apache.flink.runtime.state.memory.MemoryStateBackend]] */
protected var stateBackend: StateBackend = new MemoryStateBackend()

/** The checkpointing mode. Default is exactly once.*/
protected var checkpointingMode: CheckpointingMode =
CheckpointingMode.EXACTLY_ONCE

/** Get the type of the buffer.
*
* @return The buffer type.
Expand Down Expand Up @@ -198,6 +216,29 @@ class PipelineBuilder extends Logging {
this
}

/** Set the RestartStrategy of the whole pipeline.
*
* @param strategy The strategy.
* @return The builder instance.
*/
def setRestartStrategy(
strategy: RestartStrategyConfiguration): PipelineBuilder = {
this.restartStrategy = strategy

this
}

/** Sets the StateBackend of the whole pipeline.
*
* @param stateBackend the statebackend.
* @return The builder instance.
*/
def setStateBackend(stateBackend: StateBackend): PipelineBuilder = {
this.stateBackend = stateBackend

this
}

/** Sets the serializer type for the buffer.
*
* @param serializer The serializer type (which is basically a string).
Expand All @@ -209,6 +250,39 @@ class PipelineBuilder extends Logging {
this
}

/** Enable checkpointing for this pipeline.
*
* @param interval The interval to checkpoint on.
* @param checkpointingMode The checkpointingmode (exactly once or at least once).
* @return This builder instance.
*/
def enableCheckpointing(interval: Long,
checkpointingMode: CheckpointingMode) = {
this.checkpointing = Some(interval)
this.checkpointingMode = checkpointingMode

this
}

/** Enable checkpointing for this pipeline.
*
* @param interval The interval to checkpoint on.
* @return This builder instance.
*/
def enableCheckpointing(interval: Long): PipelineBuilder = {
this.enableCheckpointing(interval, CheckpointingMode.EXACTLY_ONCE)
}

/** Sets the CheckpointMode for this pipeline. Note: this method does not enable checkpointing.
*
* @param checkpointingMode The checkpointingmode (exactly once or at least once).
*/
def setCheckpointingMode(checkpointingMode: CheckpointingMode) = {
this.checkpointingMode = checkpointingMode

this
}

/** Append a [[Stage]] in a sequential pipeline.
*
* @param stage The new stage to add.
Expand Down Expand Up @@ -469,7 +543,11 @@ class PipelineBuilder extends Logging {
val props = PipelineProperties(bufferType,
bufferProperties,
keyManager,
streamTimeCharacteristic)
streamTimeCharacteristic,
restartStrategy,
checkpointing,
checkpointingMode,
stateBackend)

Pipeline(name, props, graph, stageProperties.toMap)
}
Expand Down
Expand Up @@ -17,6 +17,8 @@
*/
package org.codefeedr.pipeline

import org.apache.flink.api.common.restartstrategy.RestartStrategies
import org.apache.flink.api.common.restartstrategy.RestartStrategies.RestartStrategyConfiguration
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.scala.{
DataStream,
Expand All @@ -25,7 +27,9 @@ import org.apache.flink.streaming.api.scala.{
import org.codefeedr.keymanager.StaticKeyManager
import org.codefeedr.buffer.{Buffer, BufferType}
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.runtime.state.memory.MemoryStateBackend
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.codefeedr.buffer.serialization.Serializer
import org.codefeedr.stages.utilities.StringType
import org.codefeedr.stages.OutputStage
Expand Down Expand Up @@ -168,6 +172,81 @@ class PipelineBuilderTest extends FunSuite with BeforeAndAfter with Matchers {
pipeline.pipelineProperties.streamTimeCharacteristic == TimeCharacteristic.IngestionTime)
}

test("Default RestartStrategy is no restart.") {
val pipeline = builder.append(new SimpleSourceStage()).build()

assert(
pipeline.pipelineProperties.restartStrategy == RestartStrategies
.noRestart())
}

test("Default RestartStrategy can be overriden.") {
val pipeline = builder
.append(new SimpleSourceStage())
.setRestartStrategy(RestartStrategies.fallBackRestart())
.build()

assert(
pipeline.pipelineProperties.restartStrategy == RestartStrategies
.fallBackRestart())
}

test("Default StateBackend is memory") {
val pipeline = builder.append(new SimpleSourceStage()).build()

assert(
pipeline.pipelineProperties.stateBackend.isInstanceOf[MemoryStateBackend])
}

test("Default StateBackend can be overriden.") {
val pipeline = builder
.append(new SimpleSourceStage())
.setStateBackend(new FsStateBackend("file://test/test"))
.build()

assert(
pipeline.pipelineProperties.stateBackend.isInstanceOf[FsStateBackend])
}

test("Default checkpointing is disabled") {
val pipeline = builder.append(new SimpleSourceStage()).build()

assert(pipeline.pipelineProperties.checkpointing.isEmpty)
assert(
pipeline.pipelineProperties.checkpointingMode == CheckpointingMode.EXACTLY_ONCE)
}

test("Default checkpointing can be enabled.") {
val pipeline =
builder.append(new SimpleSourceStage()).enableCheckpointing(1000).build()

assert(pipeline.pipelineProperties.checkpointing.get == 1000)
}

test("Default checkpointing can be enabled and mode can be set.") {
val pipeline =
builder
.append(new SimpleSourceStage())
.enableCheckpointing(500, CheckpointingMode.AT_LEAST_ONCE)
.build()

assert(pipeline.pipelineProperties.checkpointing.get == 500)
assert(
pipeline.pipelineProperties.checkpointingMode == CheckpointingMode.AT_LEAST_ONCE)
}

test("Checkpointmode can be overriden.") {
val pipeline =
builder
.append(new SimpleSourceStage())
.setCheckpointingMode(CheckpointingMode.AT_LEAST_ONCE)
.build()

assert(
pipeline.pipelineProperties.checkpointingMode == CheckpointingMode.AT_LEAST_ONCE)
assert(pipeline.pipelineProperties.checkpointing.isEmpty)
}

test("A non-sequential pipeline cannot switch to a sequential pipeline") {
val a = new SimpleSourceStage()
val b = new SimpleTransformStage()
Expand Down
8 changes: 8 additions & 0 deletions docs/pages/mydoc/mydoc_pipeline.md
Expand Up @@ -175,6 +175,14 @@ by `builder.disablePipelineVerification()`, however we do not recommend
this. If you disable this, make sure the serialization framework will
support the conversion (if you remove fields, this is often supported).

### Flink environment configuration
Many Flink environment configuration values are overloaded into the pipeline builder:
- [The state backend.](https://ci.apache.org/projects/flink/flink-docs-master/ops/state/state_backends.html): `builder.setStateBackend(new MemoryStateBackend())`
- [The restart strategy.](https://ci.apache.org/projects/flink/flink-docs-master/dev/restart_strategies.html): `builder.setRestartStrategy(RestartStrategies.noRestart())`
- [Checkpointing](https://ci.apache.org/projects/flink/flink-docs-master/dev/stream/state/checkpointing.htmlhttps://ci.apache.org/projects/flink/flink-docs-master/dev/stream/state/checkpointing.html): `builder.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE)`

**Note**: All these configuration values are pipeline-wide, so they will be configured for every stage. Stage-level environment configuration is currently not supported, however a workaround is to directly configure the environment in the `transform` function.

### Stage Properties
In the PipelineBuilder properties can be specified **per** stage. This
properties map is available to the Stage at run-time. To set a stage
Expand Down

0 comments on commit 445bc8e

Please sign in to comment.