Skip to content

Commit

Permalink
[scala] [streaming] WindowJoin scala example added
Browse files Browse the repository at this point in the history
  • Loading branch information
gyfora committed Dec 21, 2014
1 parent c513b01 commit ca2af20
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.examples.scala.streaming.windowing

import org.apache.flink.api.scala._
import org.apache.flink.api.scala.streaming.StreamExecutionEnvironment
import org.apache.flink.streaming.api.function.source.SourceFunction
import org.apache.flink.util.Collector
import scala.util.Random

object WindowJoin {

case class Name(id: Long, name: String)
case class Age(id: Long, age: Int)
case class Person(name: String, age: Long)

def main(args: Array[String]) {

val env = StreamExecutionEnvironment.getExecutionEnvironment

//Create streams for names and ages by mapping the inputs to the corresponding objects
val names = env.addSource(nameStream _).map(x => Name(x._1, x._2))
val ages = env.addSource(ageStream _).map(x => Age(x._1, x._2))

//Join the two input streams by id on the last second and create new Person objects
//containing both name and age
val joined =
names.join(ages).onWindow(1000)
.where("id").equalTo("id") { (n, a) => Person(n.name, a.age) }

joined print

env.execute("WindowJoin")
}

//Stream source for generating (id, name) pairs
def nameStream(out: Collector[(Long, String)]) = {
val names = Array("tom", "jerry", "alice", "bob", "john", "grace")

for (i <- 1 to 10000) {
if (i % 100 == 0) Thread.sleep(1000) else {
out.collect((i, names(Random.nextInt(names.length))))
}
}
}

//Stream source for generating (id, age) pairs
def ageStream(out: Collector[(Long, Int)]) = {
for (i <- 1 to 10000) {
if (i % 100 == 0) Thread.sleep(1000) else {
out.collect((i, Random.nextInt(90)))
}
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,25 @@ class StreamExecutionEnvironment(javaEnv: JavaEnv) {
*/
def addSource[T: ClassTag: TypeInformation](function: SourceFunction[T]): DataStream[T] = {
Validate.notNull(function, "Function must not be null.")
ClosureCleaner.clean(function, true)
val typeInfo = implicitly[TypeInformation[T]]
new DataStream[T](javaEnv.addSource(function, typeInfo))
}

/**
* Create a DataStream using a user defined source function for arbitrary
* source functionality.
*
*/
def addSource[T: ClassTag: TypeInformation](function: Collector[T] => Unit): DataStream[T] = {
Validate.notNull(function, "Function must not be null.")
val sourceFunction = new SourceFunction[T] {
override def invoke(out: Collector[T]) {
function(out)
}
}
addSource(sourceFunction)
}

/**
* Triggers the program execution. The environment will execute all parts of
Expand Down

0 comments on commit ca2af20

Please sign in to comment.