apache · ChengXiangLi · Aug 29, 2014 · Aug 29, 2014 · Sep 1, 2014 · Sep 2, 2014
diff --git a/core/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction2.java b/core/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction2.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.api.java.function;
+
+import java.io.Serializable;
+
+/**
+ * A serializable two-argument function that returns zero or more records of type Double for
+ * each (T1, T2) pair of arguments it is called with; {@code call} may throw any Exception.
+ */
+public interface DoubleFlatMapFunction2<T1, T2> extends Serializable {
+  public Iterable<Double> call(T1 t1, T2 t2) throws Exception;
+}
diff --git a/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction2.java b/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction2.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.api.java.function;
+
+import scala.Tuple2;
+
+import java.io.Serializable;
+
+/**
+ * A serializable two-argument function that returns zero or more key-value pair records for each
+ * (T1, T2) pair of arguments. The key-value pairs are represented as scala.Tuple2 objects.
+ */
+public interface PairFlatMapFunction2<T1, T2, K, V> extends Serializable {
+  public Iterable<Tuple2<K, V>> call(T1 t1, T2 t2) throws Exception;
+}
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
@@ -27,7 +27,7 @@ import com.google.common.base.Optional
 import org.apache.hadoop.io.compress.CompressionCodec
 
 import org.apache.spark.{Partition, SparkContext, TaskContext}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.api.java.JavaPairRDD._
 import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
 import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2, _}
@@ -81,7 +81,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
   def mapPartitionsWithIndex[R](
       f: JFunction2[java.lang.Integer, java.util.Iterator[T], java.util.Iterator[R]],
       preservesPartitioning: Boolean = false): JavaRDD[R] =
-    new JavaRDD(rdd.mapPartitionsWithIndex(((a,b) => f(a,asJavaIterator(b))),
+    new JavaRDD(rdd.mapPartitionsWithIndex(((a, b) => f(a, asJavaIterator(b))),
         preservesPartitioning)(fakeClassTag))(fakeClassTag)
 
   /**
@@ -185,6 +185,56 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
       rdd.mapPartitions(fn, preservesPartitioning))(fakeClassTag[K2], fakeClassTag[V2])
   }
 
+  /**
+   * :: DeveloperApi ::
+   * Return a new RDD by applying a function to each partition of this RDD. This is a variant of
+   * mapPartitions that also passes the TaskContext into the closure.
+   *
+   * `preservesPartitioning` (default `false`) indicates whether the input function preserves the
+   * partitioner; it should be `false` unless this is a pair RDD and `f` doesn't modify the keys.
+   */
+  @DeveloperApi
+  def mapPartitionsWithContext[R](
+      f: JFunction2[TaskContext, java.util.Iterator[T], java.util.Iterator[R]],
+      preservesPartitioning: Boolean = false): JavaRDD[R] =
+    new JavaRDD(rdd.mapPartitionsWithContext((context, iter) => f(context, asJavaIterator(iter)),
+        preservesPartitioning)(fakeClassTag))(fakeClassTag)
+
+  /**
+   * :: DeveloperApi ::
+   * Return a new JavaDoubleRDD by applying a function to each partition of this RDD. This is a
+   * variant of mapPartitions that also passes the TaskContext into the closure.
+   *
+   * `preservesPartitioning` (default `false`) indicates whether the input function preserves the
+   * partitioner; it should be `false` unless this is a pair RDD and `f` doesn't modify the keys.
+   */
+  @DeveloperApi
+  def mapPartitionsToDoubleWithContext(
+      f: DoubleFlatMapFunction2[TaskContext, java.util.Iterator[T]],
+      preservesPartitioning: Boolean = false): JavaDoubleRDD = {
+    val fn = (context: TaskContext, iter: Iterator[T]) =>
+      asScalaIterator(f.call(context, asJavaIterator(iter)).iterator())
+    new JavaDoubleRDD(
+      rdd.mapPartitionsWithContext(fn, preservesPartitioning).map(boxed => boxed.doubleValue()))
+  }
+
+  /**
+   * :: DeveloperApi ::
+   * Return a new JavaPairRDD by applying a function to each partition of this RDD. This is a
+   * variant of mapPartitions that also passes the TaskContext into the closure.
+   * `preservesPartitioning` (default `false`) indicates whether the input function preserves the
+   * partitioner; it should be `false` unless this is a pair RDD and `f` doesn't modify the keys.
+   */
+  @DeveloperApi
+  def mapPartitionsToPairWithContext[K2, V2](
+      f: PairFlatMapFunction2[TaskContext, java.util.Iterator[T], K2, V2],
+      preservesPartitioning: Boolean = false): JavaPairRDD[K2, V2] = {
+    val fn = (context: TaskContext, iter: Iterator[T]) =>
+      asScalaIterator(f.call(context, asJavaIterator(iter)).iterator())
+    JavaPairRDD.fromRDD(
+      rdd.mapPartitionsWithContext(fn, preservesPartitioning))(fakeClassTag[K2], fakeClassTag[V2])
+  }
+
   /**
    * Applies a function f to each partition of this RDD.
    */