[SPARK-12884] Move classes to their own files for readability
This is a small step in implementing SPARK-10620, which migrates `TaskMetrics` to accumulators. This patch is strictly a cleanup patch and introduces no change in functionality. It literally just moves classes to their own files to avoid having single monolithic ones that contain 10 different classes.

Parent PR: #10717

Author: Andrew Or <andrew@databricks.com>

Closes #10810 from andrewor14/move-things.
Showing 8 changed files with 493 additions and 360 deletions.
@@ -0,0 +1,160 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark

import scala.collection.{mutable, Map}
import scala.ref.WeakReference


/**
 * A simpler value of [[Accumulable]] where the result type being accumulated is the same
 * as the types of elements being merged, i.e. variables that are only "added" to through an
 * associative operation and can therefore be efficiently supported in parallel. They can be used
 * to implement counters (as in MapReduce) or sums. Spark natively supports accumulators of numeric
 * value types, and programmers can add support for new types.
 *
 * An accumulator is created from an initial value `v` by calling [[SparkContext#accumulator]].
 * Tasks running on the cluster can then add to it using the [[Accumulable#+=]] operator.
 * However, they cannot read its value. Only the driver program can read the accumulator's value,
 * using its value method.
 *
 * The interpreter session below shows an accumulator being used to add up the elements of an array:
 *
 * {{{
 * scala> val accum = sc.accumulator(0)
 * accum: spark.Accumulator[Int] = 0
 *
 * scala> sc.parallelize(Array(1, 2, 3, 4)).foreach(x => accum += x)
 * ...
 * 10/09/29 18:41:08 INFO SparkContext: Tasks finished in 0.317106 s
 *
 * scala> accum.value
 * res2: Int = 10
 * }}}
 *
 * @param initialValue initial value of accumulator
 * @param param helper object defining how to add elements of type `T`
 * @tparam T result type
 */
class Accumulator[T] private[spark] (
    @transient private[spark] val initialValue: T,
    param: AccumulatorParam[T],
    name: Option[String],
    internal: Boolean)
  extends Accumulable[T, T](initialValue, param, name, internal) {

  def this(initialValue: T, param: AccumulatorParam[T], name: Option[String]) = {
    this(initialValue, param, name, false)
  }

  def this(initialValue: T, param: AccumulatorParam[T]) = {
    this(initialValue, param, None, false)
  }
}


// TODO: The multi-thread support in accumulators is kind of lame; check
// if there's a more intuitive way of doing it right
private[spark] object Accumulators extends Logging {
  /**
   * This global map holds the original accumulator objects that are created on the driver.
   * It keeps weak references to these objects so that accumulators can be garbage-collected
   * once the RDDs and user-code that reference them are cleaned up.
   */
  val originals = mutable.Map[Long, WeakReference[Accumulable[_, _]]]()

  private var lastId: Long = 0

  def newId(): Long = synchronized {
    lastId += 1
    lastId
  }

  def register(a: Accumulable[_, _]): Unit = synchronized {
    originals(a.id) = new WeakReference[Accumulable[_, _]](a)
  }

  def remove(accId: Long) {
    synchronized {
      originals.remove(accId)
    }
  }

  // Add values to the original accumulators with some given IDs
  def add(values: Map[Long, Any]): Unit = synchronized {
    for ((id, value) <- values) {
      if (originals.contains(id)) {
        // Since we are now storing weak references, we must check whether the underlying data
        // is valid.
        originals(id).get match {
          case Some(accum) => accum.asInstanceOf[Accumulable[Any, Any]] ++= value
          case None =>
            throw new IllegalAccessError("Attempted to access garbage collected Accumulator.")
        }
      } else {
        logWarning(s"Ignoring accumulator update for unknown accumulator id $id")
      }
    }
  }

}


/**
 * A simpler version of [[org.apache.spark.AccumulableParam]] where the only data type you can add
 * in is the same type as the accumulated value. An implicit AccumulatorParam object needs to be
 * available when you create Accumulators of a specific type.
 *
 * @tparam T type of value to accumulate
 */
trait AccumulatorParam[T] extends AccumulableParam[T, T] {
  def addAccumulator(t1: T, t2: T): T = {
    addInPlace(t1, t2)
  }
}


object AccumulatorParam {

  // The following implicit objects were in SparkContext before 1.2 and users had to
  // `import SparkContext._` to enable them. Now we move them here to make the compiler find
  // them automatically. However, as there are duplicate codes in SparkContext for backward
  // compatibility, please update them accordingly if you modify the following implicit objects.

  implicit object DoubleAccumulatorParam extends AccumulatorParam[Double] {
    def addInPlace(t1: Double, t2: Double): Double = t1 + t2
    def zero(initialValue: Double): Double = 0.0
  }

  implicit object IntAccumulatorParam extends AccumulatorParam[Int] {
    def addInPlace(t1: Int, t2: Int): Int = t1 + t2
    def zero(initialValue: Int): Int = 0
  }

  implicit object LongAccumulatorParam extends AccumulatorParam[Long] {
    def addInPlace(t1: Long, t2: Long): Long = t1 + t2
    def zero(initialValue: Long): Long = 0L
  }

  implicit object FloatAccumulatorParam extends AccumulatorParam[Float] {
    def addInPlace(t1: Float, t2: Float): Float = t1 + t2
    def zero(initialValue: Float): Float = 0f
  }

  // TODO: Add AccumulatorParams for other types, e.g. lists and strings
}
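The AccumulatorParam scaladoc above notes that programmers can add support for new element types, and the closing TODO points in the same direction. As a minimal sketch of that extension point (not part of this commit), the snippet below defines a user-provided AccumulatorParam for element-wise vector sums; the name VectorAccumulatorParam and the Vector[Double] representation are illustrative assumptions.

// Hypothetical example: a user-defined AccumulatorParam for element-wise
// vector sums. Not part of the commit above; names are illustrative only.
import org.apache.spark.AccumulatorParam

object VectorAccumulatorParam extends AccumulatorParam[Vector[Double]] {
  // Merge two partial results by adding corresponding elements.
  def addInPlace(v1: Vector[Double], v2: Vector[Double]): Vector[Double] =
    v1.zip(v2).map { case (a, b) => a + b }

  // The "empty" value: a zero vector with the same length as the initial value.
  def zero(initialValue: Vector[Double]): Vector[Double] =
    Vector.fill(initialValue.length)(0.0)
}

// Usage sketch, assuming an existing SparkContext `sc` and the pre-2.0 accumulator API:
// val vecAccum = sc.accumulator(Vector(0.0, 0.0, 0.0))(VectorAccumulatorParam)
// sc.parallelize(Seq(Vector(1.0, 2.0, 3.0), Vector(4.0, 5.0, 6.0)))
//   .foreach(v => vecAccum += v)
// vecAccum.value  // Vector(5.0, 7.0, 9.0)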