
Commit

Merge branch 'master' of github.com:apache/spark into configure-ports
andrewor14 committed Aug 6, 2014
2 parents 8a6b820 + 82624e2 commit 621267b
Showing 22 changed files with 377 additions and 174 deletions.
2 changes: 1 addition & 1 deletion core/pom.xml
@@ -150,7 +150,7 @@
<dependency>
<groupId>org.json4s</groupId>
<artifactId>json4s-jackson_${scala.binary.version}</artifactId>
- <version>3.2.6</version>
+ <version>3.2.10</version>
</dependency>
<dependency>
<groupId>colt</groupId>
13 changes: 10 additions & 3 deletions core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala
@@ -96,17 +96,23 @@ class JdbcRDD[T: ClassTag](

override def close() {
try {
- if (null != rs && ! rs.isClosed()) rs.close()
+ if (null != rs && ! rs.isClosed()) {
+   rs.close()
+ }
} catch {
case e: Exception => logWarning("Exception closing resultset", e)
}
try {
- if (null != stmt && ! stmt.isClosed()) stmt.close()
+ if (null != stmt && ! stmt.isClosed()) {
+   stmt.close()
+ }
} catch {
case e: Exception => logWarning("Exception closing statement", e)
}
try {
- if (null != conn && ! stmt.isClosed()) conn.close()
+ if (null != conn && ! conn.isClosed()) {
+   conn.close()
+ }
logInfo("closed connection")
} catch {
case e: Exception => logWarning("Exception closing connection", e)
@@ -120,3 +126,4 @@ object JdbcRDD {
Array.tabulate[Object](rs.getMetaData.getColumnCount)(i => rs.getObject(i + 1))
}
}
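The change above does more than add braces: the last guard previously re-checked stmt.isClosed() before closing the connection, so the connection could be left open (or closing it could fail) once the statement was closed. Below is a minimal standalone sketch of the corrected pattern in plain JDBC, not Spark code; the closeQuietly name and its error printing are illustrative only. Each resource is guarded by its own isClosed() check and failures are swallowed per resource, as in the fixed close() method.

import java.sql.{Connection, ResultSet, Statement}

// Illustrative helper (not part of JdbcRDD): close each JDBC resource
// independently, guarding on that resource's own isClosed() state, so a
// failure while closing one resource never leaks the others.
def closeQuietly(rs: ResultSet, stmt: Statement, conn: Connection): Unit = {
  try {
    if (rs != null && !rs.isClosed()) rs.close()
  } catch { case e: Exception => Console.err.println(s"Exception closing resultset: $e") }
  try {
    if (stmt != null && !stmt.isClosed()) stmt.close()
  } catch { case e: Exception => Console.err.println(s"Exception closing statement: $e") }
  try {
    // The pre-fix code tested stmt.isClosed() here; the connection must check itself.
    if (conn != null && !conn.isClosed()) conn.close()
  } catch { case e: Exception => Console.err.println(s"Exception closing connection: $e") }
}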

@@ -40,7 +40,7 @@ private[spark] class SortShuffleWriter[K, V, C](
private val ser = Serializer.getSerializer(dep.serializer.orNull)

private val conf = SparkEnv.get.conf
- private val fileBufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 100) * 1024
+ private val fileBufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 32) * 1024

private var sorter: ExternalSorter[K, V, _] = null
private var outputFile: File = null
@@ -73,7 +73,7 @@ class ShuffleBlockManager(blockManager: BlockManager) extends Logging {
val sortBasedShuffle =
conf.get("spark.shuffle.manager", "") == classOf[SortShuffleManager].getName

- private val bufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 100) * 1024
+ private val bufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 32) * 1024

/**
* Contains all the state related to a particular shuffle. This includes a pool of unused
@@ -101,7 +101,7 @@ class ExternalAppendOnlyMap[K, V, C](
private var _memoryBytesSpilled = 0L
private var _diskBytesSpilled = 0L

- private val fileBufferSize = sparkConf.getInt("spark.shuffle.file.buffer.kb", 100) * 1024
+ private val fileBufferSize = sparkConf.getInt("spark.shuffle.file.buffer.kb", 32) * 1024
private val keyComparator = new HashComparator[K]
private val ser = serializer.newInstance()

@@ -84,7 +84,7 @@ private[spark] class ExternalSorter[K, V, C](

private val conf = SparkEnv.get.conf
private val spillingEnabled = conf.getBoolean("spark.shuffle.spill", true)
- private val fileBufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 100) * 1024
+ private val fileBufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 32) * 1024

// Size of object batches when reading/writing from serializers.
//
2 changes: 1 addition & 1 deletion docs/configuration.md
@@ -266,7 +266,7 @@ Apart from these, the following properties are also available, and may be useful
</tr>
<tr>
<td><code>spark.shuffle.file.buffer.kb</code></td>
- <td>100</td>
+ <td>32</td>
<td>
Size of the in-memory buffer for each shuffle file output stream, in kilobytes. These buffers
reduce the number of disk seeks and system calls made in creating intermediate shuffle files.
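The documented default drops from 100 KB to 32 KB per shuffle file stream, matching the code changes above. A job that prefers the old behaviour can raise the buffer when building its SparkConf; here is a minimal sketch assuming a local Spark 1.x application (the app name and RDD contents are placeholders):

import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical application: restore the pre-change 100 KB shuffle buffer.
val conf = new SparkConf()
  .setAppName("shuffle-buffer-example")        // placeholder app name
  .setMaster("local[2]")
  .set("spark.shuffle.file.buffer.kb", "100")
val sc = new SparkContext(conf)

// Any shuffle (reduceByKey here) now writes through 100 KB per-file buffers.
val counts = sc.parallelize(1 to 1000).map(i => (i % 10, 1)).reduceByKey(_ + _)
println(counts.collect().mkString(", "))
sc.stop()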
@@ -271,6 +271,7 @@ class PythonMLLibAPI extends Serializable {
.setNumIterations(numIterations)
.setRegParam(regParam)
.setStepSize(stepSize)
+ .setMiniBatchFraction(miniBatchFraction)
if (regType == "l2") {
lrAlg.optimizer.setUpdater(new SquaredL2Updater)
} else if (regType == "l1") {
@@ -341,16 +342,27 @@
stepSize: Double,
regParam: Double,
miniBatchFraction: Double,
- initialWeightsBA: Array[Byte]): java.util.List[java.lang.Object] = {
+ initialWeightsBA: Array[Byte],
+ regType: String,
+ intercept: Boolean): java.util.List[java.lang.Object] = {
+ val SVMAlg = new SVMWithSGD()
+ SVMAlg.setIntercept(intercept)
+ SVMAlg.optimizer
+   .setNumIterations(numIterations)
+   .setRegParam(regParam)
+   .setStepSize(stepSize)
+   .setMiniBatchFraction(miniBatchFraction)
+ if (regType == "l2") {
+   SVMAlg.optimizer.setUpdater(new SquaredL2Updater)
+ } else if (regType == "l1") {
+   SVMAlg.optimizer.setUpdater(new L1Updater)
+ } else if (regType != "none") {
+   throw new java.lang.IllegalArgumentException("Invalid value for 'regType' parameter."
+     + " Can only be initialized using the following string values: [l1, l2, none].")
+ }
trainRegressionModel(
(data, initialWeights) =>
- SVMWithSGD.train(
-   data,
-   numIterations,
-   stepSize,
-   regParam,
-   miniBatchFraction,
-   initialWeights),
+ SVMAlg.run(data, initialWeights),
dataBytesJRDD,
initialWeightsBA)
}
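The rewritten wrapper stops going through the static SVMWithSGD.train helper, which cannot express a regularization type or an intercept, and instead configures an algorithm instance directly. A rough standalone equivalent in Scala, assuming a Spark 1.x MLlib setup (the trainSvm name and the numeric parameter values are illustrative; the wrapper passes numIterations, stepSize, regParam, and miniBatchFraction through from Python):

import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
import org.apache.spark.mllib.optimization.{L1Updater, SquaredL2Updater}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD

// Illustrative sketch of the wrapper's logic: build the algorithm object,
// pick the updater from the regType string, then run on the training RDD.
def trainSvm(data: RDD[LabeledPoint], regType: String, intercept: Boolean): SVMModel = {
  val alg = new SVMWithSGD()
  alg.setIntercept(intercept)
  alg.optimizer
    .setNumIterations(100)   // illustrative values; in the wrapper these
    .setStepSize(1.0)        // come from the Python caller
    .setRegParam(0.01)
    .setMiniBatchFraction(1.0)
  regType match {
    case "l2"   => alg.optimizer.setUpdater(new SquaredL2Updater)
    case "l1"   => alg.optimizer.setUpdater(new L1Updater)
    case "none" =>           // keep the default (unregularized) updater
    case other  =>
      throw new IllegalArgumentException(s"Invalid value for 'regType': $other")
  }
  alg.run(data)
}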
@@ -363,15 +375,28 @@ class PythonMLLibAPI extends Serializable {
numIterations: Int,
stepSize: Double,
miniBatchFraction: Double,
- initialWeightsBA: Array[Byte]): java.util.List[java.lang.Object] = {
+ initialWeightsBA: Array[Byte],
+ regParam: Double,
+ regType: String,
+ intercept: Boolean): java.util.List[java.lang.Object] = {
+ val LogRegAlg = new LogisticRegressionWithSGD()
+ LogRegAlg.setIntercept(intercept)
+ LogRegAlg.optimizer
+   .setNumIterations(numIterations)
+   .setRegParam(regParam)
+   .setStepSize(stepSize)
+   .setMiniBatchFraction(miniBatchFraction)
+ if (regType == "l2") {
+   LogRegAlg.optimizer.setUpdater(new SquaredL2Updater)
+ } else if (regType == "l1") {
+   LogRegAlg.optimizer.setUpdater(new L1Updater)
+ } else if (regType != "none") {
+   throw new java.lang.IllegalArgumentException("Invalid value for 'regType' parameter."
+     + " Can only be initialized using the following string values: [l1, l2, none].")
+ }
trainRegressionModel(
(data, initialWeights) =>
- LogisticRegressionWithSGD.train(
-   data,
-   numIterations,
-   stepSize,
-   miniBatchFraction,
-   initialWeights),
+ LogRegAlg.run(data, initialWeights),
dataBytesJRDD,
initialWeightsBA)
}
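The same pattern applies to logistic regression: the companion object's train helpers fix the updater and intercept, which is why the wrapper now builds an instance and calls run. A brief sketch contrasting the two forms, with Spark 1.x MLlib assumed and the trainPlain/trainWithL1 names and numeric values used only for illustration:

import org.apache.spark.mllib.classification.{LogisticRegressionModel, LogisticRegressionWithSGD}
import org.apache.spark.mllib.optimization.L1Updater
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD

// Static helper: defaults only, no hook for the updater or intercept.
def trainPlain(data: RDD[LabeledPoint]): LogisticRegressionModel =
  LogisticRegressionWithSGD.train(data, 100)

// Instance form, as the wrapper now uses: intercept plus L1 regularization.
def trainWithL1(data: RDD[LabeledPoint]): LogisticRegressionModel = {
  val alg = new LogisticRegressionWithSGD()
  alg.setIntercept(true)
  alg.optimizer
    .setNumIterations(100)
    .setStepSize(1.0)
    .setRegParam(0.1)
    .setMiniBatchFraction(1.0)
    .setUpdater(new L1Updater)
  alg.run(data)
}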
