
Commit

Merge branch 'master' into spark-2706
zhzhan committed Sep 17, 2014
2 parents 9412d24 + 2b0d513 commit 57ea52e
Showing 9 changed files with 43 additions and 22 deletions.
10 changes: 4 additions & 6 deletions .gitignore
@@ -15,11 +15,10 @@ out/
third_party/libmesos.so
third_party/libmesos.dylib
conf/java-opts
conf/spark-env.sh
conf/streaming-env.sh
conf/log4j.properties
conf/spark-defaults.conf
conf/hive-site.xml
conf/*.sh
conf/*.properties
conf/*.conf
conf/*.xml
docs/_site
docs/api
target/
@@ -50,7 +49,6 @@ unit-tests.log
/lib/
rat-results.txt
scalastyle.txt
conf/*.conf
scalastyle-output.xml

# For Hive
14 changes: 5 additions & 9 deletions core/src/test/scala/org/apache/spark/ui/UISuite.scala
@@ -23,7 +23,6 @@ import javax.servlet.http.HttpServletRequest
import scala.io.Source
import scala.util.{Failure, Success, Try}

import org.eclipse.jetty.server.Server
import org.eclipse.jetty.servlet.ServletContextHandler
import org.scalatest.FunSuite
import org.scalatest.concurrent.Eventually._
@@ -108,14 +107,8 @@ class UISuite extends FunSuite {
}

test("jetty selects different port under contention") {
val startPort = 4040
val server = new Server(startPort)

Try { server.start() } match {
case Success(s) =>
case Failure(e) =>
// Either case server port is busy hence setup for test complete
}
val server = new ServerSocket(0)
val startPort = server.getLocalPort
val serverInfo1 = JettyUtils.startJettyServer(
"0.0.0.0", startPort, Seq[ServletContextHandler](), new SparkConf)
val serverInfo2 = JettyUtils.startJettyServer(
@@ -126,6 +119,9 @@
assert(boundPort1 != startPort)
assert(boundPort2 != startPort)
assert(boundPort1 != boundPort2)
serverInfo1.server.stop()
serverInfo2.server.stop()
server.close()
}

test("jetty binds to port 0 correctly") {
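
The reworked "jetty selects different port under contention" test above drops the hand-started Jetty server in favour of a plain ServerSocket bound to port 0, so the OS hands back a free ephemeral port that stays occupied until the socket is closed. A minimal standalone sketch of that technique (written outside the test, with illustrative names only):

import java.net.ServerSocket

// Binding port 0 asks the OS for any free ephemeral port; holding the socket
// open keeps that port busy, which is exactly the contention the test wants to provoke.
val socket = new ServerSocket(0)
val busyPort = socket.getLocalPort
try {
  // Anything that now tries to bind busyPort must fail or fall back to another port.
  println(s"Holding port $busyPort open")
} finally {
  socket.close() // release the port when done
}
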
3 changes: 2 additions & 1 deletion docs/_layouts/global.html
@@ -111,6 +111,7 @@
<li class="divider"></li>
<li><a href="building-spark.html">Building Spark</a></li>
<li><a href="https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark">Contributing to Spark</a></li>
<li><a href="https://cwiki.apache.org/confluence/display/SPARK/Supplemental+Spark+Projects">Supplemental Projects</a></li>
</ul>
</li>
</ul>
@@ -151,7 +152,7 @@ <h1 class="title">{{ page.title }}</h1>
MathJax.Hub.Config({
tex2jax: {
inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ],
displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
processEscapes: true,
skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
}
4 changes: 2 additions & 2 deletions docs/configuration.md
@@ -520,10 +520,10 @@ Apart from these, the following properties are also available, and may be useful
</tr>
<tr>
<td><code>spark.files.fetchTimeout</code></td>
<td>false</td>
<td>60</td>
<td>
Communication timeout to use when fetching files added through SparkContext.addFile() from
the driver.
the driver, in seconds.
</td>
</tr>
<tr>
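
The corrected table entry above documents that spark.files.fetchTimeout defaults to 60 and is expressed in seconds; it bounds how long an executor waits when fetching a file registered through SparkContext.addFile() from the driver. A hedged sketch of setting it (the app name and file path are made up for illustration):

import org.apache.spark.{SparkConf, SparkContext}

// Raise the fetch timeout to 120 seconds before distributing a file.
val conf = new SparkConf()
  .setAppName("fetch-timeout-example")      // illustrative app name
  .set("spark.files.fetchTimeout", "120")   // value in seconds, per the fixed docs
val sc = new SparkContext(conf)
sc.addFile("/path/to/lookup-table.txt")     // hypothetical file fetched by executors from the driver
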
2 changes: 1 addition & 1 deletion docs/ec2-scripts.md
@@ -156,6 +156,6 @@ If you have a patch or suggestion for one of these limitations, feel free to

# Accessing Data in S3

Spark's file interface allows it to process data in Amazon S3 using the same URI formats that are supported for Hadoop. You can specify a path in S3 as input through a URI of the form `s3n://<bucket>/path`. You will also need to set your Amazon security credentials, either by setting the environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` before your program or through `SparkContext.hadoopConfiguration`. Full instructions on S3 access using the Hadoop input libraries can be found on the [Hadoop S3 page](http://wiki.apache.org/hadoop/AmazonS3).
Spark's file interface allows it to process data in Amazon S3 using the same URI formats that are supported for Hadoop. You can specify a path in S3 as input through a URI of the form `s3n://<bucket>/path`. To provide AWS credentials for S3 access, launch the Spark cluster with the option `--copy-aws-credentials`. Full instructions on S3 access using the Hadoop input libraries can be found on the [Hadoop S3 page](http://wiki.apache.org/hadoop/AmazonS3).

In addition to using a single input file, you can also use a directory of files as input by simply giving the path to the directory.
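
As a rough illustration of the S3 access described above (the bucket, path, and fallback credential wiring are assumptions, not part of this commit):

import org.apache.spark.{SparkConf, SparkContext}

val sc = new SparkContext(new SparkConf().setAppName("s3-read-example"))

// Only needed when credentials were not already provided, e.g. via --copy-aws-credentials
// at cluster launch; these are the property names used by the s3n:// connector.
sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", sys.env("AWS_ACCESS_KEY_ID"))
sc.hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", sys.env("AWS_SECRET_ACCESS_KEY"))

val lines = sc.textFile("s3n://my-bucket/path/to/data.txt") // hypothetical bucket and path
println(lines.count())
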
1 change: 1 addition & 0 deletions docs/index.md
@@ -107,6 +107,7 @@ options for deployment:
* [OpenStack Swift](storage-openstack-swift.html)
* [Building Spark](building-spark.html): build Spark using the Maven system
* [Contributing to Spark](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark)
* [Supplemental Projects](https://cwiki.apache.org/confluence/display/SPARK/Supplemental+Spark+Projects): related third party Spark projects

**External Resources:**

2 changes: 2 additions & 0 deletions ec2/deploy.generic/root/spark-ec2/ec2-variables.sh
@@ -30,3 +30,5 @@ export HADOOP_MAJOR_VERSION="{{hadoop_major_version}}"
export SWAP_MB="{{swap}}"
export SPARK_WORKER_INSTANCES="{{spark_worker_instances}}"
export SPARK_MASTER_OPTS="{{spark_master_opts}}"
export AWS_ACCESS_KEY_ID="{{aws_access_key_id}}"
export AWS_SECRET_ACCESS_KEY="{{aws_secret_access_key}}"
10 changes: 10 additions & 0 deletions ec2/spark_ec2.py
@@ -158,6 +158,9 @@ def parse_args():
parser.add_option(
"--additional-security-group", type="string", default="",
help="Additional security group to place the machines in")
parser.add_option(
"--copy-aws-credentials", action="store_true", default=False,
help="Add AWS credentials to hadoop configuration to allow Spark to access S3")

(opts, args) = parser.parse_args()
if len(args) != 2:
@@ -714,6 +717,13 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
"spark_master_opts": opts.master_opts
}

if opts.copy_aws_credentials:
template_vars["aws_access_key_id"] = conn.aws_access_key_id
template_vars["aws_secret_access_key"] = conn.aws_secret_access_key
else:
template_vars["aws_access_key_id"] = ""
template_vars["aws_secret_access_key"] = ""

# Create a temp directory in which we will place all the files to be
# deployed after we substitute template parameters in them
tmp_dir = tempfile.mkdtemp()
@@ -38,6 +38,7 @@ import org.scalatest.Matchers

import scala.collection.JavaConversions._
import scala.collection.mutable.{ HashMap => MutableHashMap }
import scala.reflect.ClassTag
import scala.util.Try

import org.apache.spark.{SparkException, SparkConf}
@@ -200,9 +201,10 @@ class ClientBaseSuite extends FunSuite with Matchers {


val knownDefMRAppCP: Seq[String] =
getFieldValue[String, Seq[String]](classOf[MRJobConfig],
"DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH",
Seq[String]())(a => a.split(","))
getFieldValue2[String, Array[String], Seq[String]](
classOf[MRJobConfig],
"DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH",
Seq[String]())(a => a.split(","))(a => a.toSeq)

val knownYARNAppCP = Some(Seq("/known/yarn/path"))

@@ -232,6 +234,17 @@
def getFieldValue[A, B](clazz: Class[_], field: String, defaults: => B)(mapTo: A => B): B =
Try(clazz.getField(field)).map(_.get(null).asInstanceOf[A]).toOption.map(mapTo).getOrElse(defaults)

def getFieldValue2[A: ClassTag, A1: ClassTag, B](
clazz: Class[_],
field: String,
defaults: => B)(mapTo: A => B)(mapTo1: A1 => B) : B = {
Try(clazz.getField(field)).map(_.get(null)).map {
case v: A => mapTo(v)
case v1: A1 => mapTo1(v1)
case _ => defaults
}.toOption.getOrElse(defaults)
}

private class DummyClient(
val args: ClientArguments,
val conf: Configuration,
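
The new getFieldValue2 helper above works because ClassTag evidence for A and A1 lets the patterns `case v: A` and `case v1: A1` perform real runtime type checks despite JVM erasure, so a reflected static field declared either as a String or as an Array[String] can be normalised to a Seq[String]. A small self-contained sketch of the same idea (the class and field in the commented usage are invented for illustration):

import scala.reflect.ClassTag
import scala.util.Try

// Reads a public static field whose declared type may be either A or A1
// and maps whichever shape is found onto a common result type B.
def readStaticField[A: ClassTag, A1: ClassTag, B](
    clazz: Class[_],
    field: String,
    defaults: => B)(mapTo: A => B)(mapTo1: A1 => B): B = {
  Try(clazz.getField(field)).map(_.get(null)).map {
    case v: A   => mapTo(v)    // matched via the ClassTag for A
    case v1: A1 => mapTo1(v1)  // matched via the ClassTag for A1
    case _      => defaults
  }.toOption.getOrElse(defaults)
}

// Hypothetical usage mirroring the test:
// val cp = readStaticField[String, Array[String], Seq[String]](
//   classOf[SomeConfig], "DEFAULT_CLASSPATH", Seq.empty[String])(_.split(",").toSeq)(_.toSeq)
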
