From ce071fca807caf232e6a6287b90ac59ed4bcd605 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 28 Oct 2014 23:05:49 -0400 Subject: [PATCH 1/6] don't change working dir --- ec2/spark-ec2 | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ec2/spark-ec2 b/ec2/spark-ec2 index 31f9771223e51..32ccb29b4021f 100755 --- a/ec2/spark-ec2 +++ b/ec2/spark-ec2 @@ -18,5 +18,7 @@ # limitations under the License. # -cd "`dirname $0`" -PYTHONPATH="./third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" python ./spark_ec2.py "$@" +SPARK_EC2_DIR="$(dirname $0)" + +PYTHONPATH="${SPARK_EC2_DIR}/third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" \ + python "${SPARK_EC2_DIR}/spark_ec2.py" "$@" From 77871a2036c71f06b1ce431f81af002dbf234433 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Wed, 29 Oct 2014 15:58:14 -0400 Subject: [PATCH 2/6] add clarifying comment --- ec2/spark-ec2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ec2/spark-ec2 b/ec2/spark-ec2 index 32ccb29b4021f..3998bcc664014 100755 --- a/ec2/spark-ec2 +++ b/ec2/spark-ec2 @@ -18,6 +18,8 @@ # limitations under the License. # +# Preserve the user's CWD so that relative paths are passed corrently to +#+ the underlying Python script. SPARK_EC2_DIR="$(dirname $0)" PYTHONPATH="${SPARK_EC2_DIR}/third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" \ From bcdf6a5da70d1ee0067e72497ccc7fc6a71c7c8b Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Wed, 29 Oct 2014 16:18:28 -0400 Subject: [PATCH 3/6] fix typo --- ec2/spark-ec2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ec2/spark-ec2 b/ec2/spark-ec2 index 3998bcc664014..4aa908242eeaa 100755 --- a/ec2/spark-ec2 +++ b/ec2/spark-ec2 @@ -18,7 +18,7 @@ # limitations under the License. # -# Preserve the user's CWD so that relative paths are passed corrently to +# Preserve the user's CWD so that relative paths are passed correctly to #+ the underlying Python script.
SPARK_EC2_DIR="$(dirname $0)" From 752f958e49b4f84bde745db8b8f45f50e5117f6a Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Wed, 29 Oct 2014 20:45:26 -0400 Subject: [PATCH 4/6] specify deploy.generic path absolutely --- ec2/spark_ec2.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 0d6b82b4944f3..e52de3902e4b0 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -40,6 +40,7 @@ from boto import ec2 DEFAULT_SPARK_VERSION = "1.1.0" +SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__)) # A URL prefix from which to fetch AMI information AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/v2/ami-list" @@ -586,7 +587,14 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key): ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v4") print "Deploying files to master..." - deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules) + deploy_files( + conn=conn, + root_dir=SPARK_EC2_DIR + "/" + "deploy.generic", + opts=opts, + master_nodes=master_nodes, + slave_nodes=slave_nodes, + modules=modules + ) print "Running setup on master..." setup_spark_cluster(master, opts) @@ -718,12 +726,16 @@ def get_num_disks(instance_type): return 1 -# Deploy the configuration file templates in a given local directory to -# a cluster, filling in any template parameters with information about the -# cluster (e.g. lists of masters and slaves). Files are only deployed to -# the first master instance in the cluster, and we expect the setup -# script to be run on that instance to copy them to other nodes. def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules): + """ + Deploy the configuration file templates in a given local directory to + a cluster, filling in any template parameters with information about the + cluster (e.g. lists of masters and slaves).
Files are only deployed to + the first master instance in the cluster, and we expect the setup + script to be run on that instance to copy them to other nodes. + + root_dir should be an absolute path to the directory with the files we want to deploy. + """ active_master = master_nodes[0].public_dns_name num_disks = get_num_disks(opts.instance_type) From fbc20c7a92bf0e75d16613c7e56532740257990b Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Wed, 5 Nov 2014 21:46:30 -0500 Subject: [PATCH 5/6] revert to old commenting style --- ec2/spark_ec2.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index e52de3902e4b0..8da8d1e09bac9 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -726,16 +726,14 @@ def get_num_disks(instance_type): return 1 +# Deploy the configuration file templates in a given local directory to +# a cluster, filling in any template parameters with information about the +# cluster (e.g. lists of masters and slaves). Files are only deployed to +# the first master instance in the cluster, and we expect the setup +# script to be run on that instance to copy them to other nodes. +# +# root_dir should be an absolute path to the directory with the files we want to deploy. def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules): - """ - Deploy the configuration file templates in a given local directory to - a cluster, filling in any template parameters with information about the - cluster (e.g. lists of masters and slaves). Files are only deployed to - the first master instance in the cluster, and we expect the setup - script to be run on that instance to copy them to other nodes. - - root_dir should be an absolute path to the directory with the files we want to deploy.
- """ active_master = master_nodes[0].public_dns_name num_disks = get_num_disks(opts.instance_type) From f3850b55ceae060a1a0f4c40a85da447a446803c Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Wed, 5 Nov 2014 21:59:59 -0500 Subject: [PATCH 6/6] pep8 fix --- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 8da8d1e09bac9..b5dee034b6510 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -731,7 +731,7 @@ def get_num_disks(instance_type): # cluster (e.g. lists of masters and slaves). Files are only deployed to # the first master instance in the cluster, and we expect the setup # script to be run on that instance to copy them to other nodes. -# +# # root_dir should be an absolute path to the directory with the files we want to deploy. def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules): active_master = master_nodes[0].public_dns_name