
HUE-662. [config] Each Hadoop hdfs/mr/yarn cluster to define its own env

* Have Beeswax and Shell define the appropriate HADOOP_MAPRED_HOME for the
  child process.
* Renamed JT_HOST & RM_HOST to just "HOST", and JT_PORT & RM_PORT to just
  "PORT", so both cluster types are referenced consistently.
* Fixed README to drop the requirement of having a HADOOP_HOME. The build
  does not depend on one because we're using Maven.
commit c7e29b51f7ac4e1ab020583505e7a28c9c83b901 (1 parent: 871a084), committed by bc Wong on Apr 4, 2012
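In practice, callers now ask hadoop.cluster for the first cluster section
that has `submit_to' enabled, then read per-cluster settings from it. A
minimal sketch of the new lookup (the print statements are illustrative
only; the accessors come from the diff below):

    import hadoop.cluster

    cluster = hadoop.cluster.get_cluster_conf_for_job_submission()
    if cluster is not None:
        # HOST/PORT replace the old JT_HOST/JT_PORT and RM_HOST/RM_PORT names.
        print("Submitting to %s:%s" % (cluster.HOST.get(), cluster.PORT.get()))
        print("HADOOP_MAPRED_HOME = %s" % cluster.HADOOP_MAPRED_HOME.get())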
@@ -84,7 +84,7 @@ DESKTOP_DB_CONFIG=<db engine:db name:test db name:username:password:host:port>
Writing tests that depend on Hadoop
===================================
-Use mini_cluster.py! You should tag such tests with "requires_hadoop", as follows:
+Use pseudo_hdfs4.py! You should tag such tests with "requires_hadoop", as follows:
from nose.plugins.attrib import attr
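The README's example goes on to tag the test with nose's attr decorator; a
minimal sketch of such a tagged test (the test body is hypothetical):

    from nose.plugins.attrib import attr

    @attr('requires_hadoop')
    def test_hdfs_roundtrip():
        # Hypothetical body; assumes a running pseudo_hdfs4 cluster.
        pass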
@@ -19,8 +19,8 @@
set -o errexit
-if [ -z "$HADOOP_HOME" ]; then
- echo "\$HADOOP_HOME must be specified" 1>&2
+if [ -z "$HADOOP_CONF_DIR" ]; then
+ echo "\$HADOOP_CONF_DIR must be specified" 1>&2
exit 1
fi
echo \$HADOOP_HOME=$HADOOP_HOME
@@ -46,7 +46,6 @@ fi
echo \$HIVE_HOME=$HIVE_HOME
-
BEESWAX_ROOT=$(dirname $0)
BEESWAX_JAR=$BEESWAX_ROOT/java-lib/BeeswaxServer.jar
HIVE_LIB=$HIVE_HOME/lib
@@ -65,15 +64,13 @@ echo \$HADOOP_OPTS=$HADOOP_OPTS
# and to force hive-default to correspond to the Hive version we have.
# Because we are abusing HADOOP_CONF_DIR, we have to emulate its default
# behavior here as well.
-if [ -z "$HADOOP_CONF_DIR" ]; then
- HADOOP_CONF_DIR="$HADOOP_HOME/conf"
-fi
if [ -f $HADOOP_CONF_DIR/hadoop-env.sh ]; then
. $HADOOP_CONF_DIR/hadoop-env.sh
fi
-export HADOOP_CONF_DIR=$HIVE_CONF_DIR:$BEESWAX_ROOT/../../desktop/conf:$HADOOP_CONF_DIR
+export HADOOP_CONF_DIR=$HIVE_CONF_DIR:$HADOOP_CONF_DIR
echo \$HADOOP_CONF_DIR=$HADOOP_CONF_DIR
+echo \$HADOOP_MAPRED_HOME=$HADOOP_MAPRED_HOME
# Note: I've had trouble running this with just "java -jar" with the classpath
# determined with a seemingly appropriate find command.
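The export above prepends Hive's conf dir so that its files shadow the
cluster's. A small Python sketch of the resulting search order, using
hypothetical paths:

    # Hypothetical values for $HIVE_CONF_DIR and $HADOOP_CONF_DIR.
    hive_conf_dir = '/etc/hive/conf'
    hadoop_conf_dir = '/etc/hadoop/conf'
    # Earlier entries in the colon-separated list win lookups.
    search_path = ':'.join([hive_conf_dir, hadoop_conf_dir])
    assert search_path == '/etc/hive/conf:/etc/hadoop/conf'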
@@ -22,26 +22,36 @@
import beeswax.conf
import beeswax.hive_site
import desktop.conf
-import hadoop.conf
-import os
+import hadoop.cluster
+
import logging
+import os
+import sys
LOG = logging.getLogger(__name__)
class Command(NoArgsCommand):
""" Starts beeswax daemon. """
+
def handle_noargs(self, **options):
+ cluster_conf = hadoop.cluster.get_cluster_conf_for_job_submission()
+ if cluster_conf is None:
+ LOG.error("Configuration does not contain any MR/Yarn clusters with "
+ "`submit_to' enabled. Cannot start BeeswaxServer.")
+ sys.exit(1)
+
env = os.environ.copy()
- env['HADOOP_HOME'] = hadoop.conf.HADOOP_HOME.get()
- env['HADOOP_BIN'] = hadoop.conf.HADOOP_BIN.get()
- if hadoop.conf.HADOOP_CONF_DIR.get():
- env['HADOOP_CONF_DIR'] = hadoop.conf.HADOOP_CONF_DIR.get()
- if beeswax.conf.BEESWAX_HIVE_HOME_DIR.get():
- env['HIVE_HOME'] = beeswax.conf.BEESWAX_HIVE_HOME_DIR.get()
- if beeswax.conf.BEESWAX_HIVE_CONF_DIR.get():
- env['HIVE_CONF_DIR'] = beeswax.conf.BEESWAX_HIVE_CONF_DIR.get()
- if beeswax.conf.BEESWAX_SERVER_HEAPSIZE.get():
- env['HADOOP_HEAPSIZE'] = beeswax.conf.BEESWAX_SERVER_HEAPSIZE.get()
+ def set_if_present(name, val):
+ if val:
+ env[name] = val
+
+ env['HADOOP_BIN'] = cluster_conf.HADOOP_BIN.get()
+ set_if_present('HADOOP_MAPRED_HOME', cluster_conf.HADOOP_MAPRED_HOME.get())
+ set_if_present('HADOOP_CONF_DIR', cluster_conf.HADOOP_CONF_DIR.get())
+ set_if_present('HADOOP_HEAPSIZE', beeswax.conf.BEESWAX_SERVER_HEAPSIZE.get())
+ set_if_present('HIVE_HOME', beeswax.conf.BEESWAX_HIVE_HOME_DIR.get())
+ set_if_present('HIVE_CONF_DIR', beeswax.conf.BEESWAX_HIVE_CONF_DIR.get())
+
bin = beeswax.conf.BEESWAX_SERVER_BIN.get()
# Host that desktop is running on
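The set_if_present helper keeps unset options out of the child environment,
so the started daemon falls back to its own defaults. The same pattern in
isolation, as a hedged sketch (the values are illustrative; the real command
launches BEESWAX_SERVER_BIN through its own machinery):

    import os

    def build_child_env(settings):
        # Copy the parent env; add only settings that are actually set.
        env = os.environ.copy()
        for name, val in settings.items():
            if val:  # skips None and '', just like set_if_present
                env[name] = val
        return env

    env = build_child_env({
        'HADOOP_MAPRED_HOME': '/usr/lib/hadoop-0.20-mapreduce',  # hypothetical
        'HIVE_CONF_DIR': None,  # unset options never reach the child
    })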
@@ -69,7 +69,7 @@ def run(self):
raise Exception("Job design already submitted (Oozie job id %s)" % (self.job_id,))
fs_defaultfs = self._fs.fs_defaultfs
- jobtracker = hadoop.cluster.get_cluster_for_job_submission()
+ jobtracker = hadoop.cluster.get_cluster_addr_for_job_submission()
try:
wf_dir = self._get_and_create_deployment_dir()
@@ -49,6 +49,7 @@
SHELL_NOT_ALLOWED = "shellNotAllowed"
HOME = "HOME"
HADOOP_HOME = "HADOOP_HOME"
+HADOOP_MAPRED_HOME = "HADOOP_MAPRED_HOME"
HADOOP_TOKEN_FILE_LOCATION = 'HADOOP_TOKEN_FILE_LOCATION'
EXISTS = "exists"
@@ -21,7 +21,6 @@
import cStringIO
-import desktop.lib.i18n
import errno
import eventlet
import hadoop.conf
@@ -39,7 +38,8 @@
from eventlet.green import os
from eventlet.green import select
from eventlet.green import time
-from hadoop.cluster import all_mrclusters, get_all_hdfs
+from hadoop.cluster import all_mrclusters, get_all_hdfs, \
+ get_cluster_conf_for_job_submission
LOG = logging.getLogger(__name__)
SHELL_OUTPUT_LOGGER = logging.getLogger("shell_output")
@@ -388,16 +388,23 @@ def __init__(self):
self._parse_configs()
eventlet.spawn_after(1, self._handle_periodic)
+
@classmethod
def global_instance(cls):
if not hasattr(cls, "_global_instance"):
cls._global_instance = cls()
return cls._global_instance
def _parse_configs(self):
+ mr_cluster = get_cluster_conf_for_job_submission()
+ if mr_cluster is None:
+ mapred_home = None
+ else:
+ mapred_home = mr_cluster.HADOOP_MAPRED_HOME.get()
+
shell_types = [] # List of available shell types. For each shell type, we have a nice name (e.g. "Python Shell") and a short name (e.g. "python")
for item in shell.conf.SHELL_TYPES.keys():
- env_for_shell = { constants.HADOOP_HOME: hadoop.conf.HADOOP_HOME.get() }
+ env_for_shell = { constants.HADOOP_MAPRED_HOME: mapred_home }
command = shell.conf.SHELL_TYPES[item].command.get().strip().split()
nice_name = shell.conf.SHELL_TYPES[item].nice_name.get().strip()
executable_exists = utils.executable_exists(command)
@@ -165,15 +165,6 @@
[hadoop]
- # If you installed Hadoop in a different location, you need to set
- # hadoop_home, in which bin/hadoop, the Hadoop wrapper script, is found.
- #
- # NOTE: Hue depends on Cloudera's Distribution of Hadoop version 4 (CDH4)
- # or later.
- hadoop_home=/usr/lib/hadoop
- hadoop_bin=/usr/bin/hadoop
- # hadoop_conf_dir=/etc/hadoop/conf
-
# Configuration for HDFS NameNode
# ------------------------------------------------------------------------
[[hdfs_clusters]]
@@ -185,11 +176,23 @@
# Change this if your HDFS cluster is Kerberos-secured
## security_enabled=false
- # Use WebHdfs/HttpFs as the communication mechanism. To fallback to
- # using the Thrift plugin (used in Hue 1.x), this must be uncommented
- # and explicitly set to the empty value.
+ # Use WebHdfs/HttpFs as the communication mechanism.
+ # This should be the web service root URL, such as
+ # http://namenode:50070/webhdfs/v1
## webhdfs_url=
+ # Settings about this HDFS cluster. If you install HDFS in a
+ # different location, you need to set the following.
+
+ # Defaults to $HADOOP_HDFS_HOME or /usr/lib/hadoop-hdfs
+ ## hadoop_hdfs_home=/usr/lib/hadoop-hdfs
+
+ # Defaults to $HADOOP_BIN or /usr/bin/hadoop
+ ## hadoop_bin=/usr/bin/hadoop
+
+ # Defaults to $HADOOP_CONF_DIR or /etc/hadoop/conf
+ ## hadoop_conf_dir=/etc/hadoop/conf
+
# Configuration for MapReduce 0.20 JobTracker (MR1)
# ------------------------------------------------------------------------
[[mapred_clusters]]
@@ -202,11 +205,22 @@
# Thrift plug-in port for the JobTracker
## thrift_port=9290
# Whether to submit jobs to this cluster
- ## submit_to=False
+ ## submit_to=True
# Change this if your MapReduce cluster is Kerberos-secured
## security_enabled=false
+ # Settings about this MR1 cluster. If you install MR1 in a
+ # different location, you need to set the following.
+
+ # Defaults to $HADOOP_MR1_HOME or /usr/lib/hadoop-0.20-mapreduce
+ ## hadoop_mapred_home=/usr/lib/hadoop-0.20-mapreduce
+
+ # Defaults to $HADOOP_BIN or /usr/bin/hadoop
+ ## hadoop_bin=/usr/bin/hadoop
+
+ # Defaults to $HADOOP_CONF_DIR or /etc/hadoop/conf
+ ## hadoop_conf_dir=/etc/hadoop/conf
# Configuration for Yarn (MR2)
# ------------------------------------------------------------------------
@@ -220,6 +234,18 @@
# Whether to submit jobs to this cluster
## submit_to=False
+ # Settings about this MR2 cluster. If you install MR2 in a
+ # different location, you need to set the following.
+
+ # Defaults to $HADOOP_MR2_HOME or /usr/lib/hadoop-mapreduce
+ ## hadoop_mapred_home=/usr/lib/hadoop-mapreduce
+
+ # Defaults to $HADOOP_BIN or /usr/bin/hadoop
+ ## hadoop_bin=/usr/bin/hadoop
+
+ # Defaults to $HADOOP_CONF_DIR or /etc/hadoop/conf
+ ## hadoop_conf_dir=/etc/hadoop/conf
+
###########################################################################
# Settings to configure Beeswax
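All of the new per-cluster keys resolve the same way: an explicit value in
hue.ini wins, else the named environment variable, else the packaged install
location. A sketch of that order with a hypothetical helper (the real
defaults are computed inside Hue's config layer, which this diff does not
show):

    import os

    def resolve_default(env_var, packaged_path):
        # Hypothetical helper mirroring the comments above.
        return os.environ.get(env_var) or packaged_path

    hdfs_home   = resolve_default('HADOOP_HDFS_HOME', '/usr/lib/hadoop-hdfs')
    mapred_home = resolve_default('HADOOP_MR1_HOME',
                                  '/usr/lib/hadoop-0.20-mapreduce')
    hadoop_bin  = resolve_default('HADOOP_BIN', '/usr/bin/hadoop')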
@@ -171,17 +171,6 @@
[hadoop]
- # If you installed Hadoop in a different location, you need to set
- # hadoop_home, in which bin/hadoop, the Hadoop wrapper script, is found.
- #
- # NOTE: Hue depends on Cloudera's Distribution of Hadoop version 4 (CDH4)
- # or later.
- hadoop_home=$HADOOP_HOME
- hadoop_bin=$HADOOP_HOME/bin/hadoop
- hadoop_conf_dir=$HADOOP_HOME/conf
-
- ## hadoop_mr1_home=$HADOOP_MR1_HOME
-
# Configuration for HDFS NameNode
# ------------------------------------------------------------------------
[[hdfs_clusters]]
@@ -195,6 +184,18 @@
# and explicitly set to the empty value.
## webhdfs_url=
+ # Settings about this HDFS cluster. If you install HDFS in a
+ # different location, you need to set the following.
+
+ # Defaults to $HADOOP_HDFS_HOME or /usr/lib/hadoop-hdfs
+ ## hadoop_hdfs_home=/usr/lib/hadoop-hdfs
+
+ # Defaults to $HADOOP_BIN or /usr/bin/hadoop
+ ## hadoop_bin=/usr/bin/hadoop
+
+ # Defaults to $HADOOP_CONF_DIR or /etc/hadoop/conf
+ ## hadoop_conf_dir=/etc/hadoop/conf
+
# Configuration for MapReduce JobTracker
# ------------------------------------------------------------------------
[[mapred_clusters]]
@@ -209,6 +210,18 @@
# Whether to submit jobs to this cluster
## submit_to=False
+ # Settings about this MR1 cluster. If you install MR1 in a
+ # different location, you need to set the following.
+
+ # Defaults to $HADOOP_MR1_HOME or /usr/lib/hadoop-0.20-mapreduce
+ ## hadoop_mapred_home=/usr/lib/hadoop-0.20-mapreduce
+
+ # Defaults to $HADOOP_BIN or /usr/bin/hadoop
+ ## hadoop_bin=/usr/bin/hadoop
+
+ # Defaults to $HADOOP_CONF_DIR or /etc/hadoop/conf
+ ## hadoop_conf_dir=/etc/hadoop/conf
+
# Configuration for Yarn
# ------------------------------------------------------------------------
[[yarn_clusters]]
@@ -221,6 +234,18 @@
# Whether to submit jobs to this cluster
## submit_to=False
+ # Settings about this MR2 cluster. If you install MR2 in a
+ # different location, you need to set the following.
+
+ # Defaults to $HADOOP_MR2_HOME or /usr/lib/hadoop-mapreduce
+ ## hadoop_mapred_home=/usr/lib/hadoop-mapreduce
+
+ # Defaults to $HADOOP_BIN or /usr/bin/hadoop
+ ## hadoop_bin=/usr/bin/hadoop
+
+ # Defaults to $HADOOP_CONF_DIR or /etc/hadoop/conf
+ ## hadoop_conf_dir=/etc/hadoop/conf
+
###########################################################################
# Settings to configure Beeswax
@@ -91,21 +91,32 @@ def all_mrclusters():
MR_CACHE[identifier] = _make_mrcluster(identifier)
return MR_CACHE
-def get_cluster_for_job_submission():
+def get_cluster_conf_for_job_submission():
"""
Check the `submit_to' for each MR/Yarn cluster, and return the
- host:port of first one that enables submission.
+ config section of first one that enables submission.
"""
for name in conf.YARN_CLUSTERS.keys():
yarn = conf.YARN_CLUSTERS[name]
if yarn.SUBMIT_TO.get():
- return "%s:%s" % (yarn.RM_HOST.get(), yarn.RM_PORT.get())
+ return yarn
for name in conf.MR_CLUSTERS.keys():
mr = conf.MR_CLUSTERS[name]
if mr.SUBMIT_TO.get():
- return "%s:%s" % (mr.JT_HOST.get(), mr.JT_PORT.get())
+ return mr
return None
+def get_cluster_addr_for_job_submission():
+ """
+ Check the `submit_to' for each MR/Yarn cluster, and return the
+ host:port of first one that enables submission.
+ """
+ conf = get_cluster_conf_for_job_submission()
+ if conf is None:
+ return None
+ return "%s:%s" % (conf.HOST.get(), conf.PORT.get())
+
+
def clear_caches():
"""
Clears cluster's internal caches. Returns
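Both helpers run the same `submit_to' scan; the address variant simply
formats HOST:PORT. A minimal sketch of a caller guarding against no
submittable cluster (the error handling and the example address are
illustrative; the Beeswax command in this commit exits instead):

    from hadoop.cluster import get_cluster_addr_for_job_submission

    jobtracker = get_cluster_addr_for_job_submission()
    if jobtracker is None:
        # Illustrative: no MR/Yarn section has submit_to enabled.
        raise RuntimeError("No cluster enables `submit_to'")
    print("Submitting via %s" % jobtracker)  # e.g. "jthost:8021"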