From 78b996f0f4d7e7802695cf2d275f03ad840b1252 Mon Sep 17 00:00:00 2001 From: Mike Walch Date: Thu, 2 Feb 2017 12:15:17 -0500 Subject: [PATCH 1/2] ACCUMULO-4528 Accumulo scripts improvements * Accumulo now determines hostname in Java * Minimized use of environment variables * Consolidated scripts by moving service.sh and cluster.sh into accumulo-service and accumulo-cluster. * Removed accumulo-watcher script as restarting services should not be handled by Accumulo scripts * NUMA and multiple tservers are no longer configured in scripts but NUMA commands could be added using the ACCUMULO_WRAP_CMD env variable. --- assemble/bin/accumulo | 78 ++--- assemble/bin/accumulo-cluster | 296 +++++++++++++++- assemble/bin/accumulo-service | 85 ++++- assemble/contrib/bootstrap-hdfs.sh | 17 +- assemble/contrib/gen-monitor-cert.sh | 7 +- assemble/contrib/tool.sh | 2 +- assemble/libexec/accumulo-watcher.sh | 141 -------- assemble/libexec/cluster.sh | 325 ------------------ assemble/libexec/load-env.sh | 47 +-- assemble/libexec/service.sh | 239 ------------- assemble/libexec/templates/accumulo-env.sh | 41 +-- .../main/asciidoc/chapters/administration.txt | 64 +--- docs/src/main/asciidoc/chapters/kerberos.txt | 14 +- docs/src/main/asciidoc/chapters/ssl.txt | 4 +- .../asciidoc/chapters/troubleshooting.txt | 4 +- 15 files changed, 454 insertions(+), 910 deletions(-) delete mode 100755 assemble/libexec/accumulo-watcher.sh delete mode 100755 assemble/libexec/cluster.sh delete mode 100755 assemble/libexec/service.sh diff --git a/assemble/bin/accumulo b/assemble/bin/accumulo index f72595226fc..1ca693e9fb1 100755 --- a/assemble/bin/accumulo +++ b/assemble/bin/accumulo @@ -75,8 +75,8 @@ EOF } function create_config() { - TEMPLATE_CONF_DIR="$basedir/libexec/templates" - CONF_DIR="${ACCUMULO_CONF_DIR:-$basedir/conf}" + TEMPLATE_CONF_DIR="${basedir}/libexec/templates" + CONF_DIR="${basedir}/conf" ACCUMULO_SITE=accumulo-site.xml ACCUMULO_ENV=accumulo-env.sh @@ -455,10 +455,13 @@ function
main() { basedir=$( cd -P "${bin}"/.. && pwd ) # Stop: Resolve Script Directory - if [[ "$1" == "create-config" ]]; then + cmd="$1" + conf="${basedir}/conf" + + if [[ "$cmd" == "create-config" ]]; then create_config "${@:2}" exit 0 - elif [[ "$1" == "build-native" ]]; then + elif [[ "$cmd" == "build-native" ]]; then build_native "${@:2}" exit 0 fi @@ -467,13 +470,13 @@ function main() { # ACCUMULO_XTRAJARS is where all of the commandline -add items go into for reading by accumulo. # It also holds the JAR run with the jar command and, if possible, any items in the JAR manifest's Class-Path. - if [[ "$1" = "-add" ]]; then + if [[ "$cmd" = "-add" ]]; then export ACCUMULO_XTRAJARS="$2" shift 2 else export ACCUMULO_XTRAJARS="" fi - if [[ "$1" = "jar" && -f "$2" ]]; then + if [[ "$cmd" = "jar" && -f "$2" ]]; then if [[ $2 =~ ^/ ]]; then jardir="$(dirname "$2")" jarfile="$2" @@ -495,48 +498,48 @@ function main() { fi # Set up -D switches for JAAS and Kerberos if files exist - if [[ -f ${ACCUMULO_JAAS_CONF} ]]; then - ACCUMULO_GENERAL_OPTS="${ACCUMULO_GENERAL_OPTS} -Djava.security.auth.login.config=${ACCUMULO_JAAS_CONF}" + jaas_conf="${conf}/jaas.conf" + if [ -f $jaas_conf ]; then + ACCUMULO_GENERAL_OPTS="${ACCUMULO_GENERAL_OPTS} -Djava.security.auth.login.config=${jaas_conf}" fi - if [[ -f ${ACCUMULO_KRB5_CONF} ]]; then - ACCUMULO_GENERAL_OPTS="${ACCUMULO_GENERAL_OPTS} -Djava.security.krb5.conf=${ACCUMULO_KRB5_CONF}" + krb5_conf="${conf}/krb5.conf" + if [ -f $krb5_conf ]; then + ACCUMULO_GENERAL_OPTS="${ACCUMULO_GENERAL_OPTS} -Djava.security.krb5.conf=${krb5_conf}" fi # Add appropriate options for process type - case "$1" in + case "$cmd" in master) export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_MASTER_OPTS}" ;; gc) export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_GC_OPTS}" ;; - tserver*) export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_TSERVER_OPTS}" ;; + tserver) export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_TSERVER_OPTS}" ;; 
monitor) export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_MONITOR_OPTS}" ;; shell) export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_SHELL_OPTS}" ;; *) export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_OTHER_OPTS}" ;; esac - LOG4J_JAR=$(find -H "${HADOOP_PREFIX}/lib" "${HADOOP_PREFIX}"/share/hadoop/common/lib -name 'log4j*.jar' -print 2>/dev/null | head -1) - SLF4J_JARS="${ACCUMULO_LIB_DIR}/slf4j-api.jar:${ACCUMULO_LIB_DIR}/slf4j-log4j12.jar" + log4j_jar=$(find -H "${HADOOP_PREFIX}/lib" "${HADOOP_PREFIX}"/share/hadoop/common/lib -name 'log4j*.jar' -print 2>/dev/null | head -1) + lib="${basedir}/lib" + slf4j_jars="${lib}/slf4j-api.jar:${lib}/slf4j-log4j12.jar" # The `find` command could fail for environmental reasons or bad configuration # Avoid trying to run Accumulo when we can't find the jar - if [[ -z "${LOG4J_JAR}" && -z "${CLASSPATH}" ]]; then + if [[ -z "${log4j_jar}" && -z "${CLASSPATH}" ]]; then echo "Could not locate Log4j jar in Hadoop installation at ${HADOOP_PREFIX}" exit 1 fi - if [[ ! "$1" =~ ^(gc|master|monitor|tserver|tracer)$ ]]; then - if [[ -f ${ACCUMULO_CONF_DIR}/log4j.properties ]]; then - export ACCUMULO_OPTS="${ACCUMULO_OPTS} -Dlog4j.configuration=file:${ACCUMULO_CONF_DIR}/log4j.properties" + if [[ ! 
"$cmd" =~ ^(gc|master|monitor|tserver|tracer)$ ]]; then + if [[ -f ${conf}/log4j.properties ]]; then + export ACCUMULO_OPTS="${ACCUMULO_OPTS} -Dlog4j.configuration=file:${conf}/log4j.properties" else - export ACCUMULO_OPTS="${ACCUMULO_OPTS} -Dlog4j.configuration=file:${ACCUMULO_CONF_DIR}/examples/log4j.properties" + export ACCUMULO_OPTS="${ACCUMULO_OPTS} -Dlog4j.configuration=file:${conf}/examples/log4j.properties" fi fi - CLASSPATH="${ACCUMULO_LIB_DIR}/accumulo-start.jar:${ACCUMULO_CONF_DIR}:${SLF4J_JARS}:${LOG4J_JAR}:${CLASSPATH}" + CLASSPATH="${lib}/accumulo-start.jar:${conf}:${slf4j_jars}:${log4j_jar}:${CLASSPATH}" - # This is default for hadoop 2.x; - # for another distribution, specify (DY)LD_LIBRARY_PATH - # explicitly in ${ACCUMULO_CONF_DIR}/accumulo-env.sh - # usually something like: - # ${HADOOP_PREFIX}/lib/native/${PLATFORM} + # This is default for hadoop 2.x; for another distribution, specify (DY)LD_LIBRARY_PATH explicitly in + # accumulo-env.sh usually something like: ${HADOOP_PREFIX}/lib/native/${PLATFORM} if [ -e "${HADOOP_PREFIX}/lib/native/libhadoop.so" ]; then LIB_PATH="${HADOOP_PREFIX}/lib/native" LD_LIBRARY_PATH="${LIB_PATH}:${LD_LIBRARY_PATH}" # For Linux @@ -547,30 +550,13 @@ function main() { # This makes them available to java export JAVA_HOME HADOOP_PREFIX ZOOKEEPER_HOME LD_LIBRARY_PATH DYLD_LIBRARY_PATH - # Strip the instance from $1 - APP=$1 - # Avoid setting an instance unless it's necessary to ensure consistency in filenames - INSTANCE="" - # Avoid setting a pointless system property - INSTANCE_OPT="" - if [[ "$1" =~ ^tserver-[1-9][0-9]*$ ]]; then - APP="$(echo "$1" | cut -d'-' -f1)" - # Appending the trailing underscore to make single-tserver deploys look how they did - INSTANCE="$(echo "$1" | cut -d'-' -f2)_" - - #Rewrite the input arguments - set -- "$APP" "${@:2}" - - # The extra system property we'll pass to the java cmd - INSTANCE_OPT="-Daccumulo.service.instance=${INSTANCE}" + JAVA=${JAVA:-${JAVA_HOME}/bin/java} + if [ -n 
"$ACCUMULO_WRAP_CMD" ]; then + JAVA="$ACCUMULO_WRAP_CMD $JAVA" fi - # app isn't used anywhere, but it makes the process easier to spot when ps/top/snmp truncate the command line - JAVA="${JAVA_HOME}/bin/java" - ACCUMULO_OPTS_ARRAY=(${ACCUMULO_OPTS}) - exec "$JAVA" "-Dapp=$1" \ - $INSTANCE_OPT \ + exec "$JAVA" "-Dapp=$cmd" \ "${ACCUMULO_OPTS_ARRAY[@]}" \ -classpath "${CLASSPATH}" \ -XX:OnOutOfMemoryError="${ACCUMULO_KILL_CMD:-kill -9 %p}" \ diff --git a/assemble/bin/accumulo-cluster b/assemble/bin/accumulo-cluster index a8c0362aa44..f1e7082e88d 100755 --- a/assemble/bin/accumulo-cluster +++ b/assemble/bin/accumulo-cluster @@ -20,10 +20,14 @@ function print_usage { Usage: accumulo-cluster ( ...) Commands: - create-config Creates cluster config - start Starts Accumulo cluster - stop Stops Accumulo cluster - + create-config Creates cluster config + start Starts Accumulo cluster + stop Stops Accumulo cluster + start-non-tservers Starts all services except tservers + start-tservers Starts all tservers on cluster + stop-tservers Stops all tservers on cluster + start-here Starts all services on this node + stop-here Stops all services on this node EOF exit 1 } @@ -58,6 +62,250 @@ function copy_file { fi } +function verify_config { + if [[ -f $conf/slaves ]]; then + echo "ERROR: A 'slaves' file was found in $conf/" + echo "Accumulo now reads tablet server hosts from 'tservers' and requires that the 'slaves' file not be present to reduce confusion." + echo "Please rename the 'slaves' file to 'tservers' or remove it if both exist." + exit 1 + fi + + if [[ ! -f $conf/tservers ]]; then + echo "ERROR: A 'tservers' file was not found at $conf/tservers" + echo "Please make sure it exists and is configured with tablet server hosts." 
+ exit 1 + fi + + unset master1 + if [[ -f "$conf/masters" ]]; then + master1=$(egrep -v '(^#|^\s*$)' "$conf/masters" | head -1) + fi + + if [[ -z "${monitor}" ]] ; then + monitor=$master1 + if [[ -f "$conf/monitor" ]]; then + monitor=$(egrep -v '(^#|^\s*$)' "$conf/monitor" | head -1) + fi + if [[ -z "${monitor}" ]] ; then + echo "Could not infer a Monitor role. You need to either define \"${conf}/monitor\"," + echo "or make sure \"${conf}/masters\" is non-empty." + exit 1 + fi + fi + if [[ ! -f "$conf/tracers" ]]; then + if [[ -z "${master1}" ]] ; then + echo "Could not find a master node to use as a default for the tracer role." + echo "Either set up \"${conf}/tracers\" or make sure \"${conf}/masters\" is non-empty." + exit 1 + else + echo "$master1" > "$conf/tracers" + fi + fi + if [[ ! -f "$conf/gc" ]]; then + if [[ -z "${master1}" ]] ; then + echo "Could not infer a GC role. You need to either set up \"${conf}/gc\" or make sure \"${conf}/masters\" is non-empty." + exit 1 + else + echo "$master1" > "$conf/gc" + fi + fi +} + +function get_ip() { + ip_addr=$(ip addr | grep 'state UP' -A2 | tail -n1 | awk '{print $2}' | cut -f1 -d'/') + if [[ $? != 0 ]]; then + ip_addr=$(getent ahosts "$(hostname -f)" | grep DGRAM | cut -f 1 -d ' ') + fi + echo "$ip_addr" +} + +function start_service() { + host="$1" + service="$2" + + if [[ $host == "localhost" || $host == $(hostname -f) || $host == $(hostname -s) || $host == $(get_ip) ]]; then + "${bin}/accumulo-service" "$service" start + else + $SSH "$host" "bash -c '${bin}/accumulo-service \"$service\" start'" + fi +} + +function start_tservers() { + echo -n "Starting tablet servers ..." + count=1 + for server in $(egrep -v '(^#|^\s*$)' "${conf}/tservers"); do + echo -n "." 
+ start_service "$server" tserver & + if (( ++count % 72 == 0 )) ; + then + echo + wait + fi + done + echo " done" +} + +function start_all() { + unset DISPLAY + + start_service "$monitor" monitor + + if [ "$1" != "--no-tservers" ]; then + start_tservers + fi + + for host in $(egrep -v '(^#|^\s*$)' "$conf/masters"); do + start_service "$host" master + done + + for host in $(egrep -v '(^#|^\s*$)' "$conf/gc"); do + start_service "$host" gc + done + + for host in $(egrep -v '(^#|^\s*$)' "$conf/tracers"); do + start_service "$host" tracer + done +} + +function start_here() { + + local_hosts="$(hostname -a 2> /dev/null) $(hostname) localhost 127.0.0.1 $(get_ip)" + for host in $local_hosts; do + if grep -q "^${host}\$" "$conf/tservers"; then + start_service "$host" tserver + break + fi + done + + for host in $local_hosts; do + if grep -q "^${host}\$" "$conf/masters"; then + start_service "$host" master + break + fi + done + + for host in $local_hosts; do + if grep -q "^${host}\$" "$conf/gc"; then + start_service "$host" gc + break + fi + done + + for host in $local_hosts; do + if [ $host == "$monitor" ]; then + start_service "$host" monitor + break + fi + done + + for host in $local_hosts; do + if grep -q "^${host}\$" "$conf/tracers"; then + start_service "$host" tracer + break + fi + done +} + +function end_service() { + host="$1" + service="$2" + end_cmd="$3" + if [[ $host == localhost || $host = "$(hostname -s)" || $host = "$(hostname -f)" || $host = $(get_ip) ]] ; then + "${bin}/accumulo-service" "$service" "$end_cmd" + else + $SSH "$host" "bash -c '${bin}/accumulo-service \"$service\" \"$end_cmd\"'" + fi +} + +function stop_service() { + end_service "$1" "$2" "stop" +} + +function kill_service() { + end_service "$1" "$2" "kill" +} + +function stop_tservers() { + tserver_hosts=$(egrep -v '(^#|^\s*$)' "${conf}/tservers") + + echo "Stopping unresponsive tablet servers (if any)..." 
+ for host in ${tserver_hosts}; do + stop_service "$host" tserver & + done + + sleep 10 + + echo "Stopping unresponsive tablet servers hard (if any)..." + for host in ${tserver_hosts}; do + kill_service "$host" tserver & + done + + echo "Cleaning tablet server entries from zookeeper" + ${accumulo_cmd} org.apache.accumulo.server.util.ZooZap -tservers +} + +function stop_all() { + echo "Stopping accumulo services..." + if ! ${accumulo_cmd} admin stopAll + then + echo "Invalid password or unable to connect to the master" + echo "Initiating forced shutdown in 15 seconds (Ctrl-C to abort)" + sleep 10 + echo "Initiating forced shutdown in 5 seconds (Ctrl-C to abort)" + else + echo "Accumulo shut down cleanly" + echo "Utilities and unresponsive servers will shut down in 5 seconds (Ctrl-C to abort)" + fi + + sleep 5 + + # Look for processes not killed by 'admin stopAll' + for end_cmd in "stop" "kill" ; do + for master in $(grep -v '^#' "$conf/masters"); do + end_service "$master" master $end_cmd + done + + for gc in $(grep -v '^#' "$conf/gc"); do + end_service "$gc" gc $end_cmd + done + + end_service "$monitor" monitor $end_cmd + + for tracer in $(egrep -v '(^#|^\s*$)' "$conf/tracers"); do + end_service "$tracer" tracer $end_cmd + done + done + + # stop tserver still running + stop_tservers + + echo "Cleaning all server entries in ZooKeeper" + ${accumulo_cmd} org.apache.accumulo.server.util.ZooZap -master -tservers -tracers --site-file "${conf}/accumulo-site.xml" +} + +function stop_here() { + # Determine hostname without errors to user + hosts_to_check=($(hostname -a 2> /dev/null | head -1) $(hostname -f)) + + if egrep -q localhost\|127.0.0.1 "${conf}/tservers"; then + ${accumulo_cmd} admin stop localhost + else + for host in "${hosts_to_check[@]}"; do + if grep -q "$host" "$conf"/tservers; then + ${accumulo_cmd} admin stop "$host" + fi + done + fi + + for host in "${hosts_to_check[@]}"; do + for end_cmd in "stop" "kill"; do + for svc in tserver gc master monitor tracer; 
do + end_service "$host" $svc $end_cmd + done + done + done +} + function main() { if [[ -z $1 ]]; then @@ -76,21 +324,45 @@ function main() { source "$basedir"/libexec/load-env.sh - verify_file "${ACCUMULO_LIBEXEC_DIR}/cluster.sh" + accumulo_cmd="${bin}/accumulo" + conf="${basedir}/conf" + SSH='ssh -qnf -o ConnectTimeout=2' case "$1" in create-config) - copy_file "${ACCUMULO_CONF_DIR}/examples/gc" "${ACCUMULO_CONF_DIR}" - copy_file "${ACCUMULO_CONF_DIR}/examples/masters" "${ACCUMULO_CONF_DIR}" - copy_file "${ACCUMULO_CONF_DIR}/examples/monitor" "${ACCUMULO_CONF_DIR}" - copy_file "${ACCUMULO_CONF_DIR}/examples/tservers" "${ACCUMULO_CONF_DIR}" - copy_file "${ACCUMULO_CONF_DIR}/examples/tracers" "${ACCUMULO_CONF_DIR}" + copy_file "${conf}/examples/gc" "${conf}" + copy_file "${conf}/examples/masters" "${conf}" + copy_file "${conf}/examples/monitor" "${conf}" + copy_file "${conf}/examples/tservers" "${conf}" + copy_file "${conf}/examples/tracers" "${conf}" ;; start) - "${ACCUMULO_LIBEXEC_DIR}"/cluster.sh start-all + verify_config + start_all ;; stop) - "${ACCUMULO_LIBEXEC_DIR}"/cluster.sh stop-all + verify_config + stop_all + ;; + start-non-tservers) + verify_config + start_all --no-tservers + ;; + start-tservers) + verify_config + start_tservers + ;; + start-here) + verify_config + start_here + ;; + stop-tservers) + verify_config + stop_tservers + ;; + stop-here) + verify_config + stop_here ;; *) invalid_args "'$1' is an invalid " diff --git a/assemble/bin/accumulo-service b/assemble/bin/accumulo-service index bf01df9f03e..5cb0fe764ef 100755 --- a/assemble/bin/accumulo-service +++ b/assemble/bin/accumulo-service @@ -42,7 +42,10 @@ function invalid_args { } function get_host { - host="$(hostname -s)" + host="$(hostname -f)" + if [ "$1" == "-s" ]; then + host="$(hostname -s)" + fi if [[ -z "$host" ]]; then host=$(ip addr | grep 'state UP' -A2 | tail -n1 | awk '{print $2}' | cut -f1 -d'/') if [[ $? 
!= 0 ]]; then @@ -52,8 +55,75 @@ function get_host { echo "$host" } -function main() { +function rotate_log () { + logfile="$1" + max_retained="5" + if [ -f "$logfile" ]; then + while [[ "$max_retained" -gt 1 ]]; do + prev=$(( max_retained - 1)) + [ -f "$logfile.$prev" ] && mv -f "$logfile.$prev" "$logfile.$max_retained" + max_retained=$prev + done + mv -f "$logfile" "$logfile.$max_retained"; + fi +} + +function start_service() { + host="$(get_host -s)" + + if [ -f "$pid_file" ]; then + pid=$(cat "$pid_file") + if kill -0 "$pid" 2>/dev/null; then + echo "$host : $service already running (${pid})" + exit 0 + fi + fi + echo "Starting $service on $host" + + if [[ $service == "master" ]]; then + "${bin}/accumulo" org.apache.accumulo.master.state.SetGoalState NORMAL + fi + + outfile="${ACCUMULO_LOG_DIR}/${service}_${host}.out" + errfile="${ACCUMULO_LOG_DIR}/${service}_${host}.err" + rotate_log "$outfile" + rotate_log "$errfile" + + extra_args="" + if [[ $service == "monitor" && $ACCUMULO_MONITOR_BIND_ALL == "true" ]]; then + extra_args="--address 0.0.0.0" + fi + nohup "${bin}/accumulo" "$service" $extra_args >"$outfile" 2>"$errfile" < /dev/null & + echo $! 
> "${pid_file}" + + # Check the max open files limit and selectively warn + max_files_open=$(ulimit -n) + if [[ -n $max_files_open ]] ; then + max_files_recommended=32768 + if (( max_files_open < max_files_recommended )); then + echo "WARN : Max open files on $host is $max_files_open, recommend $max_files_recommended" >&2 + fi + fi +} + +function stop_service() { + if [ -f "$pid_file" ]; then + echo "Stopping $service on $(get_host -s)"; + kill -s TERM "$(cat "$pid_file")" 2>/dev/null + rm -f "${pid_file}" 2>/dev/null + fi +} + +function kill_service() { + if [ -f "$pid_file" ]; then + echo "Killing $service on $(get_host -s)"; + kill -s KILL "$(cat "$pid_file")" 2>/dev/null + rm -f "${pid_file}" 2>/dev/null + fi +} + +function main() { if [[ -z $1 ]]; then invalid_args " cannot be empty" fi @@ -70,7 +140,8 @@ function main() { source "$basedir"/libexec/load-env.sh - service=$1 + service="$1" + pid_file="${basedir}/run/accumulo-${service}.pid" case "$service" in gc|master|monitor|tserver|tracer) if [[ -z $2 ]]; then @@ -78,13 +149,13 @@ function main() { fi case "$2" in start) - "${ACCUMULO_LIBEXEC_DIR}/service.sh" start "$(get_host)" "$1" + start_service ;; stop) - "${ACCUMULO_LIBEXEC_DIR}/service.sh" stop "$(get_host)" "$service" TERM + stop_service ;; kill) - "${ACCUMULO_LIBEXEC_DIR}/service.sh" stop "$(get_host)" "$service" KILL + kill_service ;; *) invalid_args "'$2' is an invalid " @@ -92,7 +163,7 @@ function main() { esac ;; *) - invalid_args "'$1' is an invalid " + invalid_args "'$service' is an invalid " ;; esac } diff --git a/assemble/contrib/bootstrap-hdfs.sh b/assemble/contrib/bootstrap-hdfs.sh index 26f94f4415d..dea6b4c0cda 100755 --- a/assemble/contrib/bootstrap-hdfs.sh +++ b/assemble/contrib/bootstrap-hdfs.sh @@ -28,10 +28,13 @@ basedir=$( cd -P "${contrib}"/.. 
&& pwd ) source "$basedir"/libexec/load-env.sh +lib="${basedir}/lib" +conf="${basedir}/conf" + # # Find the system context directory in HDFS # -SYSTEM_CONTEXT_HDFS_DIR=$(grep -A1 "general.vfs.classpaths" "$ACCUMULO_CONF_DIR/accumulo-site.xml" | tail -1 | perl -pe 's/\s+//; s/<\/value>//; s/,.+$//; s|[^/]+$||; print $ARGV[1]') +SYSTEM_CONTEXT_HDFS_DIR=$(grep -A1 "general.vfs.classpaths" "$conf/accumulo-site.xml" | tail -1 | perl -pe 's/\s+//; s/<\/value>//; s/,.+$//; s|[^/]+$||; print $ARGV[1]') if [ -z "$SYSTEM_CONTEXT_HDFS_DIR" ] then @@ -63,7 +66,7 @@ fi # # Replicate to all tservers to avoid network contention on startup # -TSERVERS=$ACCUMULO_CONF_DIR/tservers +TSERVERS=${conf}/tservers NUM_TSERVERS=$(egrep -v '(^#|^\s*$)' "$TSERVERS" | wc -l) #let each datanode service around 50 clients @@ -73,19 +76,19 @@ REP=$(( NUM_TSERVERS / 50 )) # # Copy all jars in lib to the system context directory # -"$HADOOP_PREFIX/bin/hadoop" fs -moveFromLocal "$ACCUMULO_LIB_DIR"/*.jar "$SYSTEM_CONTEXT_HDFS_DIR" > /dev/null +"$HADOOP_PREFIX/bin/hadoop" fs -moveFromLocal "$lib"/*.jar "$SYSTEM_CONTEXT_HDFS_DIR" > /dev/null "$HADOOP_PREFIX/bin/hadoop" fs -setrep -R $REP "$SYSTEM_CONTEXT_HDFS_DIR" > /dev/null # # We need some of the jars in lib, copy them back out and remove them from the system context dir # -"$HADOOP_PREFIX/bin/hadoop" fs -copyToLocal "$SYSTEM_CONTEXT_HDFS_DIR/commons-vfs2.jar" "$ACCUMULO_LIB_DIR/." > /dev/null +"$HADOOP_PREFIX/bin/hadoop" fs -copyToLocal "$SYSTEM_CONTEXT_HDFS_DIR/commons-vfs2.jar" "$lib/." > /dev/null "$HADOOP_PREFIX/bin/hadoop" fs -rm "$SYSTEM_CONTEXT_HDFS_DIR/commons-vfs2.jar" > /dev/null -"$HADOOP_PREFIX/bin/hadoop" fs -copyToLocal "$SYSTEM_CONTEXT_HDFS_DIR/accumulo-start.jar" "$ACCUMULO_LIB_DIR/." > /dev/null +"$HADOOP_PREFIX/bin/hadoop" fs -copyToLocal "$SYSTEM_CONTEXT_HDFS_DIR/accumulo-start.jar" "$lib/." 
> /dev/null "$HADOOP_PREFIX/bin/hadoop" fs -rm "$SYSTEM_CONTEXT_HDFS_DIR/accumulo-start.jar" > /dev/null -"$HADOOP_PREFIX/bin/hadoop" fs -copyToLocal "$SYSTEM_CONTEXT_HDFS_DIR/slf4j*.jar" "$ACCUMULO_LIB_DIR/." > /dev/null +"$HADOOP_PREFIX/bin/hadoop" fs -copyToLocal "$SYSTEM_CONTEXT_HDFS_DIR/slf4j*.jar" "$lib/." > /dev/null "$HADOOP_PREFIX/bin/hadoop" fs -rm "$SYSTEM_CONTEXT_HDFS_DIR/slf4j*.jar" > /dev/null -for f in $(grep -v '^#' "$ACCUMULO_CONF_DIR/tservers") +for f in $(grep -v '^#' "${conf}/tservers") do rsync -ra --delete "$ACCUMULO_HOME" "$(dirname "$ACCUMULO_HOME")" done diff --git a/assemble/contrib/gen-monitor-cert.sh b/assemble/contrib/gen-monitor-cert.sh index e7f313efd46..f6a217645f7 100755 --- a/assemble/contrib/gen-monitor-cert.sh +++ b/assemble/contrib/gen-monitor-cert.sh @@ -26,14 +26,15 @@ contrib=$( cd -P "$( dirname "$SOURCE" )" && pwd ) basedir=$( cd -P "${contrib}"/.. && pwd ) # Stop: Resolve Script Directory +conf="${basedir}/conf" source "$basedir"/libexec/load-env.sh ALIAS="default" KEYPASS=$(LC_CTYPE=C tr -dc '#-~' < /dev/urandom | tr -d '<>&' | head -c 20) STOREPASS=$(LC_CTYPE=C tr -dc '#-~' < /dev/urandom | tr -d '<>&' | head -c 20) -KEYSTOREPATH="$ACCUMULO_CONF_DIR/keystore.jks" -TRUSTSTOREPATH="$ACCUMULO_CONF_DIR/conf/cacerts.jks" -CERTPATH="$ACCUMULO_CONF_DIR/server.cer" +KEYSTOREPATH="${conf}/keystore.jks" +TRUSTSTOREPATH="${conf}/conf/cacerts.jks" +CERTPATH="${conf}/server.cer" if [[ -e "$KEYSTOREPATH" ]]; then rm -i "$KEYSTOREPATH" diff --git a/assemble/contrib/tool.sh b/assemble/contrib/tool.sh index cb8cedce31b..0b0e66617a7 100755 --- a/assemble/contrib/tool.sh +++ b/assemble/contrib/tool.sh @@ -44,7 +44,7 @@ if [[ $(eval "$ZOOKEEPER_CMD" | wc -l) -ne 1 ]] ; then fi ZOOKEEPER_LIB=$(eval "$ZOOKEEPER_CMD") -LIB="$ACCUMULO_LIB_DIR" +LIB="${basedir}/lib" CORE_LIB="$LIB/accumulo-core.jar" FATE_LIB="$LIB/accumulo-fate.jar" THRIFT_LIB="$LIB/libthrift.jar" diff --git a/assemble/libexec/accumulo-watcher.sh
b/assemble/libexec/accumulo-watcher.sh deleted file mode 100755 index 1731cdddd0b..00000000000 --- a/assemble/libexec/accumulo-watcher.sh +++ /dev/null @@ -1,141 +0,0 @@ -#! /usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -LOGHOST=$1 -shift -process=$1 - -SOURCE="${BASH_SOURCE[0]}" -while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink - libexec="$( cd -P "$( dirname "$SOURCE" )" && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$libexec/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located -done -libexec="$( cd -P "$( dirname "$SOURCE" )" && pwd )" -# Stop: Resolve Script Directory - -source "${libexec}"/load-env.sh - -# Setting for watcher -UNEXPECTED_TIMESPAN=${UNEXPECTED_TIMESPAN:-3600} -UNEXPECTED_RETRIES=${UNEXPECTED_RETRIES:-2} -OOM_TIMESPAN=${OOM_TIMESPAN-3600} -OOM_RETRIES=${OOM_RETRIES-5} -ZKLOCK_TIMESPAN=${ZKLOCK_TIMESPAN-600} -ZKLOCK_RETRIES=${ZKLOCK_RETRIES-5} - -CLEAN_EXIT="Clean Exit" -UNEXPECTED_EXCEPTION="Unexpected exception" -OOM_EXCEPTION="Out of memory exception" -ZKLOCK_LOST="ZKLock lost" -UNKNOWN_ERROR="Unknown error" - 
-ERRFILE=${ACCUMULO_LOG_DIR}/${process}_${LOGHOST}.err -OUTFILE=${ACCUMULO_LOG_DIR}/${process}_${LOGHOST}.out -DEBUGLOG=${ACCUMULO_LOG_DIR}/${process}_$(hostname).debug.log -COMMAND="${ACCUMULO_BIN_DIR}/accumulo \"\$@\"" - -logger -s "starting process $process at $(date)" -stopRunning="" -while [ -z "$stopRunning" ]; -do - eval "$COMMAND" 2> "$ERRFILE" - exit=$? - unset cause - if [ "$exit" -eq 0 ]; then - potentialStopRunning=$CLEAN_EXIT - elif [ "$exit" -eq 1 ]; then - potentialStopRunning=$UNEXPECTED_EXCEPTION - elif [ "$exit" -eq 130 ]; then - stopRunning="Control C detected, exiting" - elif [ "$exit" -eq 143 ]; then - stopRunning="Process terminated, exiting" - elif [ "$exit" -eq 137 ]; then - potentialStopRunning="Process killed, exiting" - fi - if [ -z "$stopRunning" ]; then - stopRunning=$potentialStopRunning; - - if [ $exit -eq 1 ]; then - source="exit code" - cause=$UNEXPECTED_EXCEPTION - elif tail -n50 "$OUTFILE" | grep "java.lang.OutOfMemoryError:" > /dev/null; then - source="logs" - cause=$OOM_EXCEPTION - elif [ "$process" = "tserver" ]; then - if tail -n50 "$DEBUGLOG" | grep "ERROR: Lost tablet server lock (reason =" > /dev/null ; then - source="logs" - cause=$ZKLOCK_LOST - fi - elif [ "$process" = "master" ]; then - if tail -n50 "$DEBUGLOG" | grep "ERROR: Master lock in zookeeper lost (reason =" > /dev/null ; then - source="logs" - cause=$ZKLOCK_LOST - fi - elif [ "$process" = "gc" ]; then - if tail -n50 "$DEBUGLOG" | grep "FATAL: GC lock in zookeeper lost (reason =" > /dev/null ; then - source="logs" - cause=$ZKLOCK_LOST - fi - elif [ "$process" = "monitor" ]; then - if tail -n50 "$DEBUGLOG" | grep "ERROR: Monitor lock in zookeeper lost (reason =" > /dev/null ; then - source="logs" - cause=$ZKLOCK_LOST - fi - elif [ $exit -ne 0 ]; then - source="exit code" - cause=$UNKNOWN_ERROR - fi - case $cause in - #Unknown exit code - "$UNKNOWN_ERROR") - #window doesn't matter when retries = 0 - RETRIES=0 - ;; - - "$UNEXPECTED_EXCEPTION") - 
WINDOW=$UNEXPECTED_TIMESPAN - RETRIES=$UNEXPECTED_RETRIES - ;; - - "$OOM_EXCEPTION") - WINDOW=$OOM_TIMESPAN - RETRIES=$OOM_RETRIES - ;; - - "$ZKLOCK_LOST") - WINDOW=$ZKLOCK_TIMESPAN - RETRIES=$ZKLOCK_RETRIES - ;; - esac - - if [ -n "$cause" ]; then - stopRunning="" - declare -i attempts - attempts="$(jobs | grep -c "reason$cause")+1" - if [ "$RETRIES" -le "$attempts" ]; then - stopRunning="$process encountered $cause in $source with exit code $exit- quitting ($attempts/$RETRIES in $WINDOW seconds)" - # kill all sleeps now - for list in $(jobs | cut -b 2-2); do kill %"$list"; done - else - logger -s "$process encountered $cause in $source with exit code $exit- retrying ($attempts/$RETRIES in $WINDOW seconds)" - eval "(sleep $WINDOW ; echo 'reason$cause' >> /dev/null) &" - fi - fi -fi -done -logger -s "$stopRunning" diff --git a/assemble/libexec/cluster.sh b/assemble/libexec/cluster.sh deleted file mode 100755 index 3ee42c40e30..00000000000 --- a/assemble/libexec/cluster.sh +++ /dev/null @@ -1,325 +0,0 @@ -#! /usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -function print_usage { - cat < ( ...) 
- -Commands: - start-all [--notTservers] Starts all services on cluster - start-tservers Starts all tservers on cluster - start-here Starts all services on this node - start-service Starts on - stop-all Stops all services on cluster - stop-tservers Stops all tservers on cluster - stop-here Stops all services on this node - -EOF - exit 1 -} - -function invalid_args { - echo -e "Invalid arguments: $1\n" - print_usage 1>&2 - exit 1 -} - -function get_ip() { - ip_addr=$(ip addr | grep 'state UP' -A2 | tail -n1 | awk '{print $2}' | cut -f1 -d'/') - if [[ $? != 0 ]]; then - ip_addr=$(getent ahosts "$(hostname -f)" | grep DGRAM | cut -f 1 -d ' ') - fi - echo "$ip_addr" -} - -function start_service() { - host="$1" - service="$2" - - if [[ $host == "localhost" || $host == $(hostname -f) || $host == $(hostname -s) || $host == $(get_ip) ]]; then - "$libexec/service.sh" start "$host" "$service" - else - $SSH "$host" "bash -c 'ACCUMULO_CONF_DIR=${ACCUMULO_CONF_DIR} $libexec/service.sh start \"$host\" \"$service\"'" - fi -} - -function start_tservers() { - echo -n "Starting tablet servers ..." - count=1 - for server in $(egrep -v '(^#|^\s*$)' "${ACCUMULO_CONF_DIR}/tservers"); do - echo -n "." 
- start_service "$server" tserver & - if (( ++count % 72 == 0 )) ; - then - echo - wait - fi - done - echo " done" -} - -function start_all() { - unset DISPLAY - - start_service "$monitor" monitor - - if [ "$1" != "--notTservers" ]; then - start_tservers - fi - - for master in $(egrep -v '(^#|^\s*$)' "$ACCUMULO_CONF_DIR/masters"); do - start_service "$master" master - done - - for gc in $(egrep -v '(^#|^\s*$)' "$ACCUMULO_CONF_DIR/gc"); do - start_service "$gc" gc - done - - for tracer in $(egrep -v '(^#|^\s*$)' "$ACCUMULO_CONF_DIR/tracers"); do - start_service "$tracer" tracer - done -} - -function start_here() { - - local_hosts="$(hostname -a 2> /dev/null) $(hostname) localhost 127.0.0.1 $(get_ip)" - for host in $local_hosts; do - if grep -q "^${host}\$" "$ACCUMULO_CONF_DIR/tservers"; then - start_service "$host" tserver - break - fi - done - - for host in $local_hosts; do - if grep -q "^${host}\$" "$ACCUMULO_CONF_DIR/masters"; then - start_service "$host" master - break - fi - done - - for host in $local_hosts; do - if grep -q "^${host}\$" "$ACCUMULO_CONF_DIR/gc"; then - start_service "$host" gc - break - fi - done - - for host in $local_hosts; do - if [[ $host == "$monitor" ]]; then - start_service "$monitor" monitor - break - fi - done - - for host in $local_hosts; do - if grep -q "^${host}\$" "$ACCUMULO_CONF_DIR/tracers"; then - start_service "$host" tracer - break - fi - done -} - -function stop_service() { - host="$1" - service="$2" - signal="$3" - - # only stop if there's not one already running - if [[ $host == localhost || $host = "$(hostname -s)" || $host = "$(hostname -f)" || $host = $(get_ip) ]] ; then - "$libexec/service.sh" stop "$host" "$service" "$signal" - else - $SSH "$host" "bash -c '$libexec/service.sh stop \"$host\" \"$service\" \"$signal\"'" - fi -} - -function stop_tservers() { - tserver_hosts=$(egrep -v '(^#|^\s*$)' "${ACCUMULO_CONF_DIR}/tservers") - - echo "Stopping unresponsive tablet servers (if any)..." 
- for server in ${tserver_hosts}; do - # only start if there's not one already running - stop_service "$server" tserver TERM & - done - - sleep 10 - - echo "Stopping unresponsive tablet servers hard (if any)..." - for server in ${tserver_hosts}; do - # only start if there's not one already running - stop_service "$server" tserver KILL & - done - - echo "Cleaning tablet server entries from zookeeper" - ${accumulo_cmd} org.apache.accumulo.server.util.ZooZap -tservers -} - -function stop_all() { - echo "Stopping accumulo services..." - if ! ${accumulo_cmd} admin stopAll - then - echo "Invalid password or unable to connect to the master" - echo "Initiating forced shutdown in 15 seconds (Ctrl-C to abort)" - sleep 10 - echo "Initiating forced shutdown in 5 seconds (Ctrl-C to abort)" - else - echo "Accumulo shut down cleanly" - echo "Utilities and unresponsive servers will shut down in 5 seconds (Ctrl-C to abort)" - fi - - sleep 5 - - #look for master and gc processes not killed by 'admin stopAll' - for signal in TERM KILL ; do - for master in $(grep -v '^#' "$ACCUMULO_CONF_DIR/masters"); do - stop_service "$master" master $signal - done - - for gc in $(grep -v '^#' "$ACCUMULO_CONF_DIR/gc"); do - stop_service "$gc" gc $signal - done - - stop_service "$monitor" monitor $signal - - for tracer in $(egrep -v '(^#|^\s*$)' "$ACCUMULO_CONF_DIR/tracers"); do - stop_service "$tracer" tracer $signal - done - done - - # stop tserver still running - stop_tservers - - echo "Cleaning all server entries in ZooKeeper" - ${accumulo_cmd} org.apache.accumulo.server.util.ZooZap -master -tservers -tracers --site-file "$ACCUMULO_CONF_DIR/accumulo-site.xml" -} - -function stop_here() { - # Determine hostname without errors to user - hosts_to_check=($(hostname -a 2> /dev/null | head -1) $(hostname -f)) - - if egrep -q localhost\|127.0.0.1 "$ACCUMULO_CONF_DIR/tservers"; then - ${accumulo_cmd} admin stop localhost - else - for host in "${hosts_to_check[@]}"; do - if grep -q "$host" 
"$ACCUMULO_CONF_DIR"/tservers; then - ${accumulo_cmd} admin stop "$host" - fi - done - fi - - for host in "${hosts_to_check[@]}"; do - for signal in TERM KILL; do - for svc in tserver gc master monitor tracer; do - stop_service "$host" $svc $signal - done - done - done -} - -function main() { - # Start: Resolve Script Directory - SOURCE="${BASH_SOURCE[0]}" - while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink - libexec="$( cd -P "$( dirname "$SOURCE" )" && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$libexec/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located - done - libexec="$( cd -P "$( dirname "$SOURCE" )" && pwd )" - # Stop: Resolve Script Directory - - source "$libexec"/load-env.sh - - if [[ -f $ACCUMULO_CONF_DIR/slaves ]]; then - echo "ERROR: A 'slaves' file was found in $ACCUMULO_CONF_DIR/" - echo "Accumulo now reads tablet server hosts from 'tservers' and requires that the 'slaves' file not be present to reduce confusion." - echo "Please rename the 'slaves' file to 'tservers' or remove it if both exist." - exit 1 - fi - - if [[ ! -f $ACCUMULO_CONF_DIR/tservers ]]; then - echo "ERROR: A 'tservers' file was not found at $ACCUMULO_CONF_DIR/tservers" - echo "Please make sure it exists and is configured with tablet server hosts." - exit 1 - fi - - unset master1 - if [[ -f "$ACCUMULO_CONF_DIR/masters" ]]; then - master1=$(egrep -v '(^#|^\s*$)' "$ACCUMULO_CONF_DIR/masters" | head -1) - fi - - if [[ -z "${monitor}" ]] ; then - monitor=$master1 - if [[ -f "$ACCUMULO_CONF_DIR/monitor" ]]; then - monitor=$(egrep -v '(^#|^\s*$)' "$ACCUMULO_CONF_DIR/monitor" | head -1) - fi - if [[ -z "${monitor}" ]] ; then - echo "Could not infer a Monitor role. You need to either define \"${ACCUMULO_CONF_DIR}/monitor\"," - echo "or make sure \"${ACCUMULO_CONF_DIR}/masters\" is non-empty." - exit 1 - fi - fi - if [[ ! 
-f "$ACCUMULO_CONF_DIR/tracers" ]]; then - if [[ -z "${master1}" ]] ; then - echo "Could not find a master node to use as a default for the tracer role." - echo "Either set up \"${ACCUMULO_CONF_DIR}/tracers\" or make sure \"${ACCUMULO_CONF_DIR}/masters\" is non-empty." - exit 1 - else - echo "$master1" > "$ACCUMULO_CONF_DIR/tracers" - fi - fi - - if [[ ! -f "$ACCUMULO_CONF_DIR/gc" ]]; then - if [[ -z "${master1}" ]] ; then - echo "Could not infer a GC role. You need to either set up \"${ACCUMULO_CONF_DIR}/gc\" or make sure \"${ACCUMULO_CONF_DIR}/masters\" is non-empty." - exit 1 - else - echo "$master1" > "$ACCUMULO_CONF_DIR/gc" - fi - fi - accumulo_cmd="$ACCUMULO_BIN_DIR/accumulo" - - SSH='ssh -qnf -o ConnectTimeout=2' - - if [[ -z $1 ]]; then - invalid_args " cannot be empty" - fi - - case "$1" in - start-all) - start_all "${*:2}" - ;; - start-tservers) - start_tservers - ;; - start-here) - start_here - ;; - stop-all) - stop_all - ;; - stop-tservers) - stop_tservers - ;; - stop-here) - stop_here - ;; - *) - invalid_args "'$1' is an invalid " - ;; - esac -} - -main "$@" diff --git a/assemble/libexec/load-env.sh b/assemble/libexec/load-env.sh index 2cc431e48b2..40110cbf8ea 100755 --- a/assemble/libexec/load-env.sh +++ b/assemble/libexec/load-env.sh @@ -35,7 +35,7 @@ locationByProgram() { RESULT=$( which "$1" ) if [[ "$?" != 0 && -z "${RESULT}" ]]; then - echo "Cannot find '$1' and '$2' is not set in $ACCUMULO_CONF_DIR/accumulo-env.sh" + echo "Cannot find '$1' and '$2' is not set in $conf/accumulo-env.sh" exit 1 fi while [ -h "${RESULT}" ]; do # resolve $RESULT until the file is no longer a symlink @@ -58,22 +58,21 @@ while [ -h "$SOURCE" ]; do done libexec="$( cd -P "$( dirname "$SOURCE" )" && pwd )" basedir=$( cd -P "${libexec}"/.. && pwd ) +conf="${basedir}/conf" -export ACCUMULO_CONF_DIR="${ACCUMULO_CONF_DIR:-$basedir/conf}" - -if [[ -z $ACCUMULO_CONF_DIR || ! -d $ACCUMULO_CONF_DIR ]]; then - echo "ACCUMULO_CONF_DIR=$ACCUMULO_CONF_DIR is not a valid directory. 
Please make sure it exists" +if [[ -z $conf || ! -d $conf ]]; then + echo "$conf is not a valid directory. Please make sure it exists" exit 1 fi -if [[ ! -f $ACCUMULO_CONF_DIR/accumulo-env.sh || ! -f $ACCUMULO_CONF_DIR/accumulo-site.xml ]]; then - echo "The configuration files 'accumulo-env.sh' & 'accumulo-site.xml' must exist in $ACCUMULO_CONF_DIR" - echo "Run 'accumulo create-config' to create them or copy them from $ACCUMULO_CONF_DIR/examples" +if [[ ! -f $conf/accumulo-env.sh || ! -f $conf/accumulo-site.xml ]]; then + echo "The configuration files 'accumulo-env.sh' & 'accumulo-site.xml' must exist in $conf" + echo "Run 'accumulo create-config' to create them or copy them from $conf/examples" echo "Follow the instructions in INSTALL.md to edit them for your environment." exit 1 fi -source "$ACCUMULO_CONF_DIR/accumulo-env.sh" +source "$conf/accumulo-env.sh" ## Variables that must be set @@ -93,30 +92,19 @@ test -z "${HADOOP_PREFIX}" && locationByProgram hadoop HADOOP_PREFIX test -z "${ZOOKEEPER_HOME}" && locationByProgram zkCli.sh ZOOKEEPER_HOME export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_PREFIX/etc/hadoop}" -export ACCUMULO_HOME="${ACCUMULO_HOME:-$basedir}" -export ACCUMULO_BIN_DIR="${ACCUMULO_BIN_DIR:-$basedir/bin}" -export ACCUMULO_CONF_DIR="${ACCUMULO_CONF_DIR:-$basedir/conf}" -export ACCUMULO_LIB_DIR="${ACCUMULO_LIB_DIR:-$basedir/lib}" -export ACCUMULO_LIBEXEC_DIR="${ACCUMULO_LIBEXEC_DIR:-$basedir/libexec}" +export ACCUMULO_HOME="$basedir" +export ACCUMULO_CONF_DIR="$conf" export ACCUMULO_LOG_DIR="${ACCUMULO_LOG_DIR:-$basedir/logs}" -export ACCUMULO_PID_DIR="${ACCUMULO_PID_DIR:-$basedir/run}" # Make directories that may not exist mkdir -p "${ACCUMULO_LOG_DIR}" 2>/dev/null -mkdir -p "${ACCUMULO_PID_DIR}" 2>/dev/null +mkdir -p "${basedir}/run" 2>/dev/null # Verify all directories exist verify_env_dir "JAVA_HOME" "${JAVA_HOME}" verify_env_dir "HADOOP_PREFIX" "${HADOOP_PREFIX}" verify_env_dir "HADOOP_CONF_DIR" "${HADOOP_CONF_DIR}" verify_env_dir 
"ZOOKEEPER_HOME" "${ZOOKEEPER_HOME}" -verify_env_dir "ACCUMULO_HOME" "${ACCUMULO_HOME}" -verify_env_dir "ACCUMULO_BIN_DIR" "${ACCUMULO_BIN_DIR}" -verify_env_dir "ACCUMULO_CONF_DIR" "${ACCUMULO_CONF_DIR}" -verify_env_dir "ACCUMULO_LIB_DIR" "${ACCUMULO_LIB_DIR}" -verify_env_dir "ACCUMULO_LIBEXEC_DIR" "${ACCUMULO_LIBEXEC_DIR}" -verify_env_dir "ACCUMULO_LOG_DIR" "${ACCUMULO_LOG_DIR}" -verify_env_dir "ACCUMULO_PID_DIR" "${ACCUMULO_PID_DIR}" ## Verify Zookeeper installation ZOOKEEPER_VERSION=$(find -L "$ZOOKEEPER_HOME" -maxdepth 1 -name "zookeeper-[0-9]*.jar" | head -1) @@ -134,19 +122,6 @@ fi ## Variables that have a default export ACCUMULO_KILL_CMD=${ACCUMULO_KILL_CMD:-'kill -9 %p'} export ACCUMULO_MONITOR_BIND_ALL=${ACCUMULO_MONITOR_BIND_ALL:-"true"} -export ACCUMULO_JAAS_CONF=${ACCUMULO_JAAS_CONF:-${ACCUMULO_CONF_DIR}/jaas.conf} -export ACCUMULO_KRB5_CONF=${ACCUMULO_KRB5_CONF:-${ACCUMULO_CONF_DIR}/krb5.conf} -export ACCUMULO_NUM_OUT_FILES=${ACCUMULO_NUM_OUT_FILES:-5} -export ACCUMULO_WATCHER=${ACCUMULO_WATCHER:-"false"} -export ACCUMULO_NUM_TSERVERS=${ACCUMULO_NUM_TSERVERS:-1} -export ACCUMULO_ENABLE_NUMACTL=${ACCUMULO_ENABLE_NUMACTL:-"false"} -export ACCUMULO_NUMACTL_OPTIONS=${ACCUMULO_NUMACTL_OPTIONS:-"--interleave=all"} - -# Validate that ACCUMULO_NUM_TSERVERS is a positive integer -if ! [[ $ACCUMULO_NUM_TSERVERS =~ ^[0-9]+$ ]]; then - echo "ACCUMULO_NUM_TSERVERS, when defined in accumulo-env.sh, should be a positive number, is '$ACCUMULO_NUM_TSERVERS'" - exit 1 -fi export HADOOP_HOME=$HADOOP_PREFIX export HADOOP_HOME_WARN_SUPPRESS=true diff --git a/assemble/libexec/service.sh b/assemble/libexec/service.sh deleted file mode 100755 index 9b47382acf5..00000000000 --- a/assemble/libexec/service.sh +++ /dev/null @@ -1,239 +0,0 @@ -#! /usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -function print_usage { - cat < ( ...) - -Commands: - start Starts on local - stop Stops using on local - -EOF - exit 1 -} - -function invalid_args { - echo -e "Invalid arguments: $1\n" - print_usage 1>&2 - exit 1 -} - -rotate_log () { - logfile=$1; - max_retained=$2; - if [[ ! "$max_retained" =~ ^[0-9]+$ ]] || [[ "$max_retained" -lt 1 ]] ; then - echo "ACCUMULO_NUM_OUT_FILES should be a positive number, but was '$max_retained'" - exit 1 - fi - - if [ -f "$logfile" ]; then # rotate logs - while [[ "$max_retained" -gt 1 ]]; do - prev=$(( max_retained - 1)) - [ -f "$logfile.$prev" ] && mv -f "$logfile.$prev" "$logfile.$max_retained" - max_retained=$prev - done - mv -f "$logfile" "$logfile.$max_retained"; - fi -} - -function start_service() { - - if [[ $# -ne 2 ]]; then - invalid_args "start command expects these arguments: " - fi - - host="$1" - service="$2" - - address=$host - loghost=$host - - # When the hostname provided is the alias/shortname, try to use the FQDN to make - # sure we send the right address to the Accumulo process. 
- if [[ "$host" = "$(hostname -s)" ]]; then - host="$(hostname -f)" - address="$host" - fi - - if [[ ${service} == "monitor" && ${ACCUMULO_MONITOR_BIND_ALL} == "true" ]]; then - address="0.0.0.0" - fi - - if [[ $service == "master" ]]; then - "$ACCUMULO_BIN_DIR/accumulo" org.apache.accumulo.master.state.SetGoalState NORMAL - fi - - COMMAND="${ACCUMULO_BIN_DIR}/accumulo" - if [ "${ACCUMULO_WATCHER}" = "true" ]; then - COMMAND="${ACCUMULO_LIBEXEC_DIR}/accumulo-watcher.sh ${loghost}" - fi - - OUTFILE="${ACCUMULO_LOG_DIR}/${service}_${loghost}.out" - ERRFILE="${ACCUMULO_LOG_DIR}/${service}_${loghost}.err" - - # Rotate the .out and .err files - rotate_log "$OUTFILE" "${ACCUMULO_NUM_OUT_FILES}" - rotate_log "$ERRFILE" "${ACCUMULO_NUM_OUT_FILES}" - - # NUMA sanity check - if [[ $ACCUMULO_NUM_TSERVERS -eq 1 && -n $TSERVER_NUMA_OPTIONS ]]; then - echo "TSERVER_NUMA_OPTIONS declared when ACCUMULO_NUM_TSERVERS is 1, use ACCUMULO_NUMACTL_OPTIONS instead" - exit 1 - fi - if [[ $ACCUMULO_NUM_TSERVERS -gt 1 && -n $TSERVER_NUMA_OPTIONS && ${#TSERVER_NUMA_OPTIONS[*]} -ne $ACCUMULO_NUM_TSERVERS ]]; then - echo "TSERVER_NUMA_OPTIONS is declared, but not the same size as ACCUMULO_NUM_TSERVERS" - exit 1 - fi - - if [[ "$service" != "tserver" || $ACCUMULO_NUM_TSERVERS -eq 1 ]]; then - # Check the pid file to figure out if its already running. - PID_FILE="${ACCUMULO_PID_DIR}/accumulo-${ACCUMULO_IDENT_STRING}-${service}.pid" - if [[ -f "${PID_FILE}" ]]; then - PID=$(cat "${PID_FILE}") - if kill -0 "$PID" 2>/dev/null; then - # Starting an already-started service shouldn't be an error per LSB - echo "$host : $service already running (${PID})" - exit 0 - fi - fi - echo "Starting $service on $host" - - ACCUMULO_ENABLE_NUMACTL=${ACCUMULO_ENABLE_NUMACTL:-"true"} - ACCUMULO_NUMACTL_OPTIONS=${ACCUMULO_NUMACTL_OPTIONS:-"--interleave=all"} - NUMA=$(which numactl 2>/dev/null) - NUMACTL_EXISTS="$?" 
- if [[ ( ${NUMACTL_EXISTS} -eq 0 ) && ( ${ACCUMULO_ENABLE_NUMACTL} == "true" ) ]] ; then - export NUMA_CMD="${NUMA} ${ACCUMULO_NUMACTL_OPTIONS}" - else - export NUMA_CMD="" - fi - - # Fork the process, store the pid - nohup ${NUMA_CMD} "$COMMAND" "${service}" --address "${address}" >"$OUTFILE" 2>"$ERRFILE" < /dev/null & - echo $! > "${PID_FILE}" - - else - - S="$service" - for (( t=1; t<=ACCUMULO_NUM_TSERVERS; t++)); do - - service="$S-$t" - - # Check the pid file to figure out if its already running. - PID_FILE="${ACCUMULO_PID_DIR}/accumulo-${ACCUMULO_IDENT_STRING}-${service}.pid" - if [[ -f "${PID_FILE}" ]]; then - PID=$(cat "${PID_FILE}") - if kill -0 "$PID" 2>/dev/null; then - # Starting an already-started service shouldn't be an error per LSB - echo "$host : $service already running (${PID})" - continue - fi - fi - echo "Starting $service on $host" - - ACCUMULO_NUMACTL_OPTIONS=${ACCUMULO_NUMACTL_OPTIONS:-"--interleave=all"} - ACCUMULO_NUMACTL_OPTIONS=${TSERVER_NUMA_OPTIONS[$t]} - if [[ "$ACCUMULO_ENABLE_NUMACTL" == "true" ]]; then - NUMA=$(which numactl 2>/dev/null) - NUMACTL_EXISTS=$? - if [[ ( ${NUMACTL_EXISTS} -eq 0 ) ]]; then - export NUMA_CMD="${NUMA} ${ACCUMULO_NUMACTL_OPTIONS}" - else - export NUMA_CMD="" - fi - fi - - # We want the files to be consistently named with the log files - # server_identifier_hostname.{out,err}, e.g. tserver_2_fqdn.out - OUTFILE="${ACCUMULO_LOG_DIR}/${S}_${t}_${loghost}.out" - ERRFILE="${ACCUMULO_LOG_DIR}/${S}_${t}_${loghost}.err" - - # Rotate the .out and .err files - rotate_log "$OUTFILE" "${ACCUMULO_NUM_OUT_FILES}" - rotate_log "$ERRFILE" "${ACCUMULO_NUM_OUT_FILES}" - - # Fork the process, store the pid - nohup ${NUMA_CMD} "$COMMAND" "${service}" --address "${address}" >"$OUTFILE" 2>"$ERRFILE" < /dev/null & - echo $! 
> "${PID_FILE}" - - done - fi - - # Check the max open files limit and selectively warn - MAX_FILES_OPEN=$(ulimit -n) - - if [[ -n $MAX_FILES_OPEN ]] ; then - MAX_FILES_RECOMMENDED=${MAX_FILES_RECOMMENDED:-32768} - if (( MAX_FILES_OPEN < MAX_FILES_RECOMMENDED )) - then - echo "WARN : Max open files on $host is $MAX_FILES_OPEN, recommend $MAX_FILES_RECOMMENDED" >&2 - fi - fi -} - -function stop_service() { - - if [[ $# -ne 3 ]]; then - invalid_args "stop command expects these arguments: " - fi - - host=$1 - service=$2 - signal=$3 - - for pid_file in ${ACCUMULO_PID_DIR}/accumulo-${ACCUMULO_IDENT_STRING}-${service}*.pid; do - if [[ -f "${pid_file}" ]]; then - echo "Stopping $service on $host"; - kill -s "$signal" "$(cat "${pid_file}")" 2>/dev/null - rm -f "${pid_file}" 2>/dev/null - fi - done -} - -function main() { - # Resolve libexec directory - SOURCE="${BASH_SOURCE[0]}" - while [[ -h "$SOURCE" ]]; do - libexec="$( cd -P "$( dirname "$SOURCE" )" && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$libexec/$SOURCE" - done - libexec="$( cd -P "$( dirname "$SOURCE" )" && pwd )" - - source "$libexec"/load-env.sh - - ACCUMULO_IDENT_STRING=${ACCUMULO_IDENT_STRING:-$USER} - - if [[ -z $1 ]]; then - invalid_args " cannot be empty" - fi - - case "$1" in - start) - start_service "${@:2}" - ;; - stop) - stop_service "${@:2}" - ;; - *) - invalid_args "'$1' is an invalid " - ;; - esac -} - -main "$@" diff --git a/assemble/libexec/templates/accumulo-env.sh b/assemble/libexec/templates/accumulo-env.sh index 078ce6fc380..787ddcd8849 100644 --- a/assemble/libexec/templates/accumulo-env.sh +++ b/assemble/libexec/templates/accumulo-env.sh @@ -39,8 +39,8 @@ export ACCUMULO_OTHER_OPTS="${otherHigh_otherLow}" # export HADOOP_CONF_DIR=/path/to/hadoop/etc/conf # export ZOOKEEPER_HOME=/path/to/zookeeper -# If not set below, Accumulo will derive these locations by determining the root of your -# installation and using the default locations +## If not set below, 
Accumulo will derive these locations by determining the root of your +## installation and using the default locations # export ACCUMULO_LOG_DIR=/path/to/accumulo/log @@ -50,41 +50,16 @@ export ACCUMULO_OTHER_OPTS="${otherHigh_otherLow}" ## Specifies what do when the JVM runs out of heap memory # export ACCUMULO_KILL_CMD='kill -9 %p' -## Should the monitor bind to all network interfaces -- default: true +## Should the monitor bind to all network interfaces # export ACCUMULO_MONITOR_BIND_ALL="true" -## Location of jaas.conf file. Needed by JAAS for things like Kerberos based logins -# export ACCUMULO_JAAS_CONF=${ACCUMULO_CONF_DIR}/jaas.conf -## Location of krb5.conf file. Needed by Kerberos subsystems to find login servers -# export ACCUMULO_KRB5_CONF=${ACCUMULO_CONF_DIR}/krb5.conf -## The number of .out and .err files per process to retain -# export ACCUMULO_NUM_OUT_FILES=5 -## Should process be automatically restarted -# export ACCUMULO_WATCHER="false" -## Sets number of tablet servers to run on each node -# export ACCUMULO_NUM_TSERVERS=1 -## Enables NUMA control policy -# export ACCUMULO_ENABLE_NUMACTL="false" -## Sets NUMA control options -# export ACCUMULO_NUMACTL_OPTIONS="--interleave=all" ############################################### # Variables that are optional. Uncomment to set ############################################### -## Example for configuring multiple tservers per host. Note that the ACCUMULO_NUMACTL_OPTIONS -## environment variable is used when ACCUMULO_NUM_TSERVERS is 1 to preserve backwards compatibility. -## If ACCUMULO_NUM_TSERVERS is greater than 2, then the TSERVER_NUMA_OPTIONS array is used if defined. -## If TSERVER_NUMA_OPTIONS is declared but not the correct size, then the service will not start. 
-# -# export ACCUMULO_NUM_TSERVERS=2 -# declare -a TSERVER_NUMA_OPTIONS -# TSERVER_NUMA_OPTIONS[1]="--cpunodebind 0" -# TSERVER_NUMA_OPTIONS[2]="--cpunodebind 1" - -## Optionally look for hadoop and accumulo native libraries for your -## platform in additional directories. (Use DYLD_LIBRARY_PATH on Mac OS X.) -## May not be necessary for Hadoop 2.x or using an RPM that installs to -## the correct system library directory. -# +## Specifies command that will wrap calls to Java in bin/accumulo +# export ACCUMULO_WRAP_CMD="" +## Optionally look for hadoop and accumulo native libraries for your platform in additional +## directories. (Use DYLD_LIBRARY_PATH on Mac OS X.) May not be necessary for Hadoop 2.x or +## using an RPM that installs to the correct system library directory. # export LD_LIBRARY_PATH=${HADOOP_PREFIX}/lib/native/${PLATFORM}:${LD_LIBRARY_PATH} - diff --git a/docs/src/main/asciidoc/chapters/administration.txt b/docs/src/main/asciidoc/chapters/administration.txt index 1478543852c..8bf4e405343 100644 --- a/docs/src/main/asciidoc/chapters/administration.txt +++ b/docs/src/main/asciidoc/chapters/administration.txt @@ -109,7 +109,7 @@ sync, which can cause problems with automatically timestamped data. === Configuration Accumulo is configured by editing several Shell and XML files found in -+$ACCUMULO_CONF_DIR+. The structure closely resembles Hadoop's configuration ++/path/to/accumulo-X.Y.Z/conf+. The structure closely resembles Hadoop's configuration files. Logging is primarily controlled using the log4j configuration files, @@ -177,7 +177,7 @@ After building the native map from the source, you will find the artifact in in this directory for the map library. If the file is renamed or moved from its target directory, the tablet server may not be able to find it. The system can also locate the native maps shared library by setting +LD_LIBRARY_PATH+ -(or +DYLD_LIBRARY_PATH+ on Mac OS X) in +$ACCUMULO_CONF_DIR/accumulo-env.sh+. 
+(or +DYLD_LIBRARY_PATH+ on Mac OS X) in +conf/accumulo-env.sh+. ===== Native Maps Configuration @@ -213,8 +213,8 @@ performance to the write-ahead log file which will slow ingest. On the machine that will serve as the Accumulo master: -. Write the IP address or domain name of the Accumulo Master to the +$ACCUMULO_CONF_DIR/masters+ file. -. Write the IP addresses or domain name of the machines that will be TabletServers in +$ACCUMULO_CONF_DIR/tservers+, one per line. +. Write the IP address or domain name of the Accumulo Master to the +conf/masters+ file. +. Write the IP addresses or domain name of the machines that will be TabletServers in +conf/tservers+, one per line. Note that if using domain names rather than IP addresses, DNS must be configured properly for all machines participating in the cluster. DNS can be a confusing source @@ -222,7 +222,7 @@ of errors. ==== Accumulo Settings Specify appropriate values for the following settings in -+$ACCUMULO_CONF_DIR/accumulo-site.xml+ : ++conf/accumulo-site.xml+ : [source,xml] @@ -251,10 +251,10 @@ some settings require a process restart to take effect. See the configuration do One aspect of Accumulo's configuration which is different as compared to the rest of the Hadoop ecosystem is that the server-process classpath is determined in part by multiple values. A -bootstrap classpath is based soley on the `accumulo-start.jar`, Log4j and `$ACCUMULO_CONF_DIR`. +bootstrap classpath is based solely on the `accumulo-start.jar`, Log4j and `/path/to/accumulo/conf`. A second classloader is used to dynamically load all of the resources specified by `general.classpaths` -in `$ACCUMULO_CONF_DIR/accumulo-site.xml`. This value is a comma-separated list of regular-expression +in `conf/accumulo-site.xml`. This value is a comma-separated list of regular-expression paths which are all loaded into a secondary classloader. This includes Hadoop, Accumulo and ZooKeeper jars necessary to run Accumulo.
When this value is not defined, a default value is used which attempts to load Hadoop from multiple potential locations depending on how Hadoop was installed. It is strongly @@ -281,7 +281,7 @@ Accumulo identifies +localhost:8020+ as a different HDFS instance than +127.0.0. ==== Deploy Configuration Copy the masters, tservers, accumulo-env.sh, and if necessary, accumulo-site.xml -from the +$ACCUMULO_CONF_DIR/+ directory on the master to all the machines +from the +conf/+ directory on the master to all the machines specified in the tservers file. ==== Sensitive Configuration Values @@ -342,7 +342,7 @@ The client configuration file is a subset of the information stored in accumulo- meant only for consumption by clients of Accumulo. By default, Accumulo checks a number of locations for a client configuration by default: -* +\${ACCUMULO_CONF_DIR}/client.conf+ +* +/path/to/accumulo/conf/client.conf+ * +/etc/accumulo/client.conf+ * +/etc/accumulo/conf/client.conf+ * +~/.accumulo/config+ @@ -447,7 +447,7 @@ take some time for particular configurations. ==== Adding a Tablet Server -Update your +$ACCUMULO_CONF_DIR/tservers+ file to account for the addition. +Update your +conf/tservers+ file to account for the addition. Next, ssh to each of the hosts you want to add and run: @@ -468,13 +468,13 @@ Alternatively, you can ssh to each of the hosts you want to remove and run: accumulo-service tserver stop -Be sure to update your +$ACCUMULO_CONF_DIR/tservers+ file to +Be sure to update your +conf/tservers+ file to account for the removal of these hosts. Bear in mind that the monitor will not re-read the tservers file automatically, so it will report the decomissioned servers as down; it's recommended that you restart the monitor so that the node list is up to date. The steps described to decomission a node can also be used (without removal of the host -from the +$ACCUMULO_CONF_DIR/tservers+ file) to gracefully stop a node. 
This will +from the +conf/tservers+ file) to gracefully stop a node. This will ensure that the tabletserver is cleanly stopped and recovery will not need to be performed when the tablets are re-hosted. @@ -636,7 +636,7 @@ http://research.google.com/pubs/pub36356.html[Google's Dapper]. ==== Tracers To collect traces, Accumulo needs at least one server listed in - +$ACCUMULO_CONF_DIR/tracers+. The server collects traces + +conf/tracers+. The server collects traces from clients and writes them to the +trace+ table. The Accumulo user that the tracer connects to Accumulo with can be configured with the following properties @@ -729,7 +729,7 @@ for adding any SpanReceiver to Accumulo: +lib/+ and NOT in +lib/ext/+ so that the new SpanReceiver class is visible to the same class loader of htrace-core. -2. Add the following to +$ACCUMULO_CONF_DIR/accumulo-site.xml+: +2. Add the following to +conf/accumulo-site.xml+: trace.span.receivers @@ -949,32 +949,6 @@ Time Start Service@Location Name Accumulo processes each write to a set of log files. By default these are found under +$ACCUMULO/logs/+. -[[watcher]] -=== Watcher -Accumulo includes scripts to automatically restart server processes in the case -of intermittent failures. To enable this watcher, edit +conf/accumulo-env.sh+ -to include the following: - -.... -# Should process be automatically restarted -export ACCUMULO_WATCHER="true" - -# What settings should we use for the watcher, if enabled -export UNEXPECTED_TIMESPAN="3600" -export UNEXPECTED_RETRIES="2" - -export OOM_TIMESPAN="3600" -export OOM_RETRIES="5" - -export ZKLOCK_TIMESPAN="600" -export ZKLOCK_RETRIES="5" -.... - -When an Accumulo process dies, the watcher will look at the logs and exit codes -to determine how the process failed and either restart or fail depending on the -recent history of failures. The restarting policy for various failure conditions -is configurable through the +*_TIMESPAN+ and +*_RETRIES+ variables shown above. 
- === Recovery In the event of TabletServer failure or error on shutting Accumulo down, some @@ -1016,7 +990,7 @@ that the only volume displayed is the volume from the current namenode's HDFS UR After verifying the current volume is correct, shut down the cluster and transition HDFS to the HA nameservice. -Edit +$ACCUMULO_CONF_DIR/accumulo-site.xml+ to notify accumulo that a volume is being replaced. First, +Edit +conf/accumulo-site.xml+ to notify accumulo that a volume is being replaced. First, add the new nameservice volume to the +instance.volumes+ property. Next, add the +instance.volumes.replacements+ property in the form of +old new+. It's important to not include the volume that's being replaced in +instance.volumes+, otherwise it's possible accumulo could continue @@ -1178,14 +1152,6 @@ are true interruptions to availability and some of which are false positives. Several of these conditions become more common in VM environments, where they can be exacerbated by resource constraints and clock drift. -Accumulo includes a mechanism to limit the impact of the false positives known -as the <>. The watcher monitors Accumulo processes and will restart -them when they fail for certain reasons. The watcher can be configured within -the accumulo-env.sh file inside of Accumulo's configuration directory. We -recommend using the watcher to monitor Accumulo processes, as it will restore -the system to full capacity without administrator interaction after many of the -common failure modes. - ==== Tested Versions Another large consideration for Accumulo stability is to use versions of software that have been tested together in a VM environment. 
Any cluster of diff --git a/docs/src/main/asciidoc/chapters/kerberos.txt b/docs/src/main/asciidoc/chapters/kerberos.txt index 171cb7bbfdd..06d46c2af9c 100644 --- a/docs/src/main/asciidoc/chapters/kerberos.txt +++ b/docs/src/main/asciidoc/chapters/kerberos.txt @@ -268,7 +268,7 @@ prompted for a password you can just hit return, since it won't be used. ---- $ accumulo-cluster stop ... -$ ACCUMULO_CONF_DIR=/path/to/server/conf/ accumulo init --reset-security +$ accumulo init --reset-security Running against secured HDFS Principal (user) to grant administrative privileges to : acculumo_admin@EXAMPLE.COM Enter initial password for accumulo_admin@EXAMPLE.COM (this may not be applicable for your security setup): @@ -401,9 +401,9 @@ Valid starting Expires Service principal ===== Configuration The second thing clients need to do is to set up their client configuration file. By -default, this file is stored in +~/.accumulo/config+, +$ACCUMULO_CONF_DIR/client.conf+ or -+$ACCUMULO_CONF_DIR/client.conf+. Accumulo utilities also allow you to provide your own -copy of this file in any location using the +--config-file+ command line option. +default, this file is stored in +~/.accumulo/config+ or +/path/to/accumulo/conf/client.conf+. +Accumulo utilities also allow you to provide your own copy of this file in any location +using the +--config-file+ command line option. Three items need to be set to enable access to Accumulo: @@ -545,11 +545,11 @@ I don't know what is actually failing. This will enable lots of extra debugging at the JVM level which is often sufficient to diagnose some high-level configuration problem. Client applications can add this system property by hand to the command line and Accumulo server processes or applications started using the `accumulo` -script by adding the property to +ACCUMULO_GENERAL_OPTS+ in +$ACCUMULO_CONF_DIR/accumulo-env.sh+. +script by adding the property to +ACCUMULO_GENERAL_OPTS+ in +accumulo-env.sh+.
Additionally, you can increase the log4j levels on +org.apache.hadoop.security+, which includes the Hadoop +UserGroupInformation+ class, which will include some high-level debug statements. This -can be controlled in your client application, or using +$ACCUMULO_CONF_DIR/generic_logger.xml+ +can be controlled in your client application, or using +generic_logger.xml+ *Q*: All of my Accumulo processes successfully start and log in with their keytab, but they are unable to communicate with each other, showing the @@ -627,7 +627,7 @@ Caused by: org.apache.thrift.transport.TTransportException: Peer indicated failu on, is extremely important when negotiating an SASL connection. This problem commonly arises when the Accumulo servers are not configured to listen on the address denoted by their FQDN. -The values in the Accumulo "hosts" files (In +$ACCUMULO_CONF_DIR+: +masters+, +monitors+, +tservers+, +tracers+, +The values in the Accumulo "hosts" files (In +accumulo/conf+: +masters+, +monitors+, +tservers+, +tracers+, and +gc+) should match the instance componentof the Kerberos server principal (e.g. +host+ in +accumulo/host@EXAMPLE.COM+). *Q*: After configuring my system for Kerberos, server processes come up normally and I can interact with the system. However, diff --git a/docs/src/main/asciidoc/chapters/ssl.txt b/docs/src/main/asciidoc/chapters/ssl.txt index 2abb98dd50b..35f16424e10 100644 --- a/docs/src/main/asciidoc/chapters/ssl.txt +++ b/docs/src/main/asciidoc/chapters/ssl.txt @@ -40,7 +40,7 @@ included in a section below. Accumulo servers require a certificate and keystore in the form of Java KeyStores, to enable SSL. The following configuration assumes these files already exist. 
-In +$ACCUMULO_CONF_DIR/accumulo-site.xml+, the following properties are required: +In +conf/accumulo-site.xml+, the following properties are required: * *rpc.javax.net.ssl.keyStore*=_The path on the local filesystem to the keystore containing the server's certificate_ * *rpc.javax.net.ssl.keyStorePassword*=_The password for the keystore containing the server's certificate_ @@ -49,7 +49,7 @@ In +$ACCUMULO_CONF_DIR/accumulo-site.xml+, the following properties are required * *instance.rpc.ssl.enabled*=_true_ Optionally, SSL client-authentication (two-way SSL) can also be enabled by setting -+instance.rpc.ssl.clientAuth=true+ in +$ACCUMULO_CONF_DIR/accumulo-site.xml+. ++instance.rpc.ssl.clientAuth=true+ in +conf/accumulo-site.xml+. This requires that each client has access to valid certificate to set up a secure connection to the servers. By default, Accumulo uses one-way SSL which does not require clients to have their own certificate. diff --git a/docs/src/main/asciidoc/chapters/troubleshooting.txt b/docs/src/main/asciidoc/chapters/troubleshooting.txt index a88dfa27e9e..bb739e19bf6 100644 --- a/docs/src/main/asciidoc/chapters/troubleshooting.txt +++ b/docs/src/main/asciidoc/chapters/troubleshooting.txt @@ -23,7 +23,7 @@ Accumulo is a distributed system. It is supposed to run on remote equipment, across hundreds of computers. Each program that runs on these remote computers writes down events as they occur, into a local file. By default, this is defined in -+$ACCUMULO_CONF_DIR/accumule-env.sh+ as +ACCUMULO_LOG_DIR+. ++conf/accumulo-env.sh+ as +ACCUMULO_LOG_DIR+. *A*: Look in the +$ACCUMULO_LOG_DIR/tserver*.log+ file. Specifically, check the end of the file. @@ -125,7 +125,7 @@ It is important to see the word +CONNECTED+! If you only see +CONNECTING+ you will need to diagnose zookeeper errors.
*A*: Check to make sure that zookeeper is up, and that -+$ACCUMULO_CONF_DIR/accumulo-site.xml+ has been pointed to ++conf/accumulo-site.xml+ has been pointed to your zookeeper server(s). *Q*: Zookeeper is running, but it does not say +CONNECTED+ From 4442711b8b6433aa52ed1cfc7c622cfa340416eb Mon Sep 17 00:00:00 2001 From: Mike Walch Date: Tue, 7 Feb 2017 11:07:51 -0500 Subject: [PATCH 2/2] Shellcheck fixes --- assemble/bin/accumulo | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assemble/bin/accumulo b/assemble/bin/accumulo index 1ca693e9fb1..2ec25ad43a0 100755 --- a/assemble/bin/accumulo +++ b/assemble/bin/accumulo @@ -499,11 +499,11 @@ function main() { # Set up -D switches for JAAS and Kerberos if files exist jaas_conf="${conf}/jaas.conf" - if [ -f $jaas_conf ]; then + if [ -f "$jaas_conf" ]; then ACCUMULO_GENERAL_OPTS="${ACCUMULO_GENERAL_OPTS} -Djava.security.auth.login.config=${jaas_conf}" fi krb5_conf="${conf}/krb5.conf" - if [ -f $krb5_conf ]; then + if [ -f "$krb5_conf" ]; then ACCUMULO_GENERAL_OPTS="${ACCUMULO_GENERAL_OPTS} -Djava.security.krb5.conf=${krb5_conf}" fi