Skip to content
This repository has been archived by the owner on May 12, 2021. It is now read-only.

Commit

Permalink
WHIRR-25. Add HBase service. Contributed by Lars George.
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.apache.org/repos/asf/incubator/whirr/trunk@1054522 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
tomwhite committed Jan 3, 2011
1 parent f5c4626 commit be68958
Show file tree
Hide file tree
Showing 20 changed files with 1,421 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Expand Up @@ -3,3 +3,5 @@
.project
.settings
target
.idea/
*.iml
2 changes: 2 additions & 0 deletions CHANGES.txt
Expand Up @@ -9,6 +9,8 @@ Trunk (unreleased changes)
WHIRR-176. Set AWS credentials in the local site file for Hadoop S3 access.
(Lars George via tomwhite)

WHIRR-25. Add HBase service. (Lars George via tomwhite)

IMPROVEMENTS

WHIRR-87. Parallelize Hadoop cluster creation. (tomwhite)
Expand Down
5 changes: 5 additions & 0 deletions cli/pom.xml
Expand Up @@ -49,6 +49,11 @@
<artifactId>whirr-zookeeper</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>whirr-hbase</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.jclouds</groupId>
<artifactId>jclouds-allcompute</artifactId>
Expand Down
1 change: 1 addition & 0 deletions pom.xml
Expand Up @@ -43,6 +43,7 @@
<module>services/cdh</module>
<module>services/hadoop</module>
<module>services/zookeeper</module>
<module>services/hbase</module>
</modules>

<properties>
Expand Down
131 changes: 131 additions & 0 deletions scripts/apache/hbase/install
@@ -0,0 +1,131 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Install Apache HBase.
#

set -x
set -e

################################################################################
# Initialize variables
################################################################################

# -c <provider>: cloud provider name (accepted for symmetry with the other
# service scripts; not otherwise used during installation).
CLOUD_PROVIDER=
while getopts "c:" opt; do
  case "$opt" in
    c)
      CLOUD_PROVIDER=$OPTARG
      ;;
  esac
done

# The HBase version may be overridden from the environment.
HBASE_VERSION=${HBASE_VERSION:-0.89.20100924}
HBASE_HOME="/usr/local/hbase-${HBASE_VERSION}"

# Refresh package-manager state so subsequent installs work.
# Debian-style and RPM-style systems are detected by the presence of their
# low-level package tools.  The script must run as root (it later writes
# /etc/security/limits.conf and ~root/.bashrc directly), so the stray
# `sudo` on the apt path has been dropped for consistency with
# install_packages below.
function update_repo() {
  if which dpkg &> /dev/null; then
    apt-get update
  elif which rpm &> /dev/null; then
    # NOTE(review): this only updates the yum package itself; yum refreshes
    # repository metadata implicitly on each transaction.
    yum update -y yum
  fi
}

# Install a list of packages on debian or redhat as appropriate.
# Arguments: package names, one per argument.
# Falls through with a diagnostic if no known package manager is found.
function install_packages() {
  if which dpkg &> /dev/null; then
    apt-get update
    # "$@" keeps each package name as a separate, intact argument
    # (the original unquoted $@ re-split arguments on whitespace).
    apt-get -y install "$@"
  elif which rpm &> /dev/null; then
    yum install -y "$@"
  else
    echo "No package manager found."
  fi
}

# Download, checksum-verify and unpack the HBase tarball into /usr/local,
# create the hadoop service user, and raise the kernel and file-descriptor
# limits HBase needs.  Must run as root.
# Globals read: HBASE_VERSION, HBASE_HOME.
function install_hbase() {
  # Create the hadoop user; tolerate the account already existing.
  set +e
  useradd hadoop
  set -e

  # up file-max: HBase keeps many store files and sockets open
  sysctl -w fs.file-max=65535
  # up ulimits
  echo "root soft nofile 65535" >> /etc/security/limits.conf
  echo "root hard nofile 65535" >> /etc/security/limits.conf
  ulimit -n 65535
  # up epoll limits; ok if this fails, only valid for kernels 2.6.27+
  set +e
  sysctl -w fs.epoll.max_user_instances=65535 > /dev/null 2>&1
  set -e
  # if there is no hosts file then provide a minimal one
  [ ! -f /etc/hosts ] && echo "127.0.0.1 localhost" > /etc/hosts

  # (kept for reference) Reformat sdb as xfs
  #umount /mnt
  #mkfs.xfs -f /dev/sdb
  #mount -o noatime /dev/sdb /mnt
  # Probe for additional instance volumes
  # /dev/sdb as /mnt is always set up by base image
  #DFS_NAME_DIR="/mnt/hadoop/dfs/name"
  #DFS_DATA_DIR="/mnt/hadoop/dfs/data"
  #i=2
  #for d in c d e f g h i j k l m n o p q r s t u v w x y z; do
  # m="/mnt${i}"
  # mkdir -p $m
  # mkfs.xfs -f /dev/sd${d}
  # if [ $? -eq 0 ] ; then
  #  mount -o noatime /dev/sd${d} $m > /dev/null 2>&1
  #  if [ $i -lt 3 ] ; then # no more than two namedirs
  #   DFS_NAME_DIR="${DFS_NAME_DIR},${m}/hadoop/dfs/name"
  #  fi
  #  DFS_DATA_DIR="${DFS_DATA_DIR},${m}/hadoop/dfs/data"
  #  i=$(( i + 1 ))
  # fi
  #done

  # install HBase tarball
  local hbase_tar_url="http://archive.apache.org/dist/hbase/hbase-$HBASE_VERSION/hbase-${HBASE_VERSION}-bin.tar.gz"
  local hbase_tar_file hbase_tar_md5_file
  hbase_tar_file=$(basename "$hbase_tar_url")
  hbase_tar_md5_file=$(basename "$hbase_tar_url.md5")

  # Fetch tarball plus checksum; retry up to three times on a bad checksum.
  # (A failed HTTP transfer still aborts immediately via --fail + set -e,
  # as in the original.)
  local curl="curl --retry 3 --silent --show-error --fail"
  local i
  for i in 1 2 3; do
    $curl -O "$hbase_tar_url"
    $curl -O "$hbase_tar_url.md5"
    if md5sum -c "$hbase_tar_md5_file"; then
      break
    else
      # Checksum mismatch: discard both files and try again.
      rm -f "$hbase_tar_file" "$hbase_tar_md5_file"
    fi
  done

  # A missing tarball here means every attempt failed verification.
  if [ ! -e "$hbase_tar_file" ]; then
    echo "Failed to download $hbase_tar_url. Aborting." >&2
    exit 1
  fi

  tar zxf "$hbase_tar_file" -C /usr/local
  rm -f "$hbase_tar_file" "$hbase_tar_md5_file"

  # Make HBase available on root's PATH for interactive logins.
  echo "export HBASE_HOME=$HBASE_HOME" >> ~root/.bashrc
  echo 'export PATH=$JAVA_HOME/bin:$HBASE_HOME/bin:$PATH' >> ~root/.bashrc
}

# Entry point: refresh package-manager state, then install HBase.
update_repo
install_hbase
214 changes: 214 additions & 0 deletions scripts/apache/hbase/post-configure
@@ -0,0 +1,214 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Configure Apache HBase after the cluster has started.
#
# Call with the following arguments
# -m <master>
# -q <zookeeper quorum>
# -p <port>
# -c <cloud provider>

set -x
set -e

################################################################################
# Initialize variables
################################################################################

# First positional argument is the comma-separated list of roles assigned to
# this instance; the remaining arguments are option flags parsed below.
ROLES=$1
shift

MASTER_HOST=
# Fixed: was misspelled "ZOOKEEKER_QUORUM", so the variable the -q option
# assigns below was never initialized here.
ZOOKEEPER_QUORUM=
CLOUD_PROVIDER=
PORT=
while getopts "m:q:p:c:" OPTION; do
  case $OPTION in
  m)
    MASTER_HOST="$OPTARG"
    ;;
  q)
    ZOOKEEPER_QUORUM="$OPTARG"
    ;;
  p)
    PORT="$OPTARG"
    ;;
  c)
    CLOUD_PROVIDER="$OPTARG"
    ;;
  esac
done

# Determine this host's externally-reachable address.
case $CLOUD_PROVIDER in
  ec2)
    # Use public hostname for EC2, from the instance metadata service.
    SELF_HOST=$(wget -q -O - http://169.254.169.254/latest/meta-data/public-hostname)
    ;;
  *)
    # Fall back to the primary interface's IPv4 address.
    SELF_HOST=$(/sbin/ifconfig eth0 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}')
    ;;
esac

HBASE_VERSION=${HBASE_VERSION:-0.89.20100924}
HBASE_HOME=/usr/local/hbase-$HBASE_VERSION
HBASE_CONF_DIR=$HBASE_HOME/conf

# Configure HBase by setting up disks and site files.
#
# Lays the HBase data and tmp directories out on the provider's data mount,
# writes hbase-site.xml, JVM options and Ganglia metrics configuration, and
# relocates the PID and log directories off the root partition.
#
# Globals read: CLOUD_PROVIDER, MASTER_HOST, ZOOKEEPER_QUORUM, HBASE_HOME,
#               HBASE_CONF_DIR, HBASE_VERSION
# Globals written: MOUNT
function configure_hbase() {
  case $CLOUD_PROVIDER in
  ec2)
    # EC2 base images always mount instance storage on /mnt.
    MOUNT=/mnt
    ;;
  *)
    MOUNT=/data
    ;;
  esac
  # -p keeps re-runs of this script idempotent.
  mkdir -p $MOUNT/hbase
  chown hadoop:hadoop $MOUNT/hbase
  mkdir -p $MOUNT/tmp
  chmod a+rwxt $MOUNT/tmp

  # Expose the configuration in the conventional /etc location.
  mkdir -p /etc/hbase
  ln -s $HBASE_CONF_DIR /etc/hbase/conf

  ##############################################################################
  # Modify this section to customize your HBase cluster.
  ##############################################################################
  # Note: hbase.tmp.dir now uses $MOUNT (the original hard-coded /mnt/hbase,
  # which is wrong for providers where the data mount is /data).
  cat > $HBASE_HOME/conf/hbase-site.xml <<EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
 <name>hbase.rootdir</name>
 <value>hdfs://$MASTER_HOST:8020/hbase</value>
</property>
<property>
 <name>hbase.cluster.distributed</name>
 <value>true</value>
</property>
<property>
 <name>hbase.zookeeper.quorum</name>
 <value>$ZOOKEEPER_QUORUM</value>
</property>
<property>
 <name>hbase.regionserver.handler.count</name>
 <value>100</value>
</property>
<property>
 <name>dfs.replication</name>
 <value>3</value>
</property>
<property>
 <name>zookeeper.session.timeout</name>
 <value>60000</value>
</property>
<property>
 <name>hbase.tmp.dir</name>
 <value>$MOUNT/hbase</value>
</property>
</configuration>
EOF
  # Override JVM options.  GC logs go to the log dir created below
  # (the original hard-coded /mnt/hbase/logs regardless of $MOUNT).
  cat >> $HBASE_HOME/conf/hbase-env.sh <<EOF
export HBASE_MASTER_OPTS="-Xms1000m -Xmx1000m -Xmn256m -XX:+UseConcMarkSweepGC -XX:+AggressiveOpts -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:$MOUNT/hbase/logs/hbase-master-gc.log"
export HBASE_REGIONSERVER_OPTS="-Xms2000m -Xmx2000m -Xmn256m -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=88 -XX:+AggressiveOpts -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:$MOUNT/hbase/logs/hbase-regionserver-gc.log"
EOF
  # Configure HBase for Ganglia; metrics are reported to the master host.
  cat > $HBASE_HOME/conf/hadoop-metrics.properties <<EOF
dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
dfs.period=10
dfs.servers=$MASTER_HOST:8649
hbase.class=org.apache.hadoop.metrics.ganglia.GangliaContext
hbase.period=10
hbase.servers=$MASTER_HOST:8649
jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
jvm.period=10
jvm.servers=$MASTER_HOST:8649
EOF

  # (kept for reference) Update classpath to include HBase jars and config
  # cat >> $HADOOP_HOME/conf/hadoop-env.sh <<EOF
  #HADOOP_CLASSPATH="$HBASE_HOME/hbase-${HBASE_VERSION}.jar:$HBASE_HOME/lib/zookeeper-3.3.1.jar:$HBASE_HOME/conf"
  #EOF
  # (kept for reference) Configure Hadoop for Ganglia
  # cat > $HADOOP_HOME/conf/hadoop-metrics.properties <<EOF
  #dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
  #dfs.period=10
  #dfs.servers=$MASTER_HOST:8649
  #jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
  #jvm.period=10
  #jvm.servers=$MASTER_HOST:8649
  #mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
  #mapred.period=10
  #mapred.servers=$MASTER_HOST:8649
  #EOF

  # Keep PID files in a non-temporary directory so running daemons can be
  # found after /tmp cleanup.
  sed -i -e "s|# export HBASE_PID_DIR=.*|export HBASE_PID_DIR=/var/run/hbase|" \
    $HBASE_CONF_DIR/hbase-env.sh
  mkdir -p /var/run/hbase
  chown -R hadoop:hadoop /var/run/hbase

  # Set SSH options within the cluster (hosts are freshly provisioned, so
  # host keys are unknown).
  sed -i -e 's|# export HBASE_SSH_OPTS=.*|export HBASE_SSH_OPTS="-o StrictHostKeyChecking=no"|' \
    $HBASE_CONF_DIR/hbase-env.sh

  # Disable IPv6
  sed -i -e 's|# export HBASE_OPTS=.*|export HBASE_OPTS="-Djava.net.preferIPv4Stack=true"|' \
    $HBASE_CONF_DIR/hbase-env.sh

  # HBase logs live on the data partition; /var/log/hbase is made a symlink
  # to $MOUNT/hbase/logs, so HBASE_LOG_DIR points at /var/log/hbase itself
  # (the original pointed one level below the symlink target, at
  # /var/log/hbase/logs, which does not exist).
  sed -i -e 's|# export HBASE_LOG_DIR=.*|export HBASE_LOG_DIR=/var/log/hbase|' \
    $HBASE_CONF_DIR/hbase-env.sh
  rm -rf /var/log/hbase
  mkdir -p $MOUNT/hbase/logs
  chown hadoop:hadoop $MOUNT/hbase/logs
  ln -s $MOUNT/hbase/logs /var/log/hbase
  chown -R hadoop:hadoop /var/log/hbase
}

# Start an HBase daemon as the hadoop user.
# Arguments: $1  - daemon name (master, regionserver, rest, avro, thrift)
#            $2+ - extra daemon arguments, e.g. "-p <port>"
function start_daemon() {
  if which dpkg &> /dev/null; then
    AS_HADOOP="su -s /bin/bash - hadoop -c"
  elif which rpm &> /dev/null; then
    AS_HADOOP="/sbin/runuser -s /bin/bash - hadoop -c"
  fi
  # $* forwards ALL arguments into the quoted command string; the original
  # used only $1, silently dropping the "-p $PORT" passed for the REST,
  # Avro and Thrift servers.
  $AS_HADOOP "$HBASE_HOME/bin/hbase-daemon.sh start $*"
}

# Write all configuration files first, then start the daemons for every role
# assigned to this instance (ROLES is a comma-separated list).
configure_hbase

# ${ROLES//,/ } turns the comma-separated list into a whitespace-separated
# one, which the unquoted expansion then word-splits.
for role in ${ROLES//,/ }; do
  case $role in
  hbase-master)
    start_daemon master
    ;;
  hbase-regionserver)
    start_daemon regionserver
    ;;
  hbase-restserver)
    start_daemon rest -p $PORT
    ;;
  hbase-avroserver)
    start_daemon avro -p $PORT
    ;;
  hbase-thriftserver)
    start_daemon thrift -p $PORT
    ;;
  esac
done

0 comments on commit be68958

Please sign in to comment.