Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@
CHANGELOG
=========

0.0.7
=====

* feature:``cfncluster``: Added option to encrypt ephemeral drives with in-memory keys
* feature:``cfncluster``: Detect all ephemeral drives, stripe and mount as /scratch
* feature:``cfncluster``: Support for placement groups
* feature:``cfncluster``: Support for cluster placement logic. Can either be cluster or compute.
* feature:``cfncluster``: Added option to provide arguments to pre/post install scripts
* feature:``cfncluster``: Added DKMS support for Lustre filesystems - http://zfsonlinux.org/lustre.html
* bugfix:``cli``: Added missing support for SSH from CIDR range
* bugfix:``cfncluster``: Fixed Ganglia setup for ComputeFleet
* updates:``SGE``: Updated to 8.1.7 - https://arc.liv.ac.uk/trac/SGE
* updates:``Openlava``: Updated to latest Git for Openlava 2.2 - https://github.com/openlava/openlava

0.0.6
=====

Expand Down
8 changes: 4 additions & 4 deletions amis.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
us-west-2 ami-e581fcd5
us-east-1 ami-745ea11c
eu-west-1 ami-e3458c94
ap-northeast-1 ami-2d41092c
us-west-2 ami-7dcab74d
us-east-1 ami-2c07f944
eu-west-1 ami-a1a169d6
ap-northeast-1 ami-b3c78fb2
2 changes: 1 addition & 1 deletion bootstrap/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SHELL = /bin/sh
PREFIX ?= /opt/cfncluster
DESTDIR ?= /opt/cfncluster

install:
install -d -m 755 $(DESTDIR)
Expand Down
99 changes: 97 additions & 2 deletions bootstrap/src/scripts/boot_as_compute
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,101 @@ function error_exit () {
# Run preinstall script if defined.
# cfn_preinstall is the URL of the script, or "NONE" to skip this step.
# cfn_preinstall_args is an optional argument string for it, or "NONE".
RC=0
if [ "${cfn_preinstall}" != "NONE" ]; then
  # Always start from an empty argument list so a value left in $args by
  # some other step can never be passed to this script by accident.
  args=""
  if [ "${cfn_preinstall_args}" != "NONE" ]; then
    args=${cfn_preinstall_args}
  fi
  tmpfile=$(mktemp) || RC=1
  if [ $RC -eq 0 ]; then
    # Download to a file first and execute it exactly once, and only when
    # the download succeeded, so a partial script is never run.
    if wget -qO- "${cfn_preinstall}" > "$tmpfile"; then
      # $args is deliberately unquoted: it is split into separate arguments.
      /bin/sh "$tmpfile" $args || RC=1
    else
      RC=1
    fi
    /bin/rm -f "$tmpfile"
  fi
fi
if [ $RC -ne 0 ]; then
  error_exit "Failed to run boot_as_compute preinstall"
fi

## Non-scheduler specific functions
##

# Stripe every ephemeral drive into one LVM logical volume, format it as XFS
# (optionally on top of LUKS) and mount it on /scratch.
# Failures in this section are not fatal: /scratch always exists as a plain
# directory, it is just not backed by ephemeral storage.
RC=0
mkdir -p /scratch
chmod 1777 /scratch

# ec2-metadata lists the block device mappings; the sed rewrites the "sd"
# prefix of the ephemeral entries to "xvd".
MAPPING=$(/usr/bin/ec2-metadata -b | grep ephemeral | awk '{print $2}' | sed 's/sd/xvd/')

# Keep only the mappings that have a device node on this instance.
DEVS=""
for m in $MAPPING; do
  if stat -t "/dev/${m}" >/dev/null 2>&1; then
    DEVS="${m} $DEVS"
  fi
done

# Give every drive a fresh msdos label with one LVM-flagged partition.
NUM_DEVS=0
PARTITIONS=""
for d in $DEVS; do
  d=/dev/${d}
  dd if=/dev/zero of="${d}" bs=32k count=1
  parted -s "${d}" mklabel msdos
  parted -s -a optimal "${d}" mkpart primary 1MB 100%
  parted -s "${d}" set 1 lvm on
  NUM_DEVS=$((NUM_DEVS + 1))
  PARTITIONS="${d}1 $PARTITIONS"
done

# Without any ephemeral drive there is nothing to stripe; skipping avoids
# running the LVM tools with empty arguments and, more importantly, avoids
# adding an /etc/fstab entry for a logical volume that does not exist.
if [ "$NUM_DEVS" -gt 0 ]; then
  # sleep 10 seconds to let partitions settle (bug?)
  sleep 10

  # Setup LVM ($PARTITIONS is deliberately unquoted: one argument per partition)
  pvcreate $PARTITIONS
  vgcreate vg.01 $PARTITIONS
  lvcreate -i "$NUM_DEVS" -I 64 -l 100%FREE -n lv_ephemeral vg.01
  if [ "$cfn_encrypted_ephemeral" == "true" ]; then
    # The random key file lives on a ramdisk (/dev/ram1) mounted at
    # /root/keystore. This branch adds no fstab entry for /scratch.
    mkfs -q /dev/ram1 1024
    mkdir -p /root/keystore
    mount /dev/ram1 /root/keystore
    dd if=/dev/urandom of=/root/keystore/keyfile bs=1024 count=4
    chmod 0400 /root/keystore/keyfile
    cryptsetup -q luksFormat /dev/vg.01/lv_ephemeral /root/keystore/keyfile
    cryptsetup -d /root/keystore/keyfile luksOpen /dev/vg.01/lv_ephemeral ephemeral_luks
    mkfs.xfs /dev/mapper/ephemeral_luks
    mount -v -t xfs -o noatime,nodiratime /dev/mapper/ephemeral_luks /scratch
  else
    mkfs.xfs /dev/vg.01/lv_ephemeral
    echo "/dev/vg.01/lv_ephemeral /scratch xfs noatime,nodiratime 0 0" >> /etc/fstab
    mount -v /scratch
  fi
fi
# Re-apply the mode: a freshly mounted filesystem brings its own root directory.
chmod 1777 /scratch

# Mount the NFS exports /home and /shared from the master ($cfn_master).
# Both fstab entries are written first, then both mounts are attempted;
# any failure is reported once at the end.
RC=0
for export_dir in /home /shared; do
  echo "$cfn_master:$export_dir $export_dir nfs hard,intr,noatime,vers=3,_netdev 0 0" >> /etc/fstab || RC=1
done
for export_dir in /home /shared; do
  mount -v "$export_dir" || RC=1
done
if [ $RC -ne 0 ]; then
  error_exit "Failed during NFS mounts"
fi

# Ganglia on a compute node: install the COMPUTE gmond template, fill in the
# <master> and <location> placeholders, then enable and start gmond.
# Every step is attempted; RC records whether any of them failed.
RC=0
# The availability zone from the instance metadata service fills <location>.
location=$(curl --retry 3 --retry-delay 0 --silent --fail http://169.254.169.254/latest/meta-data/placement/availability-zone) || RC=1
cd /etc/ganglia || RC=1
/bin/cp -f /opt/cfncluster/templates/os/gmond.conf.COMPUTE gmond.conf || RC=1
sed -i "s/<master>/${cfn_master}/" gmond.conf || RC=1
sed -i "s/<location>/$location/" gmond.conf || RC=1
chkconfig gmond on || RC=1
service gmond start || RC=1
if [[ $RC -ne 0 ]]; then
  error_exit "Failed during Ganglia setup"
fi

# Add a once-a-minute nodewatcher job to the crontab of the user running
# this script, keeping whatever entries are already there.
RC=0
# Use a private temp file rather than a fixed, predictable name in /tmp.
crontab_file=$(mktemp) || RC=1
if [ $RC -eq 0 ]; then
  # The exit status of "crontab -l" is deliberately not checked: having no
  # crontab yet is fine, the new entry is simply the only one.
  crontab -l > "$crontab_file"
  echo "* * * * * cd /opt/cfncluster/nodewatcher && ./nodewatcher.py >> nodewatcher.log 2>&1" >> "$crontab_file" || RC=1
  crontab "$crontab_file" || RC=1
  /bin/rm -f "$crontab_file"
fi
if [ $RC -ne 0 ]; then
  error_exit "Failed to install nodewatcher crontab"
fi

##
# Run boot as compute for a specific scheduler
RC=0
/opt/cfncluster/scripts/${cfn_scheduler}/boot_as_compute >/var/log/cfncluster.log 2>&1 || RC=1
Expand All @@ -41,7 +130,13 @@ fi
# Run postinstall script if defined.
# cfn_postinstall is the URL of the script, or "NONE" to skip this step.
# cfn_postinstall_args is an optional argument string for it, or "NONE".
RC=0
if [ "${cfn_postinstall}" != "NONE" ]; then
  # Reset $args first: when cfn_postinstall_args is "NONE" the script must
  # run without arguments, not with whatever an earlier step left in $args.
  args=""
  if [ "${cfn_postinstall_args}" != "NONE" ]; then
    args=${cfn_postinstall_args}
  fi
  tmpfile=$(mktemp) || RC=1
  if [ $RC -eq 0 ]; then
    # Download to a file first and execute it exactly once, and only when
    # the download succeeded, so a partial script is never run.
    if wget -qO- "${cfn_postinstall}" > "$tmpfile"; then
      # $args is deliberately unquoted: it is split into separate arguments.
      /bin/sh "$tmpfile" $args || RC=1
    else
      RC=1
    fi
    /bin/rm -f "$tmpfile"
  fi
fi
if [ $RC -ne 0 ]; then
error_exit "Failed to run boot_as_compute postinstall"
Expand Down
172 changes: 170 additions & 2 deletions bootstrap/src/scripts/boot_as_master
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,174 @@ function error_exit () {
# Run preinstall script if defined.
# cfn_preinstall is the URL of the script, or "NONE" to skip this step.
# cfn_preinstall_args is an optional argument string for it, or "NONE".
RC=0
if [ "${cfn_preinstall}" != "NONE" ]; then
  # Always start from an empty argument list so a value left in $args by
  # some other step can never be passed to this script by accident.
  args=""
  if [ "${cfn_preinstall_args}" != "NONE" ]; then
    args=${cfn_preinstall_args}
  fi
  tmpfile=$(mktemp) || RC=1
  if [ $RC -eq 0 ]; then
    # Download to a file first and execute it exactly once, and only when
    # the download succeeded, so a partial script is never run.
    if wget -qO- "${cfn_preinstall}" > "$tmpfile"; then
      # $args is deliberately unquoted: it is split into separate arguments.
      /bin/sh "$tmpfile" $args || RC=1
    else
      RC=1
    fi
    /bin/rm -f "$tmpfile"
  fi
fi
if [ $RC -ne 0 ]; then
  error_exit "Failed to run boot_as_master preinstall"
fi

## Non-scheduler specific functions
##

# Refuse to continue when the configuration provides no volume id
if [ -z "${cfn_volume}" ]; then
  error_exit "Volume must be provided."
fi

# Record the short local hostname; give up if it cannot be determined
if ! myhostname=$(hostname -s); then
  error_exit 'Failed to determine local hostname'
fi

# Enable PAT: run the configure-pat script now and append a call to it to
# /etc/rc.local as well.
RC=0
/opt/cfncluster/scripts/os/configure-pat.sh || RC=1
# %b expands the \n escapes in the argument; the format adds the final newline.
printf '%b\n' "\n# Enable PAT\n/opt/cfncluster/scripts/os/configure-pat.sh\n\n" >> /etc/rc.local || RC=1
if [[ $RC -ne 0 ]]; then
  error_exit "Failed to enable NAT(PAT)"
fi

# Stripe every ephemeral drive into one LVM logical volume, format it as XFS
# (optionally on top of LUKS) and mount it on /scratch.
# Failures in this section are not fatal: /scratch always exists as a plain
# directory, it is just not backed by ephemeral storage.
RC=0
mkdir -p /scratch
chmod 1777 /scratch

# ec2-metadata lists the block device mappings; the sed rewrites the "sd"
# prefix of the ephemeral entries to "xvd".
MAPPING=$(/usr/bin/ec2-metadata -b | grep ephemeral | awk '{print $2}' | sed 's/sd/xvd/')

# Keep only the mappings that have a device node on this instance.
DEVS=""
for m in $MAPPING; do
  if stat -t "/dev/${m}" >/dev/null 2>&1; then
    DEVS="${m} $DEVS"
  fi
done

# Give every drive a fresh msdos label with one LVM-flagged partition.
NUM_DEVS=0
PARTITIONS=""
for d in $DEVS; do
  d=/dev/${d}
  dd if=/dev/zero of="${d}" bs=32k count=1
  parted -s "${d}" mklabel msdos
  parted -s -a optimal "${d}" mkpart primary 1MB 100%
  parted -s "${d}" set 1 lvm on
  NUM_DEVS=$((NUM_DEVS + 1))
  PARTITIONS="${d}1 $PARTITIONS"
done

# Without any ephemeral drive there is nothing to stripe; skipping avoids
# running the LVM tools with empty arguments and, more importantly, avoids
# adding an /etc/fstab entry for a logical volume that does not exist.
if [ "$NUM_DEVS" -gt 0 ]; then
  # sleep 10 seconds to let partitions settle (bug?)
  sleep 10

  # Setup LVM ($PARTITIONS is deliberately unquoted: one argument per partition)
  pvcreate $PARTITIONS
  vgcreate vg.01 $PARTITIONS
  lvcreate -i "$NUM_DEVS" -I 64 -l 100%FREE -n lv_ephemeral vg.01
  if [ "$cfn_encrypted_ephemeral" == "true" ]; then
    # The random key file lives on a ramdisk (/dev/ram1) mounted at
    # /root/keystore. This branch adds no fstab entry for /scratch.
    mkfs -q /dev/ram1 1024
    mkdir -p /root/keystore
    mount /dev/ram1 /root/keystore
    dd if=/dev/urandom of=/root/keystore/keyfile bs=1024 count=4
    chmod 0400 /root/keystore/keyfile
    cryptsetup -q luksFormat /dev/vg.01/lv_ephemeral /root/keystore/keyfile
    cryptsetup -d /root/keystore/keyfile luksOpen /dev/vg.01/lv_ephemeral ephemeral_luks
    mkfs.xfs /dev/mapper/ephemeral_luks
    mount -v -t xfs -o noatime,nodiratime /dev/mapper/ephemeral_luks /scratch
  else
    mkfs.xfs /dev/vg.01/lv_ephemeral
    echo "/dev/vg.01/lv_ephemeral /scratch xfs noatime,nodiratime 0 0" >> /etc/fstab
    mount -v /scratch
  fi
fi
# Re-apply the mode: a freshly mounted filesystem brings its own root directory.
chmod 1777 /scratch

# Attach the volume named by cfn_volume, create an XFS filesystem on it when
# it has none, register it in /etc/fstab and mount it on /shared.
RC=0
vol_link=/dev/disk/by-ebs-volumeid/${cfn_volume}
/usr/local/sbin/attachVolume.py "${cfn_volume}" || RC=1
sleep 10 # Hate having to do this...

# Resolve the by-ebs-volumeid symlink to the real device node instead of
# parsing the human-readable output of stat.
dev_path=$(readlink -f "$vol_link") || RC=1
# $dev keeps holding just the device name (last path component).
dev=${dev_path##*/}
[ -b "$dev_path" ] || RC=1

# Query the filesystem type of this one device. An empty answer means there
# is no recognisable filesystem on it yet.
fs_type=$(blkid -o value -s TYPE "$dev_path")
if [ -z "${fs_type}" ]; then
  mkfs.xfs "$vol_link" || RC=1
  sleep 5
  fs_type=$(blkid -o value -s TYPE "$dev_path")
fi

# Never write an fstab line with a missing filesystem type field.
if [ -n "${fs_type}" ]; then
  echo "/dev/disk/by-ebs-volumeid/${cfn_volume} /shared $fs_type noatime,nodiratime 0 0" >> /etc/fstab || RC=1
else
  RC=1
fi
mount -v /shared || RC=1
chmod 1777 /shared || RC=1
if [ $RC -ne 0 ]; then
  error_exit "Failed to attach and mount volume"
fi

# NFS server setup on the master
# Step 1: work out which CIDR range the exports should be opened to.
# The MAC is taken from the eth0 line of ifconfig and must look like
# aa:bb:cc:dd:ee:ff; it selects the interface in the instance metadata.
ETH0_MAC=$(/sbin/ifconfig | /bin/grep eth0 | awk '{print tolower($5)}' | grep '^[0-9a-f]\{2\}\(:[0-9a-f]\{2\}\)\{5\}$')
VPC_CIDR_URI="http://169.254.169.254/latest/meta-data/network/interfaces/macs/${ETH0_MAC}/vpc-ipv4-cidr-block"
if VPC_CIDR_RANGE=$(curl --retry 3 --retry-delay 0 --silent --fail ${VPC_CIDR_URI}); then
  echo "Retrived the VPC CIDR range: ${VPC_CIDR_RANGE} from meta-data for NFS export." | logger -t "cfncluster"
else
  # Metadata lookup failed: log it and fall back to a fixed range.
  echo "Unable to retrive VPC CIDR range from meta-data. This either means a) non-VPC or b) an error" | logger -t "cfncluster"
  VPC_CIDR_RANGE="10.0.0.0/8"
fi

# Step 2: install the exports template and fill in the <cidr> placeholder.
# "?" is the sed delimiter because the CIDR value itself contains "/".
RC=0
cd /etc || RC=1
/bin/cp -f /opt/cfncluster/templates/os/exports.MASTER exports || RC=1
sed -i "s?<cidr>?$VPC_CIDR_RANGE?" exports || RC=1
if [[ $RC -ne 0 ]]; then
  error_exit "Failed to configure NFS exports"
fi

# Step 3: enable the NFS services at boot, then restart them.
RC=0
for nfs_svc in nfs rpcbind rpcidmapd; do
  chkconfig "$nfs_svc" on || RC=1
done
for nfs_svc in rpcbind rpcidmapd nfs; do
  service "$nfs_svc" restart || RC=1
done
if [[ $RC -ne 0 ]]; then
  error_exit "Failed to start NFS server"
fi

# Ganglia on the master: install the MASTER templates for gmond and gmetad
# and fill in their <master>, <location> and <stack_name> placeholders.
RC=0
# The availability zone from the instance metadata service fills <location>.
location=$(curl --retry 3 --retry-delay 0 --silent --fail http://169.254.169.254/latest/meta-data/placement/availability-zone) || RC=1
cd /etc/ganglia || RC=1
/bin/cp -f /opt/cfncluster/templates/os/gmond.conf.MASTER gmond.conf || RC=1
/bin/cp -f /opt/cfncluster/templates/os/gmetad.conf.MASTER gmetad.conf || RC=1
sed -i "s/<master>/$myhostname/" gmond.conf || RC=1
sed -i "s/<location>/$location/" gmond.conf || RC=1
for ganglia_conf in gmond.conf gmetad.conf; do
  sed -i "s/<stack_name>/$stack_name/" "$ganglia_conf" || RC=1
done
if [[ $RC -ne 0 ]]; then
  error_exit "Failed to configure Ganglia"
fi

# Enable gmond, gmetad and httpd at boot, then start them in that order.
RC=0
for web_svc in gmond gmetad httpd; do
  chkconfig "$web_svc" on || RC=1
done
for web_svc in gmond gmetad httpd; do
  service "$web_svc" start || RC=1
done
if [[ $RC -ne 0 ]]; then
  error_exit "Failed to start Ganglia"
fi

# SSH setup for ec2-user, each step run as that user through "su -":
#   1. generate an RSA key pair with an empty passphrase,
#   2. authorise the new public key via authorized_keys2,
#   3. record this host's key in known_hosts.
# All three steps are attempted even if an earlier one fails.
RC=0
for ssh_step in \
  "ssh-keygen -q -t rsa -f ~/.ssh/id_rsa -N ''" \
  "cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys2 && chmod 0600 ~/.ssh/authorized_keys2" \
  "ssh-keyscan ${myhostname} > ~/.ssh/known_hosts && chmod 0600 ~/.ssh/known_hosts"
do
  su - ec2-user -c "$ssh_step" || RC=1
done
if [[ $RC -ne 0 ]]; then
  error_exit "Failed to setup ec2-user SSH auth"
fi

## Scheduler specific section
##

# Run boot as master for a specific scheduler
RC=0
/opt/cfncluster/scripts/${cfn_scheduler}/boot_as_master >/var/log/cfncluster.log 2>&1 || RC=1
Expand All @@ -44,7 +206,13 @@ cd /opt/cfncluster/sqswatcher && ./sqswatcher.py 2>&1
# Run postinstall script if defined.
# cfn_postinstall is the URL of the script, or "NONE" to skip this step.
# cfn_postinstall_args is an optional argument string for it, or "NONE".
RC=0
if [ "${cfn_postinstall}" != "NONE" ]; then
  # Reset $args first: when cfn_postinstall_args is "NONE" the script must
  # run without arguments, not with whatever an earlier step left in $args.
  args=""
  if [ "${cfn_postinstall_args}" != "NONE" ]; then
    args=${cfn_postinstall_args}
  fi
  tmpfile=$(mktemp) || RC=1
  if [ $RC -eq 0 ]; then
    # Download to a file first and execute it exactly once, and only when
    # the download succeeded, so a partial script is never run.
    if wget -qO- "${cfn_postinstall}" > "$tmpfile"; then
      # $args is deliberately unquoted: it is split into separate arguments.
      /bin/sh "$tmpfile" $args || RC=1
    else
      RC=1
    fi
    /bin/rm -f "$tmpfile"
  fi
fi
if [ $RC -ne 0 ]; then
error_exit "Failed to run boot_as_master postinstall"
Expand Down
Loading