Skip to content
This repository has been archived by the owner on Jan 25, 2022. It is now read-only.

Commit

Permalink
Multi-jenkins to eliminate jenkins down time
Browse files Browse the repository at this point in the history
Change-Id: I462c7db115f9728901f3576bb97820cbcbf50f68
  • Loading branch information
eveningwear committed Sep 12, 2012
1 parent 3b1b430 commit 3732803
Show file tree
Hide file tree
Showing 7 changed files with 157 additions and 5 deletions.
3 changes: 3 additions & 0 deletions jobs/jenkins/spec
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ name: jenkins
templates:
jenkins_ctl: bin/jenkins_ctl
jenkins_init.ctl.erb: bin/jenkins_init.ctl
standby_check.erb: bin/standby_check
zabbix_agent_ctl: bin/zabbix_agent_ctl
run_backup: bin/run_backup
change_tmp: bin/change_tmp
id_rsa.erb: config/id_rsa
id_rsa.pub.erb: config/id_rsa.pub
Expand All @@ -16,6 +18,7 @@ templates:
hudson.tasks.Maven.xml: config/hudson.tasks.Maven.xml
gerrit_trigger_key.erb: config/gerrit_trigger_key
render_jenkins_jobs.erb: bin/render_jenkins_jobs
authorized_keys.erb: config/authorized_keys
packages:
- java
- git
Expand Down
1 change: 1 addition & 0 deletions jobs/jenkins/templates/authorized_keys.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<%= properties.jenkins.rsa_keys.public %>
7 changes: 7 additions & 0 deletions jobs/jenkins/templates/jenkins_ctl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ DATA_DIR=/var/vcap/store/jenkins
RUN_DIR=/var/vcap/sys/run/jenkins
LOG_DIR=/var/vcap/store/log/
WEBROOT=/var/vcap/store/run/jenkins
TERMINATE_BACKUP=/tmp/terminate_backup

JENKINS_HOME=$DATA_DIR
PIDFILE=$RUN_DIR/jenkins.pid
Expand All @@ -33,11 +34,17 @@ case "$1" in
# Add Slaves configs etc
$JOB_DIR/bin/jenkins_init.ctl

$JOB_DIR/bin/standby_check &

su - vcap -c "$JOB_DIR/bin/run_backup" &

su - vcap -c "JENKINS_HOME=$DATA_DIR LD_LIBRARY_PATH=$PACKAGE_DIR/lib:$LD_LIBRARY_PATH PATH=$JAVA_DIR/bin/:$GIT_DIR/bin/:$PATH $COMMAND >>$LOG_DIR/jenkins.stdout.log 2>>$LOG_DIR/jenkins.stderr.log"
;;

stop)
echo "Stopping Jenkins "
touch $TERMINATE_BACKUP
chown vcap:vcap $TERMINATE_BACKUP
PID=$(head -1 $PIDFILE)

killall java
Expand Down
38 changes: 34 additions & 4 deletions jobs/jenkins/templates/jenkins_init.ctl.erb
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,29 @@ NOT_UPGRADE_SITE_CONFIG=<%= properties.jenkins.not_upgrade_site_config||0 %>
NOT_UPGRADE_JOBS=<%= properties.jenkins.not_upgrade_jobs||0 %>
ENABLE_ZABBIX_AGENT=<%= properties.jenkins.enable_zabbix_agent||0 %>

if [ $NOT_UPGRADE_SITE_CONFIG != 1 ]; then
jenkins_address=<%= properties.jenkins.address %>
jenkins_backup_address=<%= properties.jenkins.backup_address %>
/sbin/ifconfig | grep $jenkins_address
if [ $? == 0 ]; then
jenkins_master_id=1
else
jenkins_master_id=2
fi

# For the standby jenkins, always update the config
if [ $NOT_UPGRADE_SITE_CONFIG != 1 ] || [ $jenkins_master_id == 2 ]; then
# Grab all the slaves to the master config file
cat $JOB_DIR/config/config_head > $JOB_DIR/config/config.xml

slave_address="<%= properties.jenkins.slaves_ip.join(' ') %>"
<%if properties.jenkins.backup_slaves_ip %>
if [ $jenkins_master_id == 2 ]; then
slave_address="<%= properties.jenkins.backup_slaves_ip.join(' ') %>"
fi
<% end %>

slave_id=1
for ip_addr in <%= properties.jenkins_slave.slaves_ip.join(' ') %>
for ip_addr in $slave_address
do
cat $JOB_DIR/config/config_slave|sed -e "s/SLAVE_CONFIG_NAME/jenk${slave_id}/g" -e "s/SLAVE_CONFIG_IP/${ip_addr}/g" >> $JOB_DIR/config/config.xml
slave_id=`expr $slave_id + 1`
Expand All @@ -41,9 +58,21 @@ chown -R vcap:vcap $DATA_DIR/plugins/

# Passwdless ssh host key pair(for slave)
mkdir -p /home/vcap/.ssh
cp $CONFIG_DIR/{id_rsa,id_rsa.pub} /home/vcap/.ssh
chmod 0600 /home/vcap/.ssh/{id_rsa,id_rsa.pub}
if [ $jenkins_master_id == 1 ]; then
cp $CONFIG_DIR/{id_rsa,id_rsa.pub} /home/vcap/.ssh
chmod 0600 /home/vcap/.ssh/{id_rsa,id_rsa.pub}
else
cp $CONFIG_DIR/{id_rsa,id_rsa.pub,authorized_keys} /home/vcap/.ssh
chmod 0600 /home/vcap/.ssh/{id_rsa,id_rsa.pub,authorized_keys}

cat /etc/sudoers | grep "vcap ALL=(ALL) NOPASSWD: ALL" > /dev/null 2>&1
[ $? != 0 ] && echo "vcap ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
fi
chown -R vcap:vcap /home/vcap/.ssh
if [ $jenkins_master_id == 1 ] && [ ! -z "$jenkins_backup_address" ]; then
su - vcap -c "ssh-keygen -f /home/vcap/.ssh/known_hosts -R $jenkins_backup_address"
su - vcap -c "ssh-keyscan $jenkins_backup_address >> /home/vcap/.ssh/known_hosts"
fi

# SSH Keyfile for gerrit-trigger
cp /var/vcap/jobs/jenkins/config/gerrit_trigger_key $DATA_DIR
Expand All @@ -54,6 +83,7 @@ if [ $NOT_UPGRADE_JOBS != 1 ]; then
# Update the job configs
$JOB_DIR/bin/render_jenkins_jobs
fi
chown -R vcap:vcap $DATA_DIR/jobs

# Fonts needed for graph drawing
if [ ! -e /usr/share/fonts/truetype/ttf-dejavu ];
Expand Down
71 changes: 71 additions & 0 deletions jobs/jenkins/templates/run_backup
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/bin/bash
RUN_DIR=/var/vcap/sys/run/jenkins
DATA_DIR=/var/vcap/store/jenkins
LOG_DIR=/var/vcap/store/log
BACKUP_LOG=jenkins_backup.log
TERMINATE_BACKUP=/tmp/terminate_backup
MONIT_FILE=/var/vcap/jobs/jenkins/jenkins.monitrc

#######################################
# Append a timestamped message to the backup log, rotating it first if stale.
# Globals:   LOG_DIR (read), BACKUP_LOG (read)
# Arguments: $1 - message text
# Outputs:   appends "<YYYY-mm-dd HH:MM:SS> <message>" to $LOG_DIR/$BACKUP_LOG
# Returns:   exit status of the final append
#######################################
log() {
  local timenow stale
  timenow=$(date "+%Y-%m-%d %H:%M:%S")

  # Rotate to "<log>.1" when find reports the log's ctime is older than 5 days.
  # NOTE(review): every append refreshes ctime, so this only fires after the
  # log has been idle that long - confirm that is the intended policy.
  stale=$(find "$LOG_DIR" -maxdepth 1 -ctime +5 -name "$BACKUP_LOG")
  if [ -n "$stale" ]; then
    mv "$LOG_DIR/$BACKUP_LOG" "$LOG_DIR/${BACKUP_LOG}.1"
  fi

  printf '%s %s\n' "$timenow" "$1" >> "$LOG_DIR/$BACKUP_LOG"
}

# Work out which role this node plays: it is the master (id 1) when the
# configured Jenkins address is bound to one of its interfaces, otherwise it
# is the standby (id 2).
jenkins_address="<%= properties.jenkins.address %>"
jenkins_master_id=2
# -F/-w: match the address literally and as a whole token, so that dots are
# not regex wildcards and 10.0.0.1 does not match 10.0.0.10.
if [ -n "$jenkins_address" ] \
  && /sbin/ifconfig | grep -qFw -- "$jenkins_address"; then
  jenkins_master_id=1
fi

# Without a backup address there is nothing to sync to.
jenkins_backup_address="<%= properties.jenkins.backup_address %>"
[ -z "$jenkins_backup_address" ] && exit 1

if [ "$jenkins_master_id" = 2 ]; then
  # Standby: if the monit file does not yet mention the backup address,
  # replace the master address with it, then reload monit and restart
  # everything it manages.
  if ! grep -qF -- "$jenkins_backup_address" "$MONIT_FILE"; then
    # Escape dots so the address is matched literally by sed.
    jenkins_address_re=${jenkins_address//./\\.}
    sed -e "s/$jenkins_address_re/$jenkins_backup_address/g" "$MONIT_FILE" \
      > /tmp/jenkins.monitrc
    sudo mv /tmp/jenkins.monitrc "$MONIT_FILE"
    sudo /var/vcap/bosh/bin/monit reload
    sleep 20
    sudo /var/vcap/bosh/bin/monit restart all
  fi
  # Clear any stale stop marker; the standby never runs the backup loop.
  [ -f "$TERMINATE_BACKUP" ] && rm "$TERMINATE_BACKUP"
  exit 0
fi

# Master-only loop: periodically rsync the jobs directory to the standby and
# periodically restart the standby's Jenkins through monit.
LAST_BAK_FILE=$DATA_DIR/last_backup_time
LAST_RESTART_FILE=$DATA_DIR/last_restart_time
# The original tested "$JENKINS_PID", which is never assigned in this script;
# "ps" with no argument always succeeds, so the liveness check was a no-op.
# Read the PID from the run directory instead.
# NOTE(review): assumes jenkins_ctl keeps the Jenkins PID in
# $RUN_DIR/jenkins.pid (its PIDFILE) and that vcap can read it - confirm.
JENKINS_PIDFILE=$RUN_DIR/jenkins.pid
backup_interval=<%= properties.jenkins.backup_interval || 20 %> #unit: min
restart_interval=<%= properties.jenkins.restart_interval || 12 %> #unit: hour
backup_interval_secs=$((backup_interval * 60))
restart_interval_secs=$((restart_interval * 60 * 60))

# Print the epoch stored on the first line of file $1, or 0 when the file is
# missing, empty or not a plain number (which would otherwise break the
# arithmetic comparison on every iteration).
read_epoch() {
  local value=0
  [ -f "$1" ] && value=$(head -1 "$1")
  case "$value" in
    '' | *[!0-9]*) value=0 ;;
  esac
  printf '%s\n' "$value"
}

# Succeed only when the PID recorded in $JENKINS_PIDFILE is a live process.
jenkins_running() {
  local pid
  [ -f "$JENKINS_PIDFILE" ] || return 1
  pid=$(head -1 "$JENKINS_PIDFILE")
  case "$pid" in
    '' | *[!0-9]*) return 1 ;;
  esac
  ps -p "$pid" > /dev/null 2>&1
}

[ -f "$TERMINATE_BACKUP" ] && rm "$TERMINATE_BACKUP"
while true; do
  sleep 20
  # jenkins_ctl "stop" touches this marker to end the loop.
  [ -f "$TERMINATE_BACKUP" ] && exit 0

  last_backup=$(read_epoch "$LAST_BAK_FILE")
  last_restart=$(read_epoch "$LAST_RESTART_FILE")

  now=$(date +%s)
  if jenkins_running && (( now - last_backup > backup_interval_secs )); then
    log "Prepare for starting sync to $jenkins_backup_address:$DATA_DIR/"
    rsync -avz "$DATA_DIR/jobs" "vcap@$jenkins_backup_address:$DATA_DIR/" \
      | grep "jobs" >> "$LOG_DIR/$BACKUP_LOG"
    echo "$now" > "$LAST_BAK_FILE"
  fi

  now=$(date +%s)
  if jenkins_running && (( now - last_restart > restart_interval_secs )); then
    log "Retart remote jenkins $jenkins_backup_address"
    ssh "vcap@$jenkins_backup_address" "sudo /var/vcap/bosh/bin/monit restart jenkins"
    echo "$now" > "$LAST_RESTART_FILE"
  fi
done
33 changes: 33 additions & 0 deletions jobs/jenkins/templates/standby_check.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
LOG_DIR=/var/vcap/store/log/
TERMINATE_BACKUP=/tmp/terminate_backup
STANDBY_LOG=jenkins_standby.log

#######################################
# Append a timestamped message to the standby log, rotating it first if stale.
# Globals:   LOG_DIR (read), STANDBY_LOG (read)
# Arguments: $1 - message text
# Outputs:   appends "<YYYY-mm-dd HH:MM:SS> <message>" to $LOG_DIR/$STANDBY_LOG
# Returns:   exit status of the final append
#######################################
log() {
  local timenow stale
  timenow=$(date "+%Y-%m-%d %H:%M:%S")

  # Rotate to "<log>.1" when find reports the log's ctime is older than 5 days.
  # NOTE(review): every append refreshes ctime, so this only fires after the
  # log has been idle that long - confirm that is the intended policy.
  stale=$(find "$LOG_DIR" -maxdepth 1 -ctime +5 -name "$STANDBY_LOG")
  if [ -n "$stale" ]; then
    mv "$LOG_DIR/$STANDBY_LOG" "$LOG_DIR/${STANDBY_LOG}.1"
  fi

  printf '%s %s\n' "$timenow" "$1" >> "$LOG_DIR/$STANDBY_LOG"
}

# Standby watchdog: while the master Jenkins answers HTTP 200 this node drops
# its outbound traffic to gerrit; when the master stops answering, the DROP
# rule is removed again.
jenkins_address="<%= properties.jenkins.address %>"
gerrit_address="<%= properties.gerrit.address %>"
jenkins_url="http://<%= properties.jenkins.address %>:<%= properties.jenkins.http_port %>"

# The node that owns the master address has nothing to watch.
# -F/-w: literal, whole-token match so 10.0.0.1 does not match 10.0.0.10.
if [ -n "$jenkins_address" ] \
  && /sbin/ifconfig | grep -qFw -- "$jenkins_address"; then
  exit 0
fi

# Succeed when the OUTPUT chain already lists a rule mentioning gerrit.
# The numeric listing is tried first because a plain "iptables -L"
# reverse-resolves addresses and may not show the IP at all; the resolved
# listing is kept as a fallback in case the property holds a host name.
gerrit_rule_present() {
  iptables -n -L OUTPUT | grep -qFw -- "$gerrit_address" \
    || iptables -L OUTPUT | grep -qFw -- "$gerrit_address"
}

[ -f "$TERMINATE_BACKUP" ] && rm "$TERMINATE_BACKUP"
while true; do
  sleep 10
  # jenkins_ctl "stop" touches this marker to end the loop.
  [ -f "$TERMINATE_BACKUP" ] && exit 0

  status=$(curl -o /dev/null -s -w '%{http_code}' "$jenkins_url")
  if [ "$status" = "200" ]; then
    log "The jenkins $jenkins_address is back, cut connection to gerrit $gerrit_address"
    gerrit_rule_present \
      || iptables -A OUTPUT -p ALL -d "$gerrit_address" -j DROP
  else
    log "The jenkins $jenkins_address is error due to $status, open connection to gerrit $gerrit_address"
    # Delete the gerrit DROP rule by specification rather than by position:
    # "-D OUTPUT 1" removes whatever rule happens to be first in the chain.
    # Loop so duplicates left by earlier runs are cleared as well.
    while iptables -D OUTPUT -p ALL -d "$gerrit_address" -j DROP 2> /dev/null; do
      :
    done
  fi
done
9 changes: 8 additions & 1 deletion jobs/nginx/templates/nginx.conf.erb
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ http {
<% end %>
<% if properties.jenkins %>
upstream jenkins {
server <%= properties.jenkins.address %>:<%= properties.jenkins.http_port || 8080 %> max_fails=<%= properties.jenkins.max_fails || 3 %> fail_timeout=<%= properties.jenkins.fail_timeout || "60s" %>;
<% if properties.jenkins.backup_address %>
server <%= properties.jenkins.backup_address %>:<%= properties.jenkins.http_port || 8080 %> backup;
<% end %>
}

server {
listen 80;
server_name "<%= properties.jenkins.external_domain %>";
Expand All @@ -78,7 +85,7 @@ http {
proxy_send_timeout 30;
proxy_read_timeout 30;

proxy_pass http://<%= properties.jenkins.address %>:<%= properties.jenkins.http_port || 8080 %>;
proxy_pass http://jenkins;
}
}
<% end %>
Expand Down

0 comments on commit 3732803

Please sign in to comment.