Permalink
Browse files

Multi-jenkins to eliminate jenkins down time

Change-Id: I462c7db115f9728901f3576bb97820cbcbf50f68
  • Loading branch information...
eveningwear committed Sep 3, 2012
1 parent 3b1b430 commit 37328034a5c469e4cb2df846ad2c00d61279dfeb
View
@@ -3,7 +3,9 @@ name: jenkins
templates:
jenkins_ctl: bin/jenkins_ctl
jenkins_init.ctl.erb: bin/jenkins_init.ctl
+ standby_check.erb: bin/standby_check
zabbix_agent_ctl: bin/zabbix_agent_ctl
+ run_backup: bin/run_backup
change_tmp: bin/change_tmp
id_rsa.erb: config/id_rsa
id_rsa.pub.erb: config/id_rsa.pub
@@ -16,6 +18,7 @@ templates:
hudson.tasks.Maven.xml: config/hudson.tasks.Maven.xml
gerrit_trigger_key.erb: config/gerrit_trigger_key
render_jenkins_jobs.erb: bin/render_jenkins_jobs
+ authorized_keys.erb: config/authorized_keys
packages:
- java
- git
@@ -0,0 +1 @@
+<%= properties.jenkins.rsa_keys.public %>
@@ -7,6 +7,7 @@ DATA_DIR=/var/vcap/store/jenkins
RUN_DIR=/var/vcap/sys/run/jenkins
LOG_DIR=/var/vcap/store/log/
WEBROOT=/var/vcap/store/run/jenkins
+TERMINATE_BACKUP=/tmp/terminate_backup
JENKINS_HOME=$DATA_DIR
PIDFILE=$RUN_DIR/jenkins.pid
@@ -33,11 +34,17 @@ case "$1" in
# Add Slaves configs etc
$JOB_DIR/bin/jenkins_init.ctl
+ $JOB_DIR/bin/standby_check &
+
+ su - vcap -c "$JOB_DIR/bin/run_backup" &
+
su - vcap -c "JENKINS_HOME=$DATA_DIR LD_LIBRARY_PATH=$PACKAGE_DIR/lib:$LD_LIBRARY_PATH PATH=$JAVA_DIR/bin/:$GIT_DIR/bin/:$PATH $COMMAND >>$LOG_DIR/jenkins.stdout.log 2>>$LOG_DIR/jenkins.stderr.log"
;;
stop)
echo "Stopping Jenkins "
+ touch $TERMINATE_BACKUP
+ chown vcap:vcap $TERMINATE_BACKUP
PID=$(head -1 $PIDFILE)
killall java
@@ -9,12 +9,29 @@ NOT_UPGRADE_SITE_CONFIG=<%= properties.jenkins.not_upgrade_site_config||0 %>
NOT_UPGRADE_JOBS=<%= properties.jenkins.not_upgrade_jobs||0 %>
ENABLE_ZABBIX_AGENT=<%= properties.jenkins.enable_zabbix_agent||0 %>
-if [ $NOT_UPGRADE_SITE_CONFIG != 1 ]; then
+jenkins_address=<%= properties.jenkins.address %>
+jenkins_backup_address=<%= properties.jenkins.backup_address %>
+/sbin/ifconfig | grep $jenkins_address
+if [ $? == 0 ]; then
+ jenkins_master_id=1
+else
+ jenkins_master_id=2
+fi
+
+# For the standby jenkins, always update the config
+if [ $NOT_UPGRADE_SITE_CONFIG != 1 ] || [ $jenkins_master_id == 2 ]; then
# Grab all the slaves to the master config file
cat $JOB_DIR/config/config_head > $JOB_DIR/config/config.xml
+ slave_address="<%= properties.jenkins.slaves_ip.join(' ') %>"
+ <%if properties.jenkins.backup_slaves_ip %>
+ if [ $jenkins_master_id == 2 ]; then
+ slave_address="<%= properties.jenkins.backup_slaves_ip.join(' ') %>"
+ fi
+ <% end %>
+
slave_id=1
- for ip_addr in <%= properties.jenkins_slave.slaves_ip.join(' ') %>
+ for ip_addr in $slave_address
do
cat $JOB_DIR/config/config_slave|sed -e "s/SLAVE_CONFIG_NAME/jenk${slave_id}/g" -e "s/SLAVE_CONFIG_IP/${ip_addr}/g" >> $JOB_DIR/config/config.xml
slave_id=`expr $slave_id + 1`
@@ -41,9 +58,21 @@ chown -R vcap:vcap $DATA_DIR/plugins/
# Passwdless ssh host key pair(for slave)
mkdir -p /home/vcap/.ssh
-cp $CONFIG_DIR/{id_rsa,id_rsa.pub} /home/vcap/.ssh
-chmod 0600 /home/vcap/.ssh/{id_rsa,id_rsa.pub}
+if [ $jenkins_master_id == 1 ]; then
+ cp $CONFIG_DIR/{id_rsa,id_rsa.pub} /home/vcap/.ssh
+ chmod 0600 /home/vcap/.ssh/{id_rsa,id_rsa.pub}
+else
+ cp $CONFIG_DIR/{id_rsa,id_rsa.pub,authorized_keys} /home/vcap/.ssh
+ chmod 0600 /home/vcap/.ssh/{id_rsa,id_rsa.pub,authorized_keys}
+
+ cat /etc/sudoers | grep "vcap ALL=(ALL) NOPASSWD: ALL" > /dev/null 2>&1
+ [ $? != 0 ] && echo "vcap ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
+fi
chown -R vcap:vcap /home/vcap/.ssh
+if [ $jenkins_master_id == 1 ] && [ ! -z "$jenkins_backup_address" ]; then
+ su - vcap -c "ssh-keygen -f /home/vcap/.ssh/known_hosts -R $jenkins_backup_address"
+ su - vcap -c "ssh-keyscan $jenkins_backup_address >> /home/vcap/.ssh/known_hosts"
+fi
# SSH Keyfile for gerrit-trigger
cp /var/vcap/jobs/jenkins/config/gerrit_trigger_key $DATA_DIR
@@ -54,6 +83,7 @@ if [ $NOT_UPGRADE_JOBS != 1 ]; then
# Update the job configs
$JOB_DIR/bin/render_jenkins_jobs
fi
+chown -R vcap:vcap $DATA_DIR/jobs
# Fonts needed for graph drawing
if [ ! -e /usr/share/fonts/truetype/ttf-dejavu ];
@@ -0,0 +1,71 @@
+#!/bin/bash
+RUN_DIR=/var/vcap/sys/run/jenkins
+DATA_DIR=/var/vcap/store/jenkins
+LOG_DIR=/var/vcap/store/log
+BACKUP_LOG=jenkins_backup.log
+TERMINATE_BACKUP=/tmp/terminate_backup
+MONIT_FILE=/var/vcap/jobs/jenkins/jenkins.monitrc
+
+log() {
+ timenow=`date "+%Y-%m-%d %H:%M:%S"`
+ find $LOG_DIR -maxdepth 1 -ctime +5 -name $BACKUP_LOG | grep "$BACKUP_LOG"
+ [ $? == 0 ] && mv $LOG_DIR/$BACKUP_LOG $LOG_DIR/${BACKUP_LOG}.1
+ echo "$timenow $1" >> $LOG_DIR/$BACKUP_LOG
+}
+
+jenkins_master_id=1
+jenkins_address=<%= properties.jenkins.address %>
+/sbin/ifconfig | grep $jenkins_address
+[ $? == 0 ] || jenkins_master_id=2
+
+jenkins_backup_address=<%= properties.jenkins.backup_address %>
+[ -z "$jenkins_backup_address" ] && exit 1
+
+if [ $jenkins_master_id == 2 ]; then
+ cat $MONIT_FILE | grep "$jenkins_backup_address"
+ if [ $? != 0 ]; then
+ cat $MONIT_FILE | sed -e "s/$jenkins_address/$jenkins_backup_address/g" > /tmp/jenkins.monitrc
+ sudo mv /tmp/jenkins.monitrc $MONIT_FILE
+ sudo /var/vcap/bosh/bin/monit reload
+ sleep 20
+ sudo /var/vcap/bosh/bin/monit restart all
+ fi
+ [ -f $TERMINATE_BACKUP ] && rm $TERMINATE_BACKUP
+ exit 0
+fi
+
+LAST_BAK_FILE=$DATA_DIR/last_backup_time
+LAST_RESTART_FILE=$DATA_DIR/last_restart_time
+backup_interval=<%= properties.jenkins.backup_interval || 20 %> #unit: min
+restart_interval=<%= properties.jenkins.restart_interval || 12 %> #unit: hour
+backup_interval_secs=$((backup_interval * 60))
+restart_interval_secs=$((restart_interval * 60 * 60))
+
+[ -f $TERMINATE_BACKUP ] && rm $TERMINATE_BACKUP
+while true
+do
+ sleep 20
+ [ -f $TERMINATE_BACKUP ] && exit 0
+
+ last_backup=0
+ [ -f $LAST_BAK_FILE ] && last_backup=`head -1 $LAST_BAK_FILE`
+
+ last_restart=0
+ [ -f $LAST_RESTART_FILE ] && last_restart=`head -1 $LAST_RESTART_FILE`
+
+ now=`date +%s`
+ ps $JENKINS_PID
+ if [ $? == 0 ] && (( $now - $last_backup > $backup_interval_secs )); then
+ log "Prepare for starting sync to $jenkins_backup_address:$DATA_DIR/"
+ rsync -avz $DATA_DIR/jobs vcap@$jenkins_backup_address:$DATA_DIR/ | grep "jobs" >> $LOG_DIR/$BACKUP_LOG
+ echo $now > $LAST_BAK_FILE
+ fi
+
+ now=`date +%s`
+ ps $JENKINS_PID
+ if [ $? == 0 ] && (( $now - $last_restart > $restart_interval_secs )); then
+ log "Retart remote jenkins $jenkins_backup_address"
+ ssh vcap@$jenkins_backup_address "sudo /var/vcap/bosh/bin/monit restart jenkins"
+ echo $now > $LAST_RESTART_FILE
+ fi
+done
@@ -0,0 +1,33 @@
+#!/bin/bash
+LOG_DIR=/var/vcap/store/log/
+TERMINATE_BACKUP=/tmp/terminate_backup
+STANDBY_LOG=jenkins_standby.log
+
+log() {
+ timenow=`date "+%Y-%m-%d %H:%M:%S"`
+ find $LOG_DIR -maxdepth 1 -ctime +5 -name $STANDBY_LOG | grep "$STANDBY_LOG"
+ [ $? == 0 ] && mv $LOG_DIR/$STANDBY_LOG $LOG_DIR/${STANDBY_LOG}.1
+ echo "$timenow $1" >> $LOG_DIR/$STANDBY_LOG
+}
+
+jenkins_address=<%= properties.jenkins.address %>
+# Same default port as the nginx upstream definition uses.
+jenkins_port=<%= properties.jenkins.http_port || 8080 %>
+# NOTE(review): assumed to be an IP address or a name iptables can resolve.
+gerrit_address=<%= properties.gerrit.address %>
+
+# The primary host has nothing to watch. -F/-w match the address as a literal
+# whole word, so that e.g. 10.0.0.2 is not "found" inside Bcast:10.0.0.255.
+if [ -n "$jenkins_address" ] \
+  && /sbin/ifconfig | grep -qwF -- "$jenkins_address"; then
+  exit 0
+fi
+
+# Succeed when the exact DROP rule for gerrit is present in OUTPUT.
+# Uses iptables --check (iptables 1.4.11 or newer) instead of grepping the
+# listing, which can show a resolved host name in place of the address.
+gerrit_blocked() {
+  iptables -C OUTPUT -p ALL -d "$gerrit_address" -j DROP 2> /dev/null
+}
+
+# Clear a stale stop flag from a previous run before entering the loop.
+rm -f -- "$TERMINATE_BACKUP"
+while true; do
+  sleep 10
+  # "jenkins_ctl stop" creates this flag to ask the loop to finish.
+  [ -f "$TERMINATE_BACKUP" ] && exit 0
+
+  # --max-time keeps a hung primary from blocking the watchdog forever; on
+  # timeout or connection failure curl reports the code 000.
+  status=$(curl -o /dev/null -s --max-time 30 -w '%{http_code}' \
+    "http://${jenkins_address}:${jenkins_port}")
+  if [ "$status" = "200" ]; then
+    log "The jenkins $jenkins_address is back, cut connection to gerrit $gerrit_address"
+    gerrit_blocked || iptables -A OUTPUT -p ALL -d "$gerrit_address" -j DROP
+  else
+    log "The jenkins $jenkins_address is error due to $status, open connection to gerrit $gerrit_address"
+    # Delete by rule specification (never by position, which could remove an
+    # unrelated rule) and repeat in case duplicates were appended earlier.
+    while gerrit_blocked; do
+      iptables -D OUTPUT -p ALL -d "$gerrit_address" -j DROP || break
+    done
+  fi
+done
@@ -62,6 +62,13 @@ http {
<% end %>
<% if properties.jenkins %>
+ upstream jenkins {
+ server <%= properties.jenkins.address %>:<%= properties.jenkins.http_port || 8080 %> max_fails=<%= properties.jenkins.max_fails || 3 %> fail_timeout=<%= properties.jenkins.fail_timeout || "60s" %>;
+ <% if properties.jenkins.backup_address %>
+ server <%= properties.jenkins.backup_address %>:<%= properties.jenkins.http_port || 8080 %> backup;
+ <% end %>
+ }
+
server {
listen 80;
server_name "<%= properties.jenkins.external_domain %>";
@@ -78,7 +85,7 @@ http {
proxy_send_timeout 30;
proxy_read_timeout 30;
- proxy_pass http://<%= properties.jenkins.address %>:<%= properties.jenkins.http_port || 8080 %>;
+ proxy_pass http://jenkins;
}
}
<% end %>

0 comments on commit 3732803

Please sign in to comment.