Skip to content

Commit

Permalink
Merge pull request #135 from buildkite/lifecycled-the-second
Browse files Browse the repository at this point in the history
Replace lifecycled without SNS/SQS
  • Loading branch information
toolmantim committed Sep 6, 2016
2 parents b57f5f9 + 285bdc0 commit 0b8847d
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 79 deletions.
1 change: 0 additions & 1 deletion README.md
Expand Up @@ -88,7 +88,6 @@ Adding extra tags to the stack (including the EC2 instances) can be done via `ex
* [aws-cli](https://aws.amazon.com/cli/) - useful for performing any ops-related tasks
* [jq](https://stedolan.github.io/jq/) - useful for manipulating JSON responses from cli tools such as aws-cli or the Buildkite API
* [docker-gc](https://github.com/spotify/docker-gc) - removes old docker images
* [lifecycled](https://github.com/lox/lifecycled) - manages AWS autoscaling events

## What Type of Builds Does This Support?

Expand Down
4 changes: 2 additions & 2 deletions packer/buildkite-ami.json
Expand Up @@ -8,7 +8,7 @@
"instance_type": "c4.large",
"ssh_username": "ec2-user",
"ami_name": "buildkite-stack-{{isotime | clean_ami_name}}",
"ami_description": "Buildkite CloudFormation Stack base image (Amazon Linux, buildkite-agent, docker, docker-compose, docker-gc, jq, lifecycled)",
"ami_description": "Buildkite CloudFormation Stack base image (Amazon Linux, buildkite-agent, docker, docker-compose, docker-gc, jq)",
"ami_groups": ["all"]
}
],
Expand All @@ -28,7 +28,7 @@
},
{
"type": "shell",
"script": "scripts/install-lifecycled.sh"
"script": "scripts/install-terminationd.sh"
},
{
"type": "shell",
Expand Down
55 changes: 55 additions & 0 deletions packer/conf/terminationd/bin/terminationd
@@ -0,0 +1,55 @@
#!/bin/bash

set -eu -o pipefail

# terminationd: a small daemon that polls this instance's Auto Scaling
# lifecycle state until the autoscaling group puts the instance into
# `Terminating:Wait`. It then runs a hook and tells the autoscaling group to
# proceed with the termination. This allows us to wait for all buildkite-agent
# jobs to complete on the instance before it is terminated.
#
# Expects /etc/terminationd/hook to be installed and executable.

# Only lifecycle hooks for this transition are relevant to termination.
TERMINATING_TRANSITION="autoscaling:EC2_INSTANCE_TERMINATING"

# The metadata tool prints "<label>: <value>"; the second field is the value.
# The region is the availability zone minus its trailing zone letter.
AVAILABILITY_ZONE="$(/opt/aws/bin/ec2-metadata --availability-zone | cut -d " " -f 2)"
AWS_REGION="${AVAILABILITY_ZONE%?}"
INSTANCE_ID="$(/opt/aws/bin/ec2-metadata --instance-id | cut -d " " -f 2)"

echo "Monitoring instance state for ${INSTANCE_ID}"

while true; do
  # Running the lookup as an `if` condition keeps `set -e` from killing the
  # daemon on a transient API failure; we simply retry on the next poll.
  # The query is quoted so the shell never treats `[]` as a glob pattern.
  if LIFECYCLE_STATE="$(aws --region "${AWS_REGION}" autoscaling describe-auto-scaling-instances \
    --instance-ids "${INSTANCE_ID}" \
    --query "AutoScalingInstances[].LifecycleState" --output text)"; then

    echo "Lifecycle state is ${LIFECYCLE_STATE}"

    if [[ "${LIFECYCLE_STATE}" == "Terminating:Wait" ]]; then
      break
    fi
  else
    echo "Failed to fetch lifecycle state, will retry" >&2
  fi

  sleep 15 # seconds
done

echo "Running terminationd hook"

# The hook is best-effort: a failing hook must not block the termination,
# but the failure is logged instead of being silently discarded.
HOOK_STATUS=0
/etc/terminationd/hook || HOOK_STATUS=$?
if [[ "${HOOK_STATUS}" -ne 0 ]]; then
  echo "terminationd hook exited with status ${HOOK_STATUS}, continuing with termination" >&2
fi

echo "Fetching autoscaling group name"

AUTO_SCALING_GROUP_NAME="$(aws --region "${AWS_REGION}" ec2 describe-tags \
  --filters "Name=resource-id,Values=${INSTANCE_ID}" \
            "Name=key,Values=aws:autoscaling:groupName" \
  --query "Tags[].Value" --output text)"

echo "Fetching lifecycle hook name"

# Select only the termination hook. Without the filter, a group that has more
# than one lifecycle hook (e.g. an additional launch hook) would yield several
# names in one string and the completion call below would fail.
# NOTE(review): if a group has several termination hooks only the first is
# completed here.
LIFECYCLE_HOOK_NAME="$(aws --region "${AWS_REGION}" autoscaling describe-lifecycle-hooks \
  --auto-scaling-group-name "${AUTO_SCALING_GROUP_NAME}" \
  --query "LifecycleHooks[?LifecycleTransition=='${TERMINATING_TRANSITION}'].LifecycleHookName | [0]" \
  --output text)"

# With --output text a null query result is printed as the literal "None".
if [[ -z "${LIFECYCLE_HOOK_NAME}" || "${LIFECYCLE_HOOK_NAME}" == "None" ]]; then
  echo "No ${TERMINATING_TRANSITION} lifecycle hook found for ${AUTO_SCALING_GROUP_NAME}" >&2
  exit 1
fi

echo "Completing lifecycle action for ${AUTO_SCALING_GROUP_NAME} ${LIFECYCLE_HOOK_NAME}"

aws --region "${AWS_REGION}" autoscaling complete-lifecycle-action \
  --auto-scaling-group-name "${AUTO_SCALING_GROUP_NAME}" \
  --lifecycle-hook-name "${LIFECYCLE_HOOK_NAME}" \
  --instance-id "${INSTANCE_ID}" \
  --lifecycle-action-result CONTINUE
15 changes: 15 additions & 0 deletions packer/conf/terminationd/upstart/terminationd.conf
@@ -0,0 +1,15 @@
# Upstart job definition for the terminationd daemon (/usr/bin/terminationd).

# Start when the system enters runlevel 2, 3, 4 or 5.
start on runlevel [2345]

# Stop when the system leaves those runlevels.
stop on runlevel [!2345]

# Restart the job if the process exits.
respawn

script
# NOTE(review): `set -a` marks subsequently assigned shell variables for
# export; no variables are assigned below, so its purpose here is unclear.
set -a
# Create a named pipe and attach a background `logger` to its read end, so
# everything written to the pipe is logged with the tag "terminationd".
mkfifo /tmp/terminationd-log-fifo
( logger -t terminationd </tmp/terminationd-log-fifo & )
# Point this shell's stdout, then stderr, at the pipe.
exec >/tmp/terminationd-log-fifo
exec 2>&1
# The path is no longer needed once both ends are open; the already-open
# descriptors keep working after the name is removed.
rm /tmp/terminationd-log-fifo
# Replace the shell with the daemon, which inherits the redirected output.
exec /usr/bin/terminationd
end script
5 changes: 4 additions & 1 deletion packer/scripts/install-buildkite-agent.sh
Expand Up @@ -60,4 +60,7 @@ sudo mkdir -p /var/lib/buildkite-agent/plugins
sudo chown -R buildkite-agent: /var/lib/buildkite-agent/plugins

echo "Adding init.d template..."
sudo cp /tmp/conf/buildkite-agent/init.d/buildkite-agent /etc/buildkite-agent/init.d.tmpl
sudo cp /tmp/conf/buildkite-agent/init.d/buildkite-agent /etc/buildkite-agent/init.d.tmpl

echo "Adding terminationd hook..."
# Make sure the target directory exists regardless of the order in which the
# provisioning scripts run, and that the hook is executable: terminationd
# expects /etc/terminationd/hook to be installed and executable.
sudo mkdir -p /etc/terminationd
sudo cp /tmp/conf/buildkite-agent/terminationd/hook /etc/terminationd/hook
sudo chmod +x /etc/terminationd/hook
17 changes: 0 additions & 17 deletions packer/scripts/install-lifecycled.sh

This file was deleted.

7 changes: 7 additions & 0 deletions packer/scripts/install-terminationd.sh
@@ -0,0 +1,7 @@
#!/bin/bash

set -eu -o pipefail

# Installs the terminationd daemon and its Upstart job definition from the
# configuration files uploaded to /tmp/conf.

# -p keeps this step idempotent: it succeeds even if the directory was already
# created (e.g. by another provisioning script or a re-run).
sudo mkdir -p /etc/terminationd

sudo cp /tmp/conf/terminationd/bin/* /usr/bin
# The Upstart job execs /usr/bin/terminationd directly, so it must be
# executable regardless of the mode the uploaded file ended up with.
sudo chmod +x /usr/bin/terminationd

sudo cp /tmp/conf/terminationd/upstart/* /etc/init
2 changes: 1 addition & 1 deletion packer/scripts/install-utils.sh
@@ -1,4 +1,4 @@
#!/usr/bin/env bash
#!/bin/bash

set -eu -o pipefail

Expand Down
86 changes: 29 additions & 57 deletions templates/buildkite-elastic.yml
Expand Up @@ -203,13 +203,6 @@ Conditions:
UseSpecifiedIamPolicies:
!Not [ !Equals [ !Join [ "", $(ManagedPolicyArns) ], "" ] ]

Outputs:
AgentAutoScaleTopic:
Value: $(AgentAutoScaleTopic)

AgentAutoScaleTopicName:
Value: $(AgentAutoScaleTopic[TopicName])

Resources:
# Allow ec2 instances to assume a role and be granted the IAMPolicies
IAMInstanceProfile:
Expand Down Expand Up @@ -241,21 +234,18 @@ Resources:
- cloudwatch:PutMetricData
- cloudformation:DescribeStackResource
- ec2:DescribeTags
- autoscaling:CompleteLifecycleAction
- autoscaling:DescribeAutoScalingInstances
- autoscaling:DescribeLifecycleHooks
- autoscaling:RecordLifecycleActionHeartbeat
- autoscaling:CompleteLifecycleAction
Resource: "*"
- Effect: Allow
Action:
- sqs:*
Resource: $(AgentLifecycleQueue[Arn])
- Effect: Allow
Action:
- logs:CreateLogGroup
- logs:CreateLogStream
- logs:PutLogEvents
- logs:DescribeLogStreams
Resource:
- "arn:aws:logs:*:*:*"
Resource: "*"

Roles:
- $(IAMRole)
Expand Down Expand Up @@ -341,28 +331,14 @@ Resources:
#!/bin/bash -eu
cat << EOF > /var/lib/buildkite-agent/cfn-env
BUILDKITE_STACK_NAME=$(AWS::StackName)
BUILDKITE_SECRETS_BUCKET=$(SecretsBucket)
BUILDKITE_AGENTS_PER_INSTANCE=$(AgentsPerInstance)
BUILDKITE_STACK_NAME="$(AWS::StackName)"
BUILDKITE_SECRETS_BUCKET="$(SecretsBucket)"
BUILDKITE_AGENTS_PER_INSTANCE="$(AgentsPerInstance)"
EOF
chown buildkite-agent: /var/lib/buildkite-agent/cfn-env
chown buildkite-agent /var/lib/buildkite-agent/cfn-env
02-write-lifecycled-config:
command: |
#!/bin/bash -eu
cat << EOF > /etc/lifecycled
AWS_REGION=$(AWS::Region)
LIFECYCLED_DEBUG=true
LIFECYCLED_QUEUE=$(AgentLifecycleQueue)
LIFECYCLED_INSTANCEID=\$(/opt/aws/bin/ec2-metadata --instance-id | cut -d " " -f 2)
LIFECYCLED_HANDLER=/usr/bin/buildkite-agent-lifecycled-handler
EOF
start lifecycled
03-restart-docker:
02-restart-docker:
command: |
#!/bin/bash -eu
Expand All @@ -374,16 +350,14 @@ Resources:
service docker start || ( cat /var/log/docker && false )
sleep 10 && docker info
04-install-buildkite:
03-install-buildkite:
command: |
#!/bin/bash -eu
# Choose the right binary
ln -s /usr/bin/buildkite-agent-$(BuildkiteAgentRelease) /usr/bin/buildkite-agent
# Setup the buildkite-agent config
INSTANCE_ID=\$(/opt/aws/bin/ec2-metadata --instance-id | cut -d " " -f 2)
DOCKER_VERSION=\$(docker --version | cut -f3 -d' ' | sed 's/,//')
Expand All @@ -408,7 +382,6 @@ Resources:
chown buildkite-agent: /etc/buildkite-agent/buildkite-agent.cfg
# Setup logging first so we capture everything
for i in \$(seq 1 $(AgentsPerInstance)); do
touch /var/log/buildkite-agent-\${i}.log
Expand All @@ -423,7 +396,8 @@ Resources:
service awslogs restart
05-fetch-authorized-users:
04-fetch-authorized-users:
test: test -n "$(AuthorizedUsersUrl)"
command: |
#!/bin/bash -eu
Expand All @@ -439,25 +413,32 @@ Resources:
/etc/cron.hourly/authorized_keys
06-run-bootstrap-script:
05-run-bootstrap-script:
test: test -n "$(BootstrapScriptUrl)"
command: |
#!/bin/bash -euo pipefail
#!/bin/bash -eu
curl -sSL "$(BootstrapScriptUrl)" | bash
07-start-agent:
06-start-the-world:
command: |
# Setup and start services
#!/bin/bash -eu
# Start services
for i in \$(seq 1 $(AgentsPerInstance)); do
cp /etc/buildkite-agent/init.d.tmpl /etc/init.d/buildkite-agent-\${i}
service buildkite-agent-\${i} start
chkconfig --add buildkite-agent-\${i}
done
AgentLifecycleQueue:
Type: AWS::SQS::Queue
# Make sure terminationd is started if it isn't
start terminationd || true
# XXX: We don't even want a topic or role, but CloudFormation requires them
# to create a lifecycle hook.
AgentLifecycleTopic:
Type: AWS::SNS::Topic

AgentLifecycleHookRole:
Type: AWS::IAM::Role
Expand All @@ -474,21 +455,19 @@ Resources:
Statement:
- Effect: Allow
Action:
- sqs:*
Resource: $(AgentLifecycleQueue[Arn])
- sns:Publish
Resource: $(AgentLifecycleTopic)
Path: /

AgentLifecycleHook:
Type: AWS::AutoScaling::LifecycleHook
Properties:
AutoScalingGroupName: $(AgentAutoScaleGroup)
LifecycleTransition: autoscaling:EC2_INSTANCE_TERMINATING
NotificationTargetARN: $(AgentLifecycleQueue[Arn])
DefaultResult: CONTINUE
NotificationTargetARN: $(AgentLifecycleTopic)
RoleARN: $(AgentLifecycleHookRole[Arn])

AgentAutoScaleTopic:
Type: AWS::SNS::Topic

AgentAutoScaleGroup:
Type: AWS::AutoScaling::AutoScalingGroup
Properties:
Expand All @@ -500,13 +479,6 @@ Resources:
LaunchConfigurationName: $(AgentLaunchConfiguration)
MinSize: $(MinSize)
MaxSize: $(MaxSize)
NotificationConfigurations:
- TopicARN: $(AgentAutoScaleTopic)
NotificationTypes:
- "autoscaling:EC2_INSTANCE_LAUNCH"
- "autoscaling:EC2_INSTANCE_LAUNCH_ERROR"
- "autoscaling:EC2_INSTANCE_TERMINATE"
- "autoscaling:EC2_INSTANCE_TERMINATE_ERROR"
MetricsCollection:
- Granularity: 1Minute
Metrics:
Expand Down

0 comments on commit 0b8847d

Please sign in to comment.