Skip to content

Commit

Permalink
Merge branch 'master' into no-latest
Browse files Browse the repository at this point in the history
  • Loading branch information
Noah Zoschke committed Dec 16, 2015
2 parents cbb7e32 + 9b593c8 commit f019661
Show file tree
Hide file tree
Showing 8 changed files with 559 additions and 44 deletions.
4 changes: 2 additions & 2 deletions api/controllers/processes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func TestProcessesList(t *testing.T) {
test.DescribeTaskDefinitionCycle("convox-test-cluster"),
test.DescribeContainerInstancesFilteredCycle("convox-test-cluster"),
test.DescribeInstancesFilteredCycle(),
test.ListServicesCycle("convox-test-cluster"),
test.DescribeAppStackResourcesCycle("myapp-staging"),
test.DescribeServicesCycle("convox-test-cluster"),
)
defer aws.Close()
Expand Down Expand Up @@ -70,7 +70,7 @@ func TestGetProcessesWithDeployments(t *testing.T) {
test.DescribeTaskDefinitionCycle("convox-test-cluster"),
test.DescribeContainerInstancesFilteredCycle("convox-test-cluster"),
test.DescribeInstancesFilteredCycle(),
test.ListServicesCycle("convox-test-cluster"),
test.DescribeAppStackResourcesCycle("myapp-staging"),
test.DescribeServicesWithDeploymentsCycle("convox-test-cluster"),
test.DescribeTaskDefinition3Cycle("convox-test-cluster"),
test.DescribeTaskDefinition1Cycle("convox-test-cluster"),
Expand Down
7 changes: 7 additions & 0 deletions api/models/formation.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,13 @@ func SetFormation(app, process, count, memory string) error {
params[fmt.Sprintf("%sMemory", UpperName(process))] = memory
}

NotifySuccess("release:scale", map[string]string{
"app": rel.App,
"id": rel.Id,
})

go rel.Monitor()

return a.UpdateParams(params)
}

Expand Down
122 changes: 104 additions & 18 deletions api/models/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,37 @@ type Process struct {

type Processes []Process

var DEPLOYMENT_TIMEOUT = 10 * time.Minute

func GetServices(app string) ([]*ecs.Service, error) {
services := []*ecs.Service{}

resources, err := ListResources(app)

if err != nil {
return services, err
}

arns := []*string{}

for _, r := range resources {
if r.Type == "Custom::ECSService" {
arns = append(arns, aws.String(r.Id))
}
}

dres, err := ECS().DescribeServices(&ecs.DescribeServicesInput{
Cluster: aws.String(os.Getenv("CLUSTER")),
Services: arns,
})

if err != nil {
return services, err
}

return dres.Services, nil
}

func ListProcesses(app string) (Processes, error) {
_, err := GetApp(app)

Expand Down Expand Up @@ -128,29 +159,13 @@ func ListProcesses(app string) (Processes, error) {
func ListPendingProcesses(app string) (Processes, error) {
pss := Processes{}

// Describe all services
lsres, err := ECS().ListServices(&ecs.ListServicesInput{
Cluster: aws.String(os.Getenv("CLUSTER")),
})

if err != nil {
return nil, err
}

dsres, err := ECS().DescribeServices(&ecs.DescribeServicesInput{
Cluster: aws.String(os.Getenv("CLUSTER")),
Services: lsres.ServiceArns,
})
services, err := GetServices(app)

if err != nil {
return nil, err
}

for _, service := range dsres.Services {
if !strings.HasPrefix(*service.ServiceName, fmt.Sprintf("%s-", app)) {
continue
}

for _, service := range services {
// Test every service deployment for running != pending, to put in a placeholder
for _, d := range service.Deployments {
if *d.DesiredCount == *d.RunningCount {
Expand Down Expand Up @@ -192,6 +207,77 @@ func ListPendingProcesses(app string) (Processes, error) {
return pss, nil
}

// Determine the deployment state based on the state of all the services.
func AppDeploymentState(serviceStates []string) string {
severity := map[string]int{
"finished": 0,
"pending": 1,
"warning": 2,
"timeout": 3,
}

max := 0
state := "finished"

for i := 0; i < len(serviceStates); i++ {
s := serviceStates[i]
if severity[s] > max {
max = severity[s]
state = s
}
}

return state
}

// Determine the deployment state based on the events that occurred between
// the Deployment.StartedAt and now. For testing purposes take an optional
// time to compare to.
func ServiceDeploymentState(s *ecs.Service, at ...time.Time) string {
now := time.Now()

if len(at) > 0 {
now = at[0]
}

// get latest deployment, event, message
deployment := s.Deployments[0]
event := s.Events[0]
message := *event.Message

fmt.Printf("ServiceDeploymentState message=%q event.CreatedAt=%q deploy.CreatedAt=%q now=%q\n", message, event.CreatedAt, deployment.CreatedAt, now)

window := now.Add(-DEPLOYMENT_TIMEOUT)

if deployment.CreatedAt.Before(window) {
return "timeout"
}

if deployment.CreatedAt.After(*event.CreatedAt) {
return "pending"
}

if strings.HasSuffix(message, "reached a steady state.") {
return "finished"
}

if strings.HasSuffix(message, "see the Troubleshooting section of the Amazon ECS Developer Guide.") {
return "warning"
}

return "pending"
}

func ServicesDeploymentStates(services []*ecs.Service) []string {
states := []string{}

for i := 0; i < len(services); i++ {
states = append(states, ServiceDeploymentState(services[i]))
}

return states
}

func fetchProcess(app string, task ecs.Task, td ecs.TaskDefinition, cd ecs.ContainerDefinition, c ecs.Container, psch chan Process, errch chan error) {
idp := strings.Split(*c.ContainerArn, "-")
id := idp[len(idp)-1]
Expand Down
93 changes: 93 additions & 0 deletions api/models/process_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package models_test

import (
"bytes"
"os"
"testing"
"time"

"github.com/convox/rack/Godeps/_workspace/src/github.com/aws/aws-sdk-go/aws"
"github.com/convox/rack/Godeps/_workspace/src/github.com/aws/aws-sdk-go/private/protocol/json/jsonutil"
"github.com/convox/rack/Godeps/_workspace/src/github.com/aws/aws-sdk-go/service/ecs"

"github.com/convox/rack/api/models"
"github.com/convox/rack/test"
"github.com/stretchr/testify/assert"
)

func TestGetServices(t *testing.T) {
os.Setenv("RACK", "convox-test")
os.Setenv("CLUSTER", "convox-test")

aws := test.StubAws(
test.HttpdDescribeStackResourcesCycle(),
test.HttpdDescribeServicesCycle(),
)
defer aws.Close()

services, err := models.GetServices("httpd")
assert.Nil(t, err)
assert.Equal(t, 1, len(services))

s := services[0]
assert.Equal(t, "arn:aws:ecs:us-west-2:901416387788:service/httpd-web-SRZPVERKQOL", *s.ServiceArn)
}

func TestServiceDeploymentState(t *testing.T) {
out := ecs.DescribeServicesOutput{}
err := jsonutil.UnmarshalJSON(&out, bytes.NewBufferString(test.HttpdDescribeServicesResponse()))
assert.Nil(t, err)

s := out.Services[0]
eventAt := *s.Events[0].CreatedAt

// final event is "(service httpd-web-SRZPVERKQOL) has reached a steady state."
assert.Equal(t, 4, len(s.Events))
assert.Equal(t, "finished", models.ServiceDeploymentState(s, eventAt))

// shift current event back to "(service httpd-web-SRZPVERKQOL) registered 1 instances in (elb httpd)"
s.Events = s.Events[1:]
assert.Equal(t, 3, len(s.Events))
assert.Equal(t, "pending", models.ServiceDeploymentState(s, eventAt))

// unshift a scheduler warning
s.Events = append([]*ecs.ServiceEvent{
&ecs.ServiceEvent{
CreatedAt: aws.Time(eventAt),
Message: aws.String("service httpd-web-SRZPVERKQOL was unable to place a task because no container instance met all of its requirements. The closest matching container-instance b1a73168-f8a6-4ed9-b69e-94adc7a0f1e0 has insufficient memory available. For more information, see the Troubleshooting section of the Amazon ECS Developer Guide."),
},
}, s.Events...)
assert.Equal(t, 4, len(s.Events))
assert.Equal(t, "warning", models.ServiceDeploymentState(s, eventAt))

// unshift a Deployment that started after the last event
newDeployAt := eventAt.Add(10 * time.Second)
s.Deployments = append([]*ecs.Deployment{
&ecs.Deployment{
Status: aws.String("PRIMARY"),
CreatedAt: aws.Time(newDeployAt),
},
}, s.Deployments...)

assert.Equal(t, "pending", models.ServiceDeploymentState(s, newDeployAt))

// compare deployment start to now which is > 10m after latest event
assert.Equal(t, "pending", models.ServiceDeploymentState(s, newDeployAt.Add(models.DEPLOYMENT_TIMEOUT)))
assert.Equal(t, "timeout", models.ServiceDeploymentState(s, newDeployAt.Add(models.DEPLOYMENT_TIMEOUT+1*time.Second)))
}

func TestAppDeploymentState(t *testing.T) {
assert.Equal(t, "finished", models.AppDeploymentState([]string{"finished", "finished"}))
assert.Equal(t, "pending", models.AppDeploymentState([]string{"finished", "pending"}))
assert.Equal(t, "warning", models.AppDeploymentState([]string{"finished", "warning"}))
assert.Equal(t, "timeout", models.AppDeploymentState([]string{"finished", "timeout"}))

assert.Equal(t, "pending", models.AppDeploymentState([]string{"pending", "pending"}))
assert.Equal(t, "warning", models.AppDeploymentState([]string{"pending", "warning"}))
assert.Equal(t, "timeout", models.AppDeploymentState([]string{"pending", "timeout"}))

assert.Equal(t, "warning", models.AppDeploymentState([]string{"warning", "warning"}))
assert.Equal(t, "timeout", models.AppDeploymentState([]string{"warning", "timeout"}))

assert.Equal(t, "timeout", models.AppDeploymentState([]string{"timeout", "timeout"}))
}
44 changes: 44 additions & 0 deletions api/models/release.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,48 @@ func (r *Release) Save() error {
return S3Put(app.Outputs["Settings"], fmt.Sprintf("releases/%s/env", r.Id), env, true)
}

// Monitor the ECS services and events every 30s and and send notifications.
// If there is a scheduling error, warn once.
// Stop monitoring and report a final state when everything finished or after a 10m timeout.
func (r *Release) Monitor() {
fmt.Printf("release monitor app=%s id=%s\n", r.App, r.Id)

data := map[string]string{"id": r.Id, "app": r.App}
warned := false

for i := 0; i < 20; i++ {
services, err := GetServices(r.App)

if err != nil {
fmt.Printf("error: %+v\n", err)
time.Sleep(30 * time.Second)
continue
}

state := AppDeploymentState(ServicesDeploymentStates(services))

fmt.Printf("release monitor app=%s id=%s i=%d state=%s\n", r.App, r.Id, i, state)

switch state {
case "finished":
NotifySuccess("release:finish", data)
return

case "timeout":
NotifyError("release:timeout", fmt.Errorf("Error: Deploy did not complete in 10m."), data)
return

case "failed":
if !warned {
NotifyError("release:warn", fmt.Errorf("Warning: deploy failed to place resources."), data)
}
warned = true
}

time.Sleep(30 * time.Second)
}
}

func (r *Release) Promote() error {
formation, err := r.Formation()

Expand Down Expand Up @@ -223,6 +265,8 @@ func (r *Release) Promote() error {
"id": r.Id,
})

go r.Monitor()

return err
}

Expand Down
27 changes: 4 additions & 23 deletions api/models/resource.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package models

import (
"strings"
"time"

"github.com/convox/rack/Godeps/_workspace/src/github.com/aws/aws-sdk-go/aws"
Expand All @@ -22,13 +21,15 @@ type Resource struct {
type Resources map[string]Resource

func ListResources(app string) (Resources, error) {
res, err := CloudFormation().DescribeStackResources(&cloudformation.DescribeStackResourcesInput{StackName: aws.String(app)})
res, err := CloudFormation().DescribeStackResources(&cloudformation.DescribeStackResourcesInput{
StackName: aws.String(app),
})

if err != nil {
return nil, err
}

resources := make(Resources)
resources := make(Resources, len(res.StackResources))

for _, r := range res.StackResources {
resources[*r.LogicalResourceId] = Resource{
Expand All @@ -43,23 +44,3 @@ func ListResources(app string) (Resources, error) {

return resources, nil
}

func ListProcessResources(app, process string) (Resources, error) {
res, err := ListResources(app)

if err != nil {
return nil, err
}

resources := make(Resources)

prefix := UpperName(process)

for name, resource := range res {
if strings.HasPrefix(name, prefix) {
resources[name] = resource
}
}

return resources, nil
}

0 comments on commit f019661

Please sign in to comment.