Skip to content

Commit

Permalink
[EXPERIMENTAL] Integration Test on Swarm
Browse files Browse the repository at this point in the history
This commit adds contrib/integration-cli-on-swarm/integration-cli-on-swarm.sh,
which enables IT to be running in parallel, using Swarm-mode and Funker.

Please refer to contrib/integration-cli-on-swarm/README.md

The test takes almost 5 to 6 minutes, with 10 n1-standard-4 GCE instances.

  $ ./contrib/integration-cli-on-swarm/integration-cli-on-swarm.sh --push-worker-image example.gcr.io/foo/bar --replicas 30 --shuffle
  2016/12/29 08:32:15 Loaded 1618 tests (30 chunks)
  2016/12/29 08:32:15 Executing 30 chunks in parallel, against "integration-cli-worker"
  2016/12/29 08:32:15 Executing chunk 0 (contains 54 test filters)
  ..
  2016/12/29 08:34:34 Finished chunk 28 [1/30] with 54 test filters in 2m19.098068003s, code=0.
  2016/12/29 08:34:38 Finished chunk 12 [2/30] with 54 test filters in 2m23.088569511s, code=0.
  2016/12/29 08:34:48 Finished chunk 10 [3/30] with 54 test filters in 2m33.880679079s, code=0.
  2016/12/29 08:34:54 Finished chunk 20 [4/30] with 54 test filters in 2m39.973747028s, code=0.
  2016/12/29 08:35:11 Finished chunk 18 [5/30] with 54 test filters in 2m56.28384361s, code=0.
  2016/12/29 08:35:11 Finished chunk 29 [6/30] with 52 test filters in 2m56.54047088s, code=0.
  2016/12/29 08:35:15 Finished chunk 1 [7/30] with 54 test filters in 3m0.285044426s, code=0.
  2016/12/29 08:35:22 Finished chunk 6 [8/30] with 54 test filters in 3m7.211775338s, code=0.
  2016/12/29 08:35:24 Finished chunk 25 [9/30] with 54 test filters in 3m9.938413009s, code=0.
  2016/12/29 08:35:30 Finished chunk 27 [10/30] with 54 test filters in 3m15.219834368s, code=0.
  2016/12/29 08:35:36 Finished chunk 9 [11/30] with 54 test filters in 3m21.615434162s, code=0.
  2016/12/29 08:35:41 Finished chunk 13 [12/30] with 54 test filters in 3m26.576907401s, code=0.
  2016/12/29 08:35:45 Finished chunk 17 [13/30] with 54 test filters in 3m30.290752537s, code=0.
  2016/12/29 08:35:53 Finished chunk 2 [14/30] with 54 test filters in 3m38.148423321s, code=0.
  2016/12/29 08:35:55 Finished chunk 24 [15/30] with 54 test filters in 3m40.09669137s, code=0.
  2016/12/29 08:35:57 Finished chunk 8 [16/30] with 54 test filters in 3m42.299945108s, code=0.
  2016/12/29 08:35:57 Finished chunk 22 [17/30] with 54 test filters in 3m42.946558809s, code=0.
  2016/12/29 08:35:59 Finished chunk 23 [18/30] with 54 test filters in 3m44.232557165s, code=0.
  2016/12/29 08:36:02 Finished chunk 3 [19/30] with 54 test filters in 3m47.112051358s, code=0.
  2016/12/29 08:36:11 Finished chunk 15 [20/30] with 54 test filters in 3m56.340656645s, code=0.
  2016/12/29 08:36:11 Finished chunk 11 [21/30] with 54 test filters in 3m56.882401231s, code=0.
  2016/12/29 08:36:22 Finished chunk 19 [22/30] with 54 test filters in 4m7.551093516s, code=0.
  2016/12/29 08:36:23 Finished chunk 21 [23/30] with 54 test filters in 4m8.221093446s, code=0.
  2016/12/29 08:36:25 Finished chunk 16 [24/30] with 54 test filters in 4m10.450451705s, code=0.
  2016/12/29 08:36:27 Finished chunk 5 [25/30] with 54 test filters in 4m12.162272692s, code=0.
  2016/12/29 08:36:28 Finished chunk 14 [26/30] with 54 test filters in 4m13.977801031s, code=0.
  2016/12/29 08:36:29 Finished chunk 0 [27/30] with 54 test filters in 4m14.34086812s, code=0.
  2016/12/29 08:36:49 Finished chunk 26 [28/30] with 54 test filters in 4m34.437085539s, code=0.
  2016/12/29 08:37:14 Finished chunk 7 [29/30] with 54 test filters in 4m59.22902721s, code=0.
  2016/12/29 08:37:20 Finished chunk 4 [30/30] with 54 test filters in 5m5.103469214s, code=0.
  2016/12/29 08:37:20 Executed 30 chunks in 5m5.104379119s. PASS: 30, FAIL: 0.

Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
  • Loading branch information
AkihiroSuda committed Feb 28, 2017
1 parent 7fb83eb commit 2879701
Show file tree
Hide file tree
Showing 24 changed files with 1,486 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
Expand Up @@ -3,3 +3,5 @@ bundles
vendor/pkg
.go-pkg-cache
.git
hack/integration-cli-on-swarm/integration-cli-on-swarm

1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -31,3 +31,4 @@ man/man1
man/man5
man/man8
vendor/pkg/
hack/integration-cli-on-swarm/integration-cli-on-swarm
24 changes: 22 additions & 2 deletions Makefile
Expand Up @@ -58,10 +58,11 @@ DOCKER_MOUNT := $(if $(DOCKER_MOUNT),$(DOCKER_MOUNT),-v /go/src/github.com/docke
DOCKER_CONTAINER_NAME := $(if $(CONTAINER_NAME),--name $(CONTAINER_NAME),)

# enable package cache if DOCKER_INCREMENTAL_BINARY and DOCKER_MOUNT (i.e.DOCKER_HOST) are set
PKGCACHE_MAP := gopath:/go/pkg goroot-linux_amd64_netgo:/usr/local/go/pkg/linux_amd64_netgo
PKGCACHE_MAP := gopath:/go/pkg goroot-linux_amd64:/usr/local/go/pkg/linux_amd64 goroot-linux_amd64_netgo:/usr/local/go/pkg/linux_amd64_netgo
PKGCACHE_VOLROOT := dockerdev-go-pkg-cache
PKGCACHE_VOL := $(if $(PKGCACHE_DIR),$(CURDIR)/$(PKGCACHE_DIR)/,$(PKGCACHE_VOLROOT)-)
DOCKER_MOUNT := $(if $(DOCKER_INCREMENTAL_BINARY),$(DOCKER_MOUNT) $(shell echo $(PKGCACHE_MAP) | sed -E 's@([^ ]*)@-v "$(PKGCACHE_VOL)\1"@g'),$(DOCKER_MOUNT))
DOCKER_MOUNT_PKGCACHE := $(if $(DOCKER_INCREMENTAL_BINARY),$(shell echo $(PKGCACHE_MAP) | sed -E 's@([^ ]*)@-v "$(PKGCACHE_VOL)\1"@g'),)
DOCKER_MOUNT := $(DOCKER_MOUNT) $(DOCKER_MOUNT_PKGCACHE)

GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
Expand All @@ -74,6 +75,9 @@ export BUILD_APT_MIRROR

SWAGGER_DOCS_PORT ?= 9000

INTEGRATION_CLI_MASTER_IMAGE := $(if $(INTEGRATION_CLI_MASTER_IMAGE), $(INTEGRATION_CLI_MASTER_IMAGE), integration-cli-master)
INTEGRATION_CLI_WORKER_IMAGE := $(if $(INTEGRATION_CLI_WORKER_IMAGE), $(INTEGRATION_CLI_WORKER_IMAGE), integration-cli-worker)

# if this session isn't interactive, then we don't want to allocate a
# TTY, which would fail, but if it is interactive, we do want to attach
# so that the user can send e.g. ^C through.
Expand Down Expand Up @@ -172,3 +176,19 @@ swagger-docs: ## preview the API documentation
-e 'REDOC_OPTIONS=hide-hostname="true" lazy-rendering' \
-p $(SWAGGER_DOCS_PORT):80 \
bfirsh/redoc:1.6.2

# build-integration-cli-on-swarm builds everything needed to run integration-cli on Swarm:
#   1. the host-side binary (hack/integration-cli-on-swarm/integration-cli-on-swarm), via `go build`;
#   2. the master image, via `docker build` on hack/integration-cli-on-swarm/agent;
#   3. the worker image, by starting a temporary container from DOCKER_IMAGE, building the
#      integration test binary and the worker agent inside it, and committing that container.
# The temporary container is removed with `docker rm -f` as the last step.
build-integration-cli-on-swarm: build ## build images and binary for running integration-cli on Swarm in parallel
@echo "Building hack/integration-cli-on-swarm"
go build -o ./hack/integration-cli-on-swarm/integration-cli-on-swarm ./hack/integration-cli-on-swarm/host
@echo "Building $(INTEGRATION_CLI_MASTER_IMAGE)"
docker build -t $(INTEGRATION_CLI_MASTER_IMAGE) hack/integration-cli-on-swarm/agent
# For worker, we don't use `docker build` so as to enable DOCKER_INCREMENTAL_BINARY and so on
@echo "Building $(INTEGRATION_CLI_WORKER_IMAGE) from $(DOCKER_IMAGE)"
$(eval tmp := integration-cli-worker-tmp)
# We mount pkgcache, but not bundle (bundle needs to be baked into the image)
# To avoid baking DOCKER_GRAPHDRIVER and so on into the image, we cannot use $(DOCKER_ENVS) here
docker run -t -d --name $(tmp) -e DOCKER_GITCOMMIT -e BUILDFLAGS -e DOCKER_INCREMENTAL_BINARY --privileged $(DOCKER_MOUNT_PKGCACHE) $(DOCKER_IMAGE) top
docker exec $(tmp) hack/make.sh build-integration-test-binary dynbinary
docker exec $(tmp) go build -o /worker github.com/docker/docker/hack/integration-cli-on-swarm/agent/worker
docker commit -c 'ENTRYPOINT ["/worker"]' $(tmp) $(INTEGRATION_CLI_WORKER_IMAGE)
docker rm -f $(tmp)
66 changes: 66 additions & 0 deletions hack/integration-cli-on-swarm/README.md
@@ -0,0 +1,66 @@
# Integration Testing on Swarm

IT on Swarm allows you to execute the integration tests in parallel across a Docker Swarm cluster.

## Architecture

### Master service

- Works as a funker caller
- Calls a worker funker (`-worker-service`) with a chunk of `-check.f` filter strings (passed as a file via `-input` flag, typically `/mnt/input`)

### Worker service

- Works as a funker callee
- Executes an equivalent of `TESTFLAGS=-check.f TestFoo|TestBar|TestBaz ... make test-integration-cli` using the bind-mounted API socket (`docker.sock`)

### Client

- Controls master and workers via `docker stack`
- No need to have a local daemon

Typically, the master and workers are supposed to be running on a cloud environment,
while the client is supposed to be running on a laptop, e.g. Docker for Mac/Windows.

## Requirements

- Docker daemon 1.13 or later
- Private registry for distributed execution with multiple nodes

## Usage

### Step 1: Prepare images

$ make build-integration-cli-on-swarm

The following environment variables are known to work in this step:

- `BUILDFLAGS`
- `DOCKER_INCREMENTAL_BINARY`

### Step 2: Execute tests

$ ./hack/integration-cli-on-swarm/integration-cli-on-swarm -replicas 40 -push-worker-image YOUR_REGISTRY.EXAMPLE.COM/integration-cli-worker:latest

The following environment variables are known to work in this step:

- `DOCKER_GRAPHDRIVER`
- `DOCKER_EXPERIMENTAL`

#### Flags

Basic flags:

- `-replicas N`: the number of worker service replicas. i.e. degree of parallelism.
- `-chunks N`: the number of chunks. By default, `chunks` == `replicas`.
- `-push-worker-image REGISTRY/IMAGE:TAG`: push the worker image to the registry. Note that if you have only a single node, you do not need a private registry and hence do not need to specify `-push-worker-image`.

Experimental flags for mitigating makespan nonuniformity:

- `-shuffle`: Shuffle the test filter strings

Flags for debugging IT on Swarm itself:

- `-rand-seed N`: the random seed. This flag is useful for deterministic replaying. By default (0), the timestamp is used.
- `-filters-file FILE`: the file containing the `-check.f` strings. By default, the file is automatically generated.
- `-dry-run`: skip the actual workload
6 changes: 6 additions & 0 deletions hack/integration-cli-on-swarm/agent/Dockerfile
@@ -0,0 +1,6 @@
# This Dockerfile is solely used for the master image.
# Please refer to the top-level Makefile for the worker image.
FROM golang:1.7
# COPY is used instead of ADD: the source is a plain local directory, so none of
# ADD's extra behavior (remote URLs, automatic tar extraction) is wanted here.
COPY . /go/src/github.com/docker/docker/hack/integration-cli-on-swarm/agent
RUN go build -o /master github.com/docker/docker/hack/integration-cli-on-swarm/agent/master
ENTRYPOINT ["/master"]
132 changes: 132 additions & 0 deletions hack/integration-cli-on-swarm/agent/master/call.go
@@ -0,0 +1,132 @@
package main

import (
"encoding/json"
"fmt"
"log"
"strings"
"sync"
"sync/atomic"
"time"

"github.com/bfirsh/funker-go"
"github.com/docker/docker/hack/integration-cli-on-swarm/agent/types"
)

const (
// funkerRetryTimeout bounds the total time executeTestChunkWithRetry keeps
// retrying a single chunk before giving up.
// It exists because of the issue https://github.com/bfirsh/funker/issues/3:
// when all the funker replicas are busy with their own jobs, we cannot connect to funker.
funkerRetryTimeout = 1 * time.Hour
// funkerRetryDuration is the sleep between two consecutive retries.
funkerRetryDuration = 1 * time.Second
)

// ticker starts a background goroutine that logs a keep-alive line every d
// until the returned channel is closed.
//
// It is needed for some CI (e.g., on Travis, a job is aborted when no output
// is emitted for 10 minutes).
//
// The caller must close the returned channel exactly once; doing so stops the
// underlying time.Ticker and terminates the goroutine.
func ticker(d time.Duration) chan struct{} {
	t := time.NewTicker(d)
	stop := make(chan struct{})
	go func() {
		// Release the ticker whenever the goroutine exits.
		defer t.Stop()
		for {
			select {
			case <-t.C:
				log.Printf("tick (just for keeping CI job active) per %s", d.String())
			case <-stop:
				// A closed channel is always ready to receive, so we must
				// return here; looping again would spin forever.
				return
			}
		}
	}()
	return stop
}

// executeTests runs every chunk in testChunks concurrently against the funker
// service named funkerName and waits for all of them to finish.
//
// Each chunk is sent via executeTestChunkWithRetry; the raw log returned by the
// worker is re-emitted line by line, prefixed with the chunk ID. A chunk counts
// as failed when the call itself errors or when the worker reports a non-zero
// code.
//
// It returns nil when every chunk passed, otherwise an error stating how many
// chunks failed.
func executeTests(funkerName string, testChunks [][]string) error {
	// Keep emitting output so that CI does not abort the job for being silent.
	tickerStopper := ticker(9*time.Minute + 55*time.Second)
	defer close(tickerStopper)

	begin := time.Now()
	log.Printf("Executing %d chunks in parallel, against %q", len(testChunks), funkerName)
	var wg sync.WaitGroup
	// passed and failed are updated from many goroutines; access them only
	// through sync/atomic until wg.Wait() has returned.
	var passed, failed uint32
	for chunkID, tests := range testChunks {
		log.Printf("Executing chunk %d (contains %d test filters)", chunkID, len(tests))
		wg.Add(1)
		go func(chunkID int, tests []string) {
			defer wg.Done()
			chunkBegin := time.Now()
			result, err := executeTestChunkWithRetry(funkerName, types.Args{
				ChunkID: chunkID,
				Tests:   tests,
			})
			if result.RawLog != "" {
				for _, s := range strings.Split(result.RawLog, "\n") {
					log.Printf("Log (chunk %d): %s", chunkID, s)
				}
			}
			if err != nil {
				log.Printf("Error while executing chunk %d: %v",
					chunkID, err)
				atomic.AddUint32(&failed, 1)
				return
			}
			if result.Code == 0 {
				atomic.AddUint32(&passed, 1)
			} else {
				atomic.AddUint32(&failed, 1)
			}
			// Load the counters atomically: other chunk goroutines may be
			// updating them right now, and a plain read would be a data race.
			finished := atomic.LoadUint32(&passed) + atomic.LoadUint32(&failed)
			log.Printf("Finished chunk %d [%d/%d] with %d test filters in %s, code=%d.",
				chunkID, finished, len(testChunks), len(tests),
				time.Now().Sub(chunkBegin), result.Code)
		}(chunkID, tests)
	}
	wg.Wait()
	// All workers are done, so plain reads of the counters are safe from here on.
	// TODO: print actual tests rather than chunks
	log.Printf("Executed %d chunks in %s. PASS: %d, FAIL: %d.",
		len(testChunks), time.Now().Sub(begin), passed, failed)
	if failed > 0 {
		return fmt.Errorf("%d chunks failed", failed)
	}
	return nil
}

// executeTestChunk performs a single funker call to funkerName with args and
// converts the reply into a types.Result.
//
// The reply from funker.Call is re-encoded as JSON and decoded again into
// types.Result. Any error from the call, the encoding or the decoding is
// returned as is.
func executeTestChunk(funkerName string, args types.Args) (types.Result, error) {
	reply, callErr := funker.Call(funkerName, args)
	if callErr != nil {
		return types.Result{}, callErr
	}

	encoded, encErr := json.Marshal(reply)
	if encErr != nil {
		return types.Result{}, encErr
	}

	var decoded types.Result
	decErr := json.Unmarshal(encoded, &decoded)
	return decoded, decErr
}

// executeTestChunkWithRetry calls executeTestChunk repeatedly until it succeeds
// or funkerRetryTimeout has elapsed since the first attempt, sleeping
// funkerRetryDuration between attempts.
//
// Errors that errorSeemsInteresting considers noteworthy are logged; all
// errors, logged or not, lead to a retry. On timeout a zero types.Result and
// an error are returned.
func executeTestChunkWithRetry(funkerName string, args types.Args) (types.Result, error) {
	start := time.Now()
	trial := 0
	for time.Now().Sub(start) < funkerRetryTimeout {
		res, callErr := executeTestChunk(funkerName, args)
		if callErr == nil {
			log.Printf("executeTestChunk(%q, %d) returned code %d in trial %d", funkerName, args.ChunkID, res.Code, trial)
			return res, nil
		}
		if errorSeemsInteresting(callErr) {
			log.Printf("Error while calling executeTestChunk(%q, %d), will retry (trial %d): %v",
				funkerName, args.ChunkID, trial, callErr)
		}
		// TODO: non-constant sleep
		time.Sleep(funkerRetryDuration)
		trial++
	}
	return types.Result{}, fmt.Errorf("could not call executeTestChunk(%q, %d) in %v", funkerName, args.ChunkID, funkerRetryTimeout)
}

// errorSeemsInteresting reports whether err looks like something other than
// the connection failures described in https://github.com/bfirsh/funker/issues/3.
// It returns false as soon as the error text contains one of the known
// "boring" substrings, and true otherwise.
func errorSeemsInteresting(err error) bool {
	boring := [...]string{
		"connection refused",
		"connection reset by peer",
		"no such host",
		"transport endpoint is not connected",
		"no route to host",
	}
	msg := err.Error()
	interesting := true
	for i := 0; interesting && i < len(boring); i++ {
		interesting = !strings.Contains(msg, boring[i])
	}
	return interesting
}
65 changes: 65 additions & 0 deletions hack/integration-cli-on-swarm/agent/master/master.go
@@ -0,0 +1,65 @@
package main

import (
"errors"
"flag"
"io/ioutil"
"log"
"strings"
)

// main runs xmain and terminates the process via log.Fatalf if it fails.
func main() {
	err := xmain()
	if err == nil {
		return
	}
	log.Fatalf("fatal error: %v", err)
}

// xmain is the real entry point of the master: it parses the command-line
// flags, loads the test filters from the -input file, splits them into chunks
// and executes the chunks against the worker service.
//
// It returns an error when a required flag (-worker-service, -chunks, -input)
// is missing, when -chunks is negative, when the input file cannot be read,
// or when executeTests reports a failure.
func xmain() error {
	workerService := flag.String("worker-service", "", "Name of worker service")
	chunks := flag.Int("chunks", 0, "Number of chunks")
	input := flag.String("input", "", "Path to input file")
	randSeed := flag.Int64("rand-seed", int64(0), "Random seed")
	shuffle := flag.Bool("shuffle", false, "Shuffle the input so as to mitigate makespan nonuniformity")
	flag.Parse()
	if *workerService == "" {
		return errors.New("worker-service unset")
	}
	if *chunks == 0 {
		return errors.New("chunks unset")
	}
	// A negative chunk count would produce a negative chunk size when the
	// tests are split, so reject it up front.
	if *chunks < 0 {
		return errors.New("chunks must be positive")
	}
	if *input == "" {
		return errors.New("input unset")
	}

	tests, err := loadTests(*input)
	if err != nil {
		return err
	}
	testChunks := chunkTests(tests, *chunks, *shuffle, *randSeed)
	log.Printf("Loaded %d tests (%d chunks)", len(tests), len(testChunks))
	return executeTests(*workerService, testChunks)
}

func chunkTests(tests []string, numChunks int, shuffle bool, randSeed int64) [][]string {
// shuffling (experimental) mitigates makespan nonuniformity
// Not sure this can cause some locality problem..
if shuffle {
shuffleStrings(tests, randSeed)
}
return chunkStrings(tests, numChunks)
}

// loadTests reads filename and returns its non-blank lines, each with
// surrounding whitespace trimmed, in file order. It returns the read error
// unchanged if the file cannot be read.
func loadTests(filename string) ([]string, error) {
	content, err := ioutil.ReadFile(filename)
	if err != nil {
		return nil, err
	}

	lines := strings.Split(string(content), "\n")
	var filters []string
	for i := 0; i < len(lines); i++ {
		trimmed := strings.TrimSpace(lines[i])
		if trimmed == "" {
			// Skip blank (or whitespace-only) lines.
			continue
		}
		filters = append(filters, trimmed)
	}
	return filters, nil
}
28 changes: 28 additions & 0 deletions hack/integration-cli-on-swarm/agent/master/set.go
@@ -0,0 +1,28 @@
package main

import (
"math/rand"
)

// chunkStrings splits x into consecutive chunks of ceil(len(x)/numChunks)
// elements each; the last chunk may be shorter. Consequently at most numChunks
// chunks are returned, and fewer when len(x) does not need that many.
//
// A numChunks below 1 is treated as 1, so that the whole input ends up in a
// single chunk instead of dividing by zero or stepping backwards.
// An empty x yields nil.
//
// The returned chunks are sub-slices of x and share its backing array.
func chunkStrings(x []string, numChunks int) [][]string {
	if len(x) == 0 {
		return nil
	}
	if numChunks < 1 {
		numChunks = 1
	}
	// Round up so that every element is covered by at most numChunks chunks.
	chunkSize := (len(x) + numChunks - 1) / numChunks
	result := make([][]string, 0, (len(x)+chunkSize-1)/chunkSize)
	for i := 0; i < len(x); i += chunkSize {
		ub := i + chunkSize
		if ub > len(x) {
			ub = len(x)
		}
		result = append(result, x[i:ub])
	}
	return result
}

// shuffleStrings permutes x in place using a pseudo-random generator seeded
// with seed, so the same seed and input always give the same order.
func shuffleStrings(x []string, seed int64) {
	rng := rand.New(rand.NewSource(seed))
	// Walk forward; each element is swapped with a random position at or
	// before it (one rng draw per element, including the first).
	for pos := 0; pos < len(x); pos++ {
		pick := rng.Intn(pos + 1)
		x[pick], x[pos] = x[pos], x[pick]
	}
}

0 comments on commit 2879701

Please sign in to comment.