Skip to content
This repository has been archived by the owner on Sep 4, 2021. It is now read-only.

Improve CI debugging #582

Merged
merged 3 commits into from
Dec 10, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions test/apps/signal/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,15 @@ import (
const service = "signal-service"

// main registers the signal test service with discoverd and then blocks until
// SIGINT or SIGTERM is delivered, printing the received signal before
// returning.
func main() {
	log.SetFlags(log.Lmicroseconds)
	// signal.Notify never blocks when delivering to the channel, so it must be
	// buffered: with an unbuffered channel a signal that arrives before the
	// receive below (for example while the service is still registering)
	// would be silently dropped.
	ch := make(chan os.Signal, 1)
	log.Println("setting signal handler")
	signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM)
	log.Println("registering service")
	if err := discoverd.Register(service, ":12345"); err != nil {
		log.Fatal(err)
	}
	log.Println("waiting for signal")
	sig := <-ch
	fmt.Printf("got signal: %s", sig)
}
2 changes: 2 additions & 0 deletions test/arg/arg.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ type Args struct {
Build bool
Debug bool
Stream bool
DumpLogs bool
Kill bool
KeepRootFS bool
DBPath string
Expand Down Expand Up @@ -49,6 +50,7 @@ func Parse() *Args {
flag.BoolVar(&args.Build, "build", true, "build Flynn")
flag.BoolVar(&args.Debug, "debug", false, "enable debug output")
flag.BoolVar(&args.Stream, "stream", false, "stream debug output (implies --debug)")
flag.BoolVar(&args.DumpLogs, "dump-logs", false, "dump logs on error")
flag.BoolVar(&args.Kill, "kill", true, "kill the cluster after running the tests")
flag.BoolVar(&args.KeepRootFS, "keep-rootfs", false, "don't remove the rootfs which was built to run the tests")
flag.BoolVar(&args.Gist, "gist", false, "upload debug info to a gist")
Expand Down
69 changes: 49 additions & 20 deletions test/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,19 +128,26 @@ func (c *Cluster) BuildFlynn(rootFS, commit string, merge bool) (string, error)
return build.Drive("hda").FS, nil
}

func (c *Cluster) Boot(rootFS string, count int) error {
func (c *Cluster) Boot(rootFS string, count int, dumpLogs io.Writer) error {
if err := c.setup(); err != nil {
return err
}

c.log("Booting", count, "VMs")
if err := c.startVMs(rootFS, count); err != nil {
_, err := c.startVMs(rootFS, count)
if err != nil {
if dumpLogs != nil && len(c.Instances) > 0 {
c.DumpLogs(dumpLogs)
}
c.Shutdown()
return err
}

c.log("Bootstrapping layer 1...")
if err := c.bootstrapLayer1(); err != nil {
if dumpLogs != nil {
c.DumpLogs(dumpLogs)
}
c.Shutdown()
return err
}
Expand All @@ -155,50 +162,48 @@ func (c *Cluster) BridgeIP() string {
return c.bridge.IP()
}

func (c *Cluster) AddHost() error {
func (c *Cluster) AddHost() (*Instance, error) {
if c.rootFS == "" {
return errors.New("cluster not yet booted")
return nil, errors.New("cluster not yet booted")
}
c.log("Booting 1 VM")
return c.startVMs(c.rootFS, 1)
instances, err := c.startVMs(c.rootFS, 1)
return instances[0], err
}

// RemoveHost stops flynn-host on the instance but leaves it running so the logs
// are still available if we need to dump them later.
//
// It returns an error if no instance with the given id is found, if the
// cluster's backend has no known stop command, or if running the stop command
// on the instance fails.
func (c *Cluster) RemoveHost(id string) error {
	inst, err := c.Instances.Get(id)
	if err != nil {
		return err
	}
	c.log("removing host", id)

	var cmd string
	switch c.bc.Backend {
	case "libvirt-lxc":
		cmd = "sudo start-stop-daemon --stop --pidfile /var/run/flynn-host.pid --retry 15"
	case "docker":
		cmd = "docker stop -t 15 flynn-host"
	default:
		// Without this an unknown backend would run an empty command on the
		// instance; report it the same way startVMs does.
		return fmt.Errorf("unknown host backend: %s", c.bc.Backend)
	}
	return inst.Run(cmd, nil)
}

// Size reports the number of instances currently held in c.Instances.
func (c *Cluster) Size() int {
	count := len(c.Instances)
	return count
}

func (c *Cluster) startVMs(rootFS string, count int) error {
func (c *Cluster) startVMs(rootFS string, count int) ([]*Instance, error) {
tmpl, ok := flynnHostScripts[c.bc.Backend]
if !ok {
return fmt.Errorf("unknown host backend: %s", c.bc.Backend)
return nil, fmt.Errorf("unknown host backend: %s", c.bc.Backend)
}

uid, gid, err := lookupUser(c.bc.User)
if err != nil {
return err
return nil, err
}

instances := make([]*Instance, count)
for i := 0; i < count; i++ {
inst, err := c.vm.NewInstance(&VMConfig{
Kernel: c.bc.Kernel,
Expand All @@ -211,11 +216,12 @@ func (c *Cluster) startVMs(rootFS string, count int) error {
},
})
if err != nil {
return fmt.Errorf("error creating instance %d: %s", i, err)
return nil, fmt.Errorf("error creating instance %d: %s", i, err)
}
if err = inst.Start(); err != nil {
return fmt.Errorf("error starting instance %d: %s", i, err)
return nil, fmt.Errorf("error starting instance %d: %s", i, err)
}
instances[i] = inst
c.Instances = append(c.Instances, inst)

var script bytes.Buffer
Expand All @@ -230,10 +236,10 @@ func (c *Cluster) startVMs(rootFS string, count int) error {

c.logf("Starting flynn-host on %s [id: %s]\n", inst.IP, inst.ID)
if err := inst.Run("bash", &Streams{Stdin: &script, Stdout: c.out, Stderr: os.Stderr}); err != nil {
return err
return nil, err
}
}
return nil
return instances, nil
}

func (c *Cluster) setup() error {
Expand Down Expand Up @@ -457,3 +463,26 @@ func lookupUser(name string) (int, int, error) {
gid, _ := strconv.Atoi(u.Gid)
return uid, gid, nil
}

// DumpLogs writes debugging output for the cluster to w. For every instance it
// runs "ps faux" and prints /tmp/flynn-host.log; it then lists job IDs with
// "flynn-host ps -a -q" and, on the first instance, runs "flynn-host inspect"
// and "flynn-host log" for each ID. Command output (stdout and stderr) goes to
// w, and a command that fails is reported on w rather than aborting the dump.
func (c *Cluster) DumpLogs(w io.Writer) {
	streams := &Streams{Stdout: w, Stderr: w}
	// run prints a separator and header for cmd, then executes it on inst.
	run := func(inst *Instance, cmd string) {
		fmt.Fprint(w, "\n\n***** ***** ***** ***** ***** ***** ***** ***** ***** *****\n\n")
		fmt.Fprintln(w, "HostID:", inst.ID, "-", cmd)
		fmt.Fprintln(w)
		if err := inst.Run(cmd, streams); err != nil {
			// Best effort: keep dumping the remaining logs, but make the
			// failure visible instead of silently dropping it.
			fmt.Fprintln(w, "error running command:", err)
		}
		fmt.Fprintln(w)
	}
	fmt.Fprint(w, "\n\n***** ***** ***** DUMPING ALL LOGS ***** ***** *****\n\n")
	if len(c.Instances) == 0 {
		// Nothing to run commands on; indexing c.Instances[0] below would
		// panic.
		fmt.Fprintln(w, "no instances to dump logs from")
		return
	}
	for _, inst := range c.Instances {
		run(inst, "ps faux")
		run(inst, "cat /tmp/flynn-host.log")
	}
	var out bytes.Buffer
	c.Run("flynn-host ps -a -q", &Streams{Stdout: &out})
	first := c.Instances[0]
	for _, id := range strings.Split(out.String(), "\n") {
		id = strings.TrimSpace(id)
		if id == "" {
			// Splitting empty output yields a single empty string; skip it
			// (and any blank lines) rather than inspecting an empty job ID.
			continue
		}
		run(first, fmt.Sprintf("flynn-host inspect %s", id))
		run(first, fmt.Sprintf("flynn-host log %s", id))
	}
}
33 changes: 4 additions & 29 deletions test/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@ func main() {
// defer exiting here so it runs after all other defers
defer func() {
if err != nil || res != nil && !res.Passed() {
if args.Debug {
if args.DumpLogs {
if args.Gist {
exec.Command("flynn-host", "upload-debug-info").Run()
} else {
dumpLogs()
} else if testCluster != nil {
testCluster.DumpLogs(os.Stdout)
}
}
os.Exit(1)
Expand All @@ -88,7 +88,7 @@ func main() {
} else {
defer os.RemoveAll(rootFS)
}
if err = testCluster.Boot(rootFS, 3); err != nil {
if err = testCluster.Boot(rootFS, 3, nil); err != nil {
log.Println("could not boot cluster: ", err)
return
}
Expand Down Expand Up @@ -396,28 +396,3 @@ func matches(value, regex interface{}) (result bool, error string) {
}
return false, "Obtained value is not a string and has no .String()"
}

// dumpLogs prints local debugging information to the process's stdout/stderr:
// the process tree from "ps faux", the contents of /tmp/flynn-host.log, and,
// for every ID listed by "flynn-host ps -a -q", the output of
// "flynn-host inspect" and "flynn-host log". Command failures are ignored.
func dumpLogs() {
	// run echoes the command line, executes the command with its output
	// mirrored to os.Stdout/os.Stderr, and returns everything it wrote.
	run := func(name string, args ...string) string {
		cmd := exec.Command(name, args...)
		fmt.Println(cmd.Path, strings.Join(cmd.Args[1:], " "))
		var captured bytes.Buffer
		cmd.Stdout = io.MultiWriter(os.Stdout, &captured)
		cmd.Stderr = io.MultiWriter(os.Stderr, &captured)
		_ = cmd.Run() // best effort: a failing command must not stop the dump
		return captured.String()
	}

	fmt.Println("***** running processes *****")
	run("ps", "faux")

	fmt.Println("***** flynn-host log *****")
	run("cat", "/tmp/flynn-host.log")

	listing := run("flynn-host", "ps", "-a", "-q")
	for _, id := range strings.Split(strings.TrimSpace(listing), "\n") {
		fmt.Print("\n\n***** ***** ***** ***** ***** ***** ***** ***** ***** *****\n\n")
		run("flynn-host", "inspect", id)
		fmt.Println()
		run("flynn-host", "log", id)
	}
}
16 changes: 11 additions & 5 deletions test/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ cmd="bin/flynn-test \
--cluster-api https://{{ .Cluster.BridgeIP }}:{{ .ListenPort }}/cluster/{{ .AuthKey }}/{{ .Cluster.ID }} \
--cli $(pwd)/../cli/flynn-cli \
--router-ip {{ .Cluster.RouterIP }} \
--debug"
--debug \
--dump-logs"

timeout --kill-after=10 20m $cmd
`[1:]))
Expand Down Expand Up @@ -262,7 +263,7 @@ func (r *Runner) build(b *Build) (err error) {
return fmt.Errorf("could not build flynn: %s", err)
}

if err := c.Boot(rootFS, 3); err != nil {
if err := c.Boot(rootFS, 3, out); err != nil {
return fmt.Errorf("could not boot cluster: %s", err)
}

Expand Down Expand Up @@ -607,13 +608,18 @@ func (r *Runner) httpClusterHandler(w http.ResponseWriter, req *http.Request) {

switch req.Method {
case "GET":
json.NewEncoder(w).Encode(c)
if err := json.NewEncoder(w).Encode(c); err != nil {
http.Error(w, fmt.Sprintf("error encoding cluster: %s", err), 500)
}
case "POST":
if err := c.AddHost(); err != nil {
instance, err := c.AddHost()
if err != nil {
http.Error(w, err.Error(), 500)
return
}
w.Write([]byte("ok"))
if err := json.NewEncoder(w).Encode(instance); err != nil {
http.Error(w, fmt.Sprintf("error encoding instance: %s", err), 500)
}
case "DELETE":
hostID := req.FormValue("host")
if err := c.RemoveHost(hostID); err != nil {
Expand Down
10 changes: 9 additions & 1 deletion test/test_scheduler.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"encoding/json"
"fmt"
"net"
"net/http"
Expand All @@ -14,6 +15,7 @@ import (
"github.com/flynn/flynn/host/types"
"github.com/flynn/flynn/pkg/attempt"
"github.com/flynn/flynn/pkg/cluster"
tc "github.com/flynn/flynn/test/cluster"
)

type SchedulerSuite struct {
Expand Down Expand Up @@ -44,14 +46,20 @@ func (s *SchedulerSuite) addHosts(t *c.C, count int) []string {
if err != nil {
t.Fatal("error in POST request to cluster api:", err)
}
res.Body.Close()
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
t.Fatal("expected 200 status, got", res.Status)
}
instance := &tc.Instance{}
err = json.NewDecoder(res.Body).Decode(instance)
if err != nil {
t.Fatal("could not decode new instance:", err)
}

select {
case event := <-ch:
debug(t, "host added ", event.HostID)
testCluster.Instances = append(testCluster.Instances, instance)
hosts = append(hosts, event.HostID)
case <-time.After(20 * time.Second):
t.Fatal("timed out waiting for new host")
Expand Down