Skip to content

Commit

Permalink
kola: Support the Debian autopkgtest reboot API
Browse files Browse the repository at this point in the history
As I was working on extending some of ostree's destructive
test suite to do reboots:
ostreedev/ostree#2127

I realized that the Debian autopkgtest API for rebooting is
better, because it allows *saving state external to the host*.

Rather than having the test count boots as ostree is doing
today, the "mark" allows us to more reliably dispatch.
And further, because we don't rely on writing anything to disk
on the target, we can add clean support for "forced reboots"
that might kill the OS before we write to persistent storage there.

The "between reboot" state lives in the test runner's memory instead.

We retain support for the previous (two!) reboot APIs here for now.

I tested this with basically the example script
from the Debian autopkgtest specification:

```
set -xeuo pipefail
case "${AUTOPKGTEST_REBOOT_MARK:-}" in
  "") echo "test beginning"; /tmp/autopkgtest-reboot mark1 ;;
  mark1) echo "test in mark1"; /tmp/autopkgtest-reboot mark2 ;;
  mark2) echo "test in mark2" ;;
  *) echo "unexpected mark: ${AUTOPKGTEST_REBOOT_MARK}"; exit 1;;
esac
echo "ok autopkgtest rebooting"
```

I think it will make sense actually to implement more of the autopkgtest
API - Debian has a nontrivial number of tests using this, and I
think there's even work upstream in e.g. systemd to bridge its
tests to autopkgtest.  Which would mean we gain "run systemd's tests in kola"
for free.
  • Loading branch information
cgwalters committed Jun 11, 2020
1 parent 34cf11b commit ceed1c1
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 25 deletions.
72 changes: 58 additions & 14 deletions mantle/cmd/kolet/kolet.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ package main

import (
"fmt"
"io/ioutil"
"os"
"strings"
"syscall"
"time"

systemddbus "github.com/coreos/go-systemd/v22/dbus"
Expand All @@ -43,6 +43,23 @@ const (
// kolaRebootStamp is the path of the stamp file that signals a reboot request.
// It should be created by tests that want to reboot; the `reboot-request`
// subcommand (runReboot) writes the requested mark into it.
const kolaRebootStamp = "/run/kola-reboot"

// Wrapper scripts implementing the reboot API defined by the Debian
// autopkgtest specification:
// https://salsa.debian.org/ci-team/autopkgtest/raw/master/doc/README.package-tests.rst
//
// Both scripts forward the caller-supplied mark to `kolet reboot-request`.
// The mark and the unit name are quoted so that a value containing
// whitespace or glob characters reaches kolet as a single argument instead
// of being word-split by the shell.
const (
	// autopkgTestRebootPath is where the "reboot now" wrapper is written.
	autopkgTestRebootPath = "/tmp/autopkgtest-reboot"
	// autopkgtestRebootScript records the mark, asks systemd to kill the
	// test unit named by $KOLA_UNIT, and then blocks until it is killed
	// itself so that the calling test does not continue past the request.
	autopkgtestRebootScript = `#!/bin/bash
set -euo pipefail
~core/kolet reboot-request "$1"
systemctl kill --no-block "${KOLA_UNIT}" || true
sleep infinity
`
	// autopkgTestRebootPreparePath is where the "prepare" wrapper is written.
	autopkgTestRebootPreparePath = "/tmp/autopkgtest-reboot-prepare"

	// autopkgtestRebootPrepareScript only records the mark; it does not
	// kill the unit, leaving the test free to trigger the reboot itself.
	autopkgtestRebootPrepareScript = `#!/bin/bash
set -euo pipefail
exec ~core/kolet reboot-request "$1"
`
)

var (
plog = capnslog.NewPackageLogger("github.com/coreos/mantle", "kolet")

Expand All @@ -64,6 +81,13 @@ var (
RunE: runExtUnit,
SilenceUsage: true,
}

// cmdReboot is the `reboot-request MARK` subcommand; its handler is
// runReboot. Usage output is silenced when the handler returns an error.
cmdReboot = &cobra.Command{
Use: "reboot-request MARK",
Short: "Request a reboot",
RunE: runReboot,
SilenceUsage: true,
}
)

func run(cmd *cobra.Command, args []string) {
Expand Down Expand Up @@ -103,17 +127,9 @@ func registerTestMap(m map[string]*register.Test) {
}
}

// requestRebootAndWait sends SIGTERM to the current process,
// which then propagates back to the ssh
// status, so that the kola runner (on the remote host)
// can use that as a trigger to reboot.
func requestRebootAndWait() error {
selfproc := os.Process{
Pid: os.Getpid(),
}
selfproc.Signal(syscall.SIGTERM)
time.Sleep(time.Hour)
panic("failed to send SIGTERM to self")
// requestReboot prints the reboot marker line on standard output.
// The kola harness captures kolet's stdout and compares it against this
// marker to decide whether the machine should be rebooted.
func requestReboot() {
	const rebootMessage = "reboot"
	fmt.Println(rebootMessage)
}

// dispatchRunExtUnit returns true if unit completed successfully, false if
Expand Down Expand Up @@ -151,7 +167,8 @@ func dispatchRunExtUnit(unitname string, sdconn *systemddbus.Conn) (bool, error)
_, err := os.Stat(kolaRebootStamp)
if err == nil {
systemdjournal.Print(systemdjournal.PriInfo, "Unit %s requested reboot via %s\n", unitname, kolaRebootStamp)
return false, requestRebootAndWait()
requestReboot()
return true, nil
}
return true, nil
} else {
Expand All @@ -163,7 +180,8 @@ func dispatchRunExtUnit(unitname string, sdconn *systemddbus.Conn) (bool, error)
// SIGTERM; we explicitly allow that, expecting we're rebooting.
if mainstatus == int32(15) {
systemdjournal.Print(systemdjournal.PriInfo, "Unit %s terminated via SIGTERM, assuming reboot request\n", unitname)
return false, requestRebootAndWait()
requestReboot()
return true, nil
} else {
return true, fmt.Errorf("Unit %s killed by signal %d", unitname, mainstatus)
}
Expand All @@ -185,6 +203,14 @@ func dispatchRunExtUnit(unitname string, sdconn *systemddbus.Conn) (bool, error)
}

func runExtUnit(cmd *cobra.Command, args []string) error {
// Write the autopkgtest wrappers
if err := ioutil.WriteFile(autopkgTestRebootPath, []byte(autopkgtestRebootScript), 0755); err != nil {
return err
}
if err := ioutil.WriteFile(autopkgTestRebootPreparePath, []byte(autopkgtestRebootPrepareScript), 0755); err != nil {
return err
}

unitname := args[0]
// Restrict this to services, don't need to support anything else right now
if !strings.HasSuffix(unitname, ".service") {
Expand All @@ -194,6 +220,13 @@ func runExtUnit(cmd *cobra.Command, args []string) error {
if err != nil {
return errors.Wrapf(err, "systemd connection")
}

// Start the unit; it's not started by default because we need to
// do some preparatory work above (and some is done in the harness)
if _, err := sdconn.StartUnit(unitname, "fail", nil); err != nil {
return errors.Wrapf(err, "starting unit")
}

if err := sdconn.Subscribe(); err != nil {
return err
}
Expand All @@ -220,11 +253,22 @@ func runExtUnit(cmd *cobra.Command, args []string) error {
}
}

// runReboot handles the `reboot-request` subcommand. It is the backend for
// a reboot API intended to be at least as capable as the one defined by
// Debian autopkgtests:
// https://salsa.debian.org/ci-team/autopkgtest/raw/master/doc/README.package-tests.rst
//
// args[0] is the reboot mark. The mark is logged to the journal and then
// written, followed by a newline, to the kolaRebootStamp file. The returned
// error is whatever writing that file produced.
func runReboot(cmd *cobra.Command, args []string) error {
	requestedMark := args[0]
	systemdjournal.Print(systemdjournal.PriInfo, "Requesting reboot with mark: %s", requestedMark)

	stampContents := []byte(fmt.Sprintf("%s\n", requestedMark))
	if err := ioutil.WriteFile(kolaRebootStamp, stampContents, 0644); err != nil {
		return err
	}
	return nil
}

func main() {
registerTestMap(register.Tests)
registerTestMap(register.UpgradeTests)
root.AddCommand(cmdRun)
root.AddCommand(cmdRunExtUnit)
cmdReboot.Args = cobra.ExactArgs(1)
root.AddCommand(cmdReboot)

cli.Execute(root)
}
45 changes: 34 additions & 11 deletions mantle/kola/harness.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,11 @@ import (
"strings"
"time"

"golang.org/x/crypto/ssh"

"github.com/coreos/go-semver/semver"
"github.com/coreos/pkg/capnslog"
"github.com/kballard/go-shellquote"
"github.com/pkg/errors"
"golang.org/x/crypto/ssh"

ignv3 "github.com/coreos/ignition/v2/config/v3_0"
ignv3types "github.com/coreos/ignition/v2/config/v3_0/types"
Expand Down Expand Up @@ -551,11 +550,11 @@ func registerExternalTest(testname, executable, dependencydir, ignition string,
unit := fmt.Sprintf(`[Unit]
[Service]
RemainAfterExit=yes
EnvironmentFile=-/run/kola-runext-env
Environment=KOLA_UNIT=%s
Environment=%s=%s
ExecStart=%s
[Install]
RequiredBy=multi-user.target
`, kolaExtBinDataEnv, kolaExtBinDataDir, remotepath)
`, unitname, kolaExtBinDataEnv, kolaExtBinDataDir, remotepath)
runextconfig := ignv3types.Config{
Ignition: ignv3types.Ignition{
Version: "3.0.0",
Expand Down Expand Up @@ -588,24 +587,48 @@ RequiredBy=multi-user.target
mach := c.Machines()[0]
plog.Debugf("Running kolet")

var previousRebootState string
var stdout []byte
var stderr []byte
var err error
for {
plog.Debug("Starting kolet run-test-unit")
_, stderr, err = mach.SSH(fmt.Sprintf("sudo ./kolet run-test-unit %s", shellquote.Join(unitname)))
if previousRebootState != "" {
plog.Debugf("Setting AUTOPKGTEST_REBOOT_MARK=%s", previousRebootState)
c.MustSSHf(mach, "echo AUTOPKGTEST_REBOOT_MARK=%s | sudo tee /run/kola-runext-env", previousRebootState)
previousRebootState = ""
}
stdout, stderr, err = mach.SSH(fmt.Sprintf("sudo ./kolet run-test-unit %s", shellquote.Join(unitname)))
expectingReboot := false
if exit, ok := err.(*ssh.ExitError); ok {
plog.Debug("Caught ssh.ExitError")
// In the future I'd like to better support having the host reboot itself and
// we just detect it.
// If we got SIGTERM, then we assume the unit (or our login) was killed by a reboot.
// Verify that on stdout
if exit.Signal() == "TERM" {
plog.Debug("Caught SIGTERM from kolet run-test-unit, rebooting machine")
plog.Debug("Caught SIGTERM from kolet run-test-unit")
expectingReboot = true
err = nil
}
}
if err == nil {
stdout := strings.TrimSpace(string(stdout))
if stdout == "reboot" {
var kolaRebootStdout []byte
kolaRebootStdout, _, err = mach.SSH("if test -f /run/kola-reboot; then cat /run/kola-reboot; fi")
if err != nil {
break
}
previousRebootState = strings.TrimSpace(string(kolaRebootStdout))
plog.Debugf("Reboot request with mark='%s'", previousRebootState)
suberr := mach.Reboot()
if suberr == nil {
err = nil
continue
}
plog.Debug("Propagating ssh.ExitError")
err = suberr
} else if expectingReboot {
err = errors.New("Got SIGTERM, but didn't see reboot indication")
} else if stdout != "" {
err = fmt.Errorf("Unexpected stdout %s", stdout)
}
}
// Other errors, just bomb out for now
Expand Down

0 comments on commit ceed1c1

Please sign in to comment.