Skip to content

Commit

Permalink
cmd/makemac, cmd/coordinator: export warnings/error from makemac to c…
Browse files Browse the repository at this point in the history
…oordinator

This adds information on warnings & errors to makemac's JSON status
handler that is then parsed by the coordinator's health checking code,
which already polls this JSON endpoint.

Updates golang/go#32449
Updates golang/go#15760

Change-Id: I69bea7b07c184d1f62a358bc317376aa97018230
Reviewed-on: https://go-review.googlesource.com/c/build/+/181217
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
  • Loading branch information
bradfitz committed Jun 10, 2019
1 parent 4c1c063 commit a3d123a
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 27 deletions.
57 changes: 37 additions & 20 deletions cmd/coordinator/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"html"
"html/template"
Expand Down Expand Up @@ -236,31 +237,21 @@ func newMacHealthChecker() *healthChecker {
// And check that the makemac daemon is listening.
var makeMac struct {
sync.Mutex
lastErr error
lastCheck time.Time // currently unused
lastCheck time.Time // currently unused
lastErrors []string
lastWarns []string
}
setMakeMacErr := func(err error) {
setMakeMacStatus := func(errs, warns []string) {
makeMac.Lock()
defer makeMac.Unlock()
makeMac.lastErr = err
makeMac.lastCheck = time.Now()
makeMac.lastErrors = errs
makeMac.lastWarns = warns
}
go func() {
c := &http.Client{Timeout: 15 * time.Second}
for {
res, err := c.Get("http://macstadiumd.golang.org:8713")
if err != nil {
setMakeMacErr(err)
} else {
res.Body.Close()
if res.StatusCode != 200 {
setMakeMacErr(fmt.Errorf("HTTP status %v", res.Status))
} else if res.Header.Get("Content-Type") != "application/json" {
setMakeMacErr(fmt.Errorf("unexpected content-type %q", res.Header.Get("Content-Type")))
} else {
setMakeMacErr(nil)
}
}
errs, warns := fetchMakeMacStatus()
setMakeMacStatus(errs, warns)
time.Sleep(15 * time.Second)
}
}()
Expand All @@ -274,13 +265,39 @@ func newMacHealthChecker() *healthChecker {
// Check makemac daemon.
makeMac.Lock()
defer makeMac.Unlock()
if makeMac.lastErr != nil {
w.errorf("makemac daemon: %v", makeMac.lastErr)
for _, v := range makeMac.lastWarns {
w.warnf("makemac daemon: %v", v)
}
for _, v := range makeMac.lastErrors {
w.errorf("makemac daemon: %v", v)
}
},
}
}

// fetchMakeMacStatus performs one HTTP GET against the makemac daemon's
// status endpoint and returns the error and warning strings found in the
// "Errors" and "Warnings" fields of its JSON response.
//
// Any problem talking to the daemon (request failure, non-200 status,
// a Content-Type other than "application/json", or an undecodable body)
// is reported as a single-element errs slice with nil warns.
func fetchMakeMacStatus() (errs, warns []string) {
	// fail builds the "one error, no warnings" result used by every
	// failure path below.
	fail := func(format string, args ...interface{}) ([]string, []string) {
		return []string{fmt.Sprintf(format, args...)}, nil
	}

	client := &http.Client{Timeout: 15 * time.Second}
	resp, err := client.Get("http://macstadiumd.golang.org:8713")
	if err != nil {
		return fail("failed to fetch status: %v", err)
	}
	defer resp.Body.Close()

	contentType := resp.Header.Get("Content-Type")
	switch {
	case resp.StatusCode != 200:
		return fail("HTTP status %v", resp.Status)
	case contentType != "application/json":
		return fail("unexpected content-type %q; want JSON", contentType)
	}

	var payload struct {
		Errors   []string
		Warnings []string
	}
	if decodeErr := json.NewDecoder(resp.Body).Decode(&payload); decodeErr != nil {
		return fail("reading status response body: %v", decodeErr)
	}
	return payload.Errors, payload.Warnings
}

func newJoyentSolarisChecker() *healthChecker {
return &healthChecker{
ID: "joyent-solaris",
Expand Down
13 changes: 12 additions & 1 deletion cmd/makemac/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

# golang.org/x/build/cmd/makemac

The makemac command starts OS X VMs for the builders.
The makemac command manages creating & destroying macOS VMs for the
builders. See the README in x/build/env/darwin/macstadium for some
more background.

## Deploying `makemac`

Expand Down Expand Up @@ -30,3 +32,12 @@ On that host,
$ sudo systemctl restart makemac
$ sudo journalctl -f -u makemac # watch it
```

## Checking that it's running:

```
$ curl -v http://macstadiumd.golang.org:8713
```

(Note that this URL won't work in a browser due to HSTS requirements on
*.golang.org.)
56 changes: 50 additions & 6 deletions cmd/makemac/makemac.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Usage:
package main

import (
"bufio"
"context"
"encoding/json"
"errors"
Expand Down Expand Up @@ -313,6 +314,9 @@ func govc(ctx context.Context, args ...string) error {
fmt.Fprintf(os.Stderr, "$ govc %v\n", strings.Join(args, " "))
out, err := exec.CommandContext(ctx, "govc", args...).CombinedOutput()
if err != nil {
if isFileSystemReadOnly() {
out = append(out, "; filesystem is read-only"...)
}
return fmt.Errorf("govc %s ...: %v, %s", args[0], err, out)
}
return nil
Expand Down Expand Up @@ -372,7 +376,7 @@ func getState(ctx context.Context) (*State, error) {

var hosts elementList
if err := govcJSONDecode(ctx, &hosts, "ls", "-json", "/MacStadium-ATL/host/MacMini_Cluster"); err != nil {
return nil, fmt.Errorf("Reading /MacStadium-ATL/host/MacMini_Cluster: %v", err)
return nil, fmt.Errorf("getState: reading /MacStadium-ATL/host/MacMini_Cluster: %v", err)
}
for _, h := range hosts.Elements {
if h.Object.Self.Type == "HostSystem" {
Expand All @@ -384,7 +388,7 @@ func getState(ctx context.Context) (*State, error) {

var vms elementList
if err := govcJSONDecode(ctx, &vms, "ls", "-json", "/MacStadium-ATL/vm"); err != nil {
return nil, fmt.Errorf("Reading /MacStadium-ATL/vm: %v", err)
return nil, fmt.Errorf("getState: reading /MacStadium-ATL/vm: %v", err)
}
for _, h := range vms.Elements {
if h.Object.Self.Type != "VirtualMachine" {
Expand Down Expand Up @@ -528,6 +532,8 @@ var status struct {
lastCheck time.Time
lastLog string
lastState *State
warnings []string
errors []string
}

func init() {
Expand Down Expand Up @@ -581,14 +587,20 @@ func autoAdjust() {

st, err := getState(ctx)
if err != nil {
log.Printf("getting VMWare state: %v", err)
status.Lock()
status.errors = []string{err.Error()}
status.Unlock()
log.Print(err)
return
}
var warnings, errors []string
defer func() {
// Set status.lastState once we're no longer using it.
if st != nil {
status.Lock()
status.lastState = st
status.warnings = warnings
status.errors = errors
status.Unlock()
}
}()
Expand All @@ -597,12 +609,14 @@ func autoAdjust() {
req = req.WithContext(ctx)
res, err := http.DefaultClient.Do(req)
if err != nil {
errors = append(errors, fmt.Sprintf("getting /status/reverse.json from coordinator: %v", err))
log.Printf("getting reverse status: %v", err)
return
}
defer res.Body.Close()
var rstat types.ReverseBuilderStatus
if err := json.NewDecoder(res.Body).Decode(&rstat); err != nil {
errors = append(errors, fmt.Sprintf("decoding /status/reverse.json from coordinator: %v", err))
log.Printf("decoding reverse.json: %v", err)
return
}
Expand All @@ -618,6 +632,7 @@ func autoAdjust() {
}

// Destroy running VMs that appear to be dead and not connected to the coordinator.
// TODO: do these all concurrently.
dirty := false
for name, vi := range st.VMInfo {
if vi.BootTime.After(time.Now().Add(-3 * time.Minute)) {
Expand All @@ -632,18 +647,22 @@ func autoAdjust() {
// Look it up by its slot name instead.
rh = revHost[vi.SlotName]
}
if rh == nil { // || (!rh.Busy && rh.ConnectedSec > 50 && rh.HostType == "host-darwin-10_12") {
if rh == nil {
log.Printf("Destroying VM %q unknown to coordinator...", name)
err := govc(ctx, "vm.destroy", name)
log.Printf("vm.destroy(%q) = %v", name, err)
dirty = true
if err != nil {
warnings = append(warnings, fmt.Sprintf("vm.destroy(%q) = %v", name, err))
}
}
}
for {
if dirty {
st, err = getState(ctx)
if err != nil {
log.Printf("getState: %v", err)
errors = append(errors, err.Error())
log.Print(err)
return
}
}
Expand All @@ -661,7 +680,9 @@ func autoAdjust() {
dedupLogf("Have capacity for %d more Mac VMs; creating requested 10.%d ...", canCreate, ver)
slotName, err := st.CreateMac(ctx, ver)
if err != nil {
log.Printf("Error creating 10.%d: %v", ver, err)
errStr := fmt.Sprintf("Error creating 10.%d: %v", ver, err)
errors = append(errors, errStr)
log.Print(errStr)
return
}
log.Printf("Created 10.%d VM on %q", ver, slotName)
Expand Down Expand Up @@ -715,10 +736,14 @@ func handleStatus(w http.ResponseWriter, r *http.Request) {
LastCheck string
LastLog string
LastState *State
Warnings []string
Errors []string
}{
LastCheck: status.lastCheck.UTC().Format(time.RFC3339),
LastLog: status.lastLog,
LastState: status.lastState,
Warnings: status.warnings,
Errors: status.errors,
}
j, _ := json.MarshalIndent(res, "", "\t")
w.Write(j)
Expand Down Expand Up @@ -823,3 +848,22 @@ func (h onlyAtRoot) ServeHTTP(w http.ResponseWriter, r *http.Request) {
}
h.h.ServeHTTP(w, r)
}

// isFileSystemReadOnly reports whether the root filesystem ("/") is
// mounted read-only, based on the first entry for "/" in /proc/mounts.
//
// It is a best-effort diagnostic: it returns false if /proc/mounts cannot
// be opened, if no entry for "/" is found, or if reading stops early.
func isFileSystemReadOnly() bool {
	f, err := os.Open("/proc/mounts")
	if err != nil {
		return false
	}
	defer f.Close()
	// Look for line:
	//    /dev/sda1 / ext4 rw,relatime,errors=remount-ro,data=ordered 0 0
	bs := bufio.NewScanner(f)
	for bs.Scan() {
		if isRoot, readOnly := rootMountState(bs.Text()); isRoot {
			return readOnly
		}
	}
	// A scanner error is treated the same as "no root entry": this helper
	// only decorates an existing error message, so it must never fail hard.
	return false
}

// rootMountState parses a single /proc/mounts line. isRoot reports whether
// the line describes the mount point "/"; readOnly reports whether that
// line's mount options begin with the "ro" option.
//
// Lines with fewer than four whitespace-separated fields (for example blank
// lines) are ignored rather than indexed out of range.
func rootMountState(line string) (isRoot, readOnly bool) {
	fields := strings.Fields(line)
	if len(fields) < 4 {
		return false, false
	}
	mountPoint, opts := fields[1], fields[3]
	if mountPoint != "/" {
		return false, false
	}
	// Accept both "ro,<more options>" and a bare "ro" with nothing after it.
	return true, opts == "ro" || strings.HasPrefix(opts, "ro,")
}

0 comments on commit a3d123a

Please sign in to comment.