Skip to content
This repository has been archived by the owner on Feb 9, 2024. It is now read-only.

(6.2) gravity check for upgrades. #871

Merged
merged 1 commit into from Nov 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
59 changes: 56 additions & 3 deletions lib/app/service/installer.go
Expand Up @@ -195,6 +195,8 @@ func (r *applications) GetAppInstaller(req appservice.InstallerRequest) (install
uploadScriptFilename, string(uploadScript), defaults.SharedExecutableMask),
archive.ItemFromStringMode(
upgradeScriptFilename, upgradeScript, defaults.SharedExecutableMask),
archive.ItemFromStringMode(
checkScriptFilename, checkScript, defaults.SharedExecutableMask),
archive.ItemFromStringMode(
readmeFilename, readme, defaults.SharedReadMask))...)
writer.CloseWithError(err)
Expand Down Expand Up @@ -338,6 +340,13 @@ const (
#
# Installation script for Gravity-powered multi-host Linux applications.
#
# Copyright 2016 Gravitational, Inc.
#
# This file is licensed under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0

REQMSG="This installer requires a 64-bit Linux desktop"

Expand Down Expand Up @@ -383,14 +392,37 @@ main "$@"
#
# Script for upgrading the currently running application to a new version.
#
# Copyright 2016 Gravitational, Inc.
#
# This file is licensed under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0

if [[ $(id -u) -ne 0 ]]; then
echo "please run this script as root" && exit 1
fi

scriptdir=$(dirname $(realpath $0))
app=$($scriptdir/gravity app-package --state-dir=$scriptdir)
$scriptdir/upload && $scriptdir/gravity --insecure update trigger $app
app=$("$scriptdir/gravity" app-package --state-dir="$scriptdir")
"$scriptdir/upload" && "$scriptdir/gravity" --insecure update trigger $app
`

// checkScript is the body of the preflight checks script that is packed into
// the installer archive under checkScriptFilename. It runs "gravity check"
// against the app.yaml located next to the script and forwards any extra
// command line arguments to it.
checkScript = `#!/bin/bash
#
# Script for executing preflight checks.
#
# Copyright 2019 Gravitational, Inc.
#
# This file is licensed under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0

# Resolve the directory this script lives in. Every expansion is quoted so
# that a path containing spaces or glob characters is not word-split.
scriptdir="$(dirname "$(realpath "$0")")"
"$scriptdir/gravity" check --image-path="$scriptdir" "$scriptdir/app.yaml" "$@"
`

readme = `Requirements
Expand All @@ -405,6 +437,27 @@ You also need a direct network connection to the servers
The target servers need to be able to connect to the computer
the installer is running on during the installation.

Executing preflight checks
==========================

Before launching an install or upgrade operation, you can run preflight
checks to make sure the infrastructure satisfies all requirements.

For example, to see if the node satisfies the requirements before the
initial installation, run:

./run_preflight_checks

To check the node against a specific node profile (defined in app.yaml found
in the same directory), pass the profile name on the command line:

./run_preflight_checks --profile=worker

If the cluster is already installed, the same script can be used to check
requirements before launching the upgrade operation:

./run_preflight_checks

Starting the installer
======================

Expand Down Expand Up @@ -442,6 +495,7 @@ status command.
installScriptFilename = "install"
uploadScriptFilename = "upload"
upgradeScriptFilename = "upgrade"
checkScriptFilename = "run_preflight_checks"
readmeFilename = "README"
)

Expand All @@ -456,6 +510,5 @@ var uploadScriptTemplate = template.Must(template.New("uploadScript").Parse(`#!/
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
./gravity --insecure update upload --state-dir=.
`))
108 changes: 75 additions & 33 deletions lib/checks/checks.go
Expand Up @@ -252,6 +252,18 @@ func DockerConfigFromSchemaValue(dockerSchema schema.Docker) (config storage.Doc
}
}

// Checker defines a preflight checker interface.
//
// The checker type in this file implements it: its Run method calls CheckNode
// for every configured server, then CheckNodes for the whole set, and turns
// the collected probes into a single error.
type Checker interface {
// Run runs a full set of checks on the nodes configured in the checker.
// In the checker implementation it returns nil when no check produced a
// failed probe, and an error listing the failed checks otherwise.
Run(ctx context.Context) error
// CheckNode executes single-node checks (such as CPU/RAM requirements,
// disk space, etc) for the provided server.
// The returned probes describe the checks that failed; the slice is empty
// when nothing failed or when the checks were skipped.
CheckNode(ctx context.Context, server Server) []*agentpb.Probe
// CheckNodes executes multi-node checks (such as network reachability,
// bandwidth, etc) on the provided set of servers.
// The returned probes describe the checks that failed. The checker
// implementation skips these checks when fewer than two servers are given.
CheckNodes(ctx context.Context, servers []Server) []*agentpb.Probe
}

type checker struct {
// Config is the checker configuration.
Config
Expand Down Expand Up @@ -316,119 +328,149 @@ func (r *checker) Run(ctx context.Context) error {
return nil
}

var errors []error
var failed []*agentpb.Probe

// check each server against its profile
for _, server := range r.Servers {
errors = append(errors, r.CheckNode(ctx, server))
failed = append(failed, r.CheckNode(ctx, server)...)
}

// run checks that take all servers into account
errors = append(errors, r.CheckNodes(ctx, r.Servers))
failed = append(failed, r.CheckNodes(ctx, r.Servers)...)

if len(failed) != 0 {
return trace.BadParameter("The following checks failed:\n%v",
FormatFailedChecks(failed))
}

return trace.NewAggregate(errors...)
return nil
}

// CheckNode executes checks for the provided individual server.
func (r *checker) CheckNode(ctx context.Context, server Server) error {
func (r *checker) CheckNode(ctx context.Context, server Server) (failed []*agentpb.Probe) {
if ifTestsDisabled() {
log.Infof("Skipping single-node checks due to %q set.",
constants.PreflightChecksOffEnvVar)
return nil
}

var errors []error

requirements := r.Requirements[server.Server.Role]
validateCtx, cancel := context.WithTimeout(ctx, defaults.AgentValidationTimeout)
defer cancel()

failed, err := r.Remote.Validate(validateCtx, server.AdvertiseIP, ValidateConfig{
Manifest: r.Manifest,
Profile: server.Server.Role,
Docker: requirements.Docker,
})
if err != nil {
log.WithError(err).Warn("Failed to validate remote node.")
errors = append(errors,
trace.BadParameter("failed to validate remote node %v", server))
}
if len(failed) != 0 {
errors = append(errors, trace.BadParameter("%v failed checks:\n%v",
server, FormatFailedChecks(failed)))
failed = append(failed, &agentpb.Probe{
Detail: err.Error(),
Error: fmt.Sprintf("failed to validate node %v", server),
})
}

err = checkServerProfile(server, requirements)
if err != nil {
errors = append(errors, err)
log.WithError(err).Warn("Failed to validate profile requirements.")
failed = append(failed, &agentpb.Probe{
Detail: err.Error(),
Error: "failed to validate profile requirements",
})
}

dockerConfig := r.Manifest.SystemDocker()
if r.TestDockerDevice {
err = checkDockerDevice(server, dockerConfig)
if err != nil {
errors = append(errors, err)
log.WithError(err).Warn("Failed to validate docker device.")
failed = append(failed, &agentpb.Probe{
Detail: err.Error(),
Error: "failed to validate docker device",
})
}
}

err = checkSystemPackages(server, dockerConfig)
if err != nil {
errors = append(errors, err)
log.WithError(err).Warn("Failed to validate system packages.")
failed = append(failed, &agentpb.Probe{
Detail: err.Error(),
Error: "failed to validate system packages",
})
}

err = r.checkTempDir(ctx, server)
if err != nil {
errors = append(errors, err)
log.WithError(err).Warn("Failed to validate temporary directory.")
failed = append(failed, &agentpb.Probe{
Detail: err.Error(),
Error: "failed to validate temporary directory",
})
}

err = r.checkDisks(ctx, server)
if err != nil {
errors = append(errors, err)
log.WithError(err).Warn("Failed to validate disk requirements.")
failed = append(failed, &agentpb.Probe{
Detail: err.Error(),
Error: "failed to validate disk requirements",
})
}

return trace.NewAggregate(errors...)
return failed
}

// CheckNodes executes checks that take all provided servers into account.
func (r *checker) CheckNodes(ctx context.Context, servers []Server) error {
func (r *checker) CheckNodes(ctx context.Context, servers []Server) (failed []*agentpb.Probe) {
if ifTestsDisabled() {
log.Infof("Skipping multi-node checks due to %q set.",
constants.PreflightChecksOffEnvVar)
return nil
}

if len(servers) < 2 {
log.Infof("Skipping multi-node checks for < 2 servers: %v.",
servers)
return nil
}

var errors []error

err := checkSameOS(servers)
if err != nil {
errors = append(errors, err)
log.WithError(err).Warn("Failed to validate same OS requirements.")
failed = append(failed, &agentpb.Probe{
Detail: err.Error(),
Error: "failed to validate same OS requirement",
})
}

err = checkTime(time.Now().UTC(), servers)
if err != nil {
errors = append(errors, err)
log.WithError(err).Warn("Failed to validate time drift requirements.")
failed = append(failed, &agentpb.Probe{
Detail: err.Error(),
Error: "failed to validate time drift requirement",
})
}

if r.TestPorts {
err = r.checkPorts(ctx, servers)
if err != nil {
errors = append(errors, err)
log.WithError(err).Warn("Failed to validate port requirements.")
failed = append(failed, &agentpb.Probe{
Detail: err.Error(),
Error: "failed to validate port requirements",
})
}
}

if r.TestBandwidth {
err = r.checkBandwidth(ctx, servers)
if err != nil {
errors = append(errors, err)
log.WithError(err).Warn("Failed to validate bandwidth requirements.")
failed = append(failed, &agentpb.Probe{
Detail: err.Error(),
Error: "failed to validate network bandwidth requirements",
})
}
}

return trace.NewAggregate(errors...)
return failed
}

// checkDisks verifies that disk performance satisfies the profile requirements.
Expand Down