Skip to content

Commit

Permalink
wip fc-agent maintenance scheduling
Browse files Browse the repository at this point in the history
- RequestManager: only add requests with effective activities
- Implement activity/request merging
- Reboot/VM change activities, update activity improvements, refactoring
- manage.py uses update activity
- throw out a lot of old code :)
- default.nix use buildPythonPackage
- configurable request prep time
- update CLI
  • Loading branch information
dpausp committed Jun 30, 2023
1 parent 8ed02e4 commit b4b66e9
Show file tree
Hide file tree
Showing 41 changed files with 1,877 additions and 971 deletions.
18 changes: 14 additions & 4 deletions nixos/platform/agent.nix
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@ in
type = types.bool;
};

maintenancePreparationSeconds = mkOption {
default = 300;
description = "How long the preparation before maintance takes";
type = types.ints.positive;
};

maintenance = mkOption {
type = with types; attrsOf (submodule {
options = {
Expand Down Expand Up @@ -162,6 +168,9 @@ in
];

environment.etc."fc-agent.conf".text = ''
[maintenance]
preparation_seconds = ${toString cfg.agent.maintenancePreparationSeconds}
[maintenance-enter]
${concatStringsSep "\n" (
mapAttrsToList (k: v: "${k} = ${v.enter}") cfg.agent.maintenance)}
Expand Down Expand Up @@ -195,7 +204,7 @@ in

script =
let
verbose = lib.optionalString cfg.agent.verbose "--verbose";
verbose = lib.optionalString cfg.agent.verbose "--show-caller-info";
options = "--enc-path=${cfg.encPath} ${verbose}";
wrappedExtraCommands = lib.optionalString (cfg.agent.extraCommands != "") ''
(
Expand Down Expand Up @@ -240,11 +249,12 @@ in
IOWeight = 10; # 1-10000
ExecStart =
let
verbose = lib.optionalString cfg.agent.verbose "--verbose";
verbose = lib.optionalString cfg.agent.verbose "--show-caller-info";
options = "--enc-path=${cfg.encPath} ${verbose}";
runNow = lib.optionalString (!cfg.agent.updateInMaintenance) "--run-now";
in
"${pkgs.fc.agent}/bin/fc-maintenance ${options} request update ${runNow}";
if cfg.agent.updateInMaintenance
then "${pkgs.fc.agent}/bin/fc-maintenance ${options} request update"
else "${pkgs.fc.agent}/bin/fc-manage ${options} switch --update-channel --lazy";
};

path = commonEnvPath;
Expand Down
1 change: 1 addition & 0 deletions pkgs/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ let

fc = import ./fc {
inherit (self) callPackage;
pythonPackages = pkgs.python310Packages;
inherit pkgs;
};

Expand Down
18 changes: 13 additions & 5 deletions pkgs/fc/agent/default.nix
Original file line number Diff line number Diff line change
@@ -1,19 +1,24 @@
{ lib
, stdenv
, fetchPypi
, fetchFromGitHub
, dmidecode
, gitMinimal
, gptfdisk
, libyaml
, multipath-tools
, nix
, python310
, buildPythonPackage
, pythonPackages
, python
, util-linux
, xfsprogs
, pytest
, structlog
}:

let
py = python310.pkgs;
py = pythonPackages;

pytest-structlog = py.buildPythonPackage rec {
pname = "pytest-structlog";
Expand All @@ -26,11 +31,11 @@ let
sha256 = "00g2ivgj4y398d0y60lk710zz62pj80r9ya3b4iqijkp4j8nh4gp";
};

buildInputs = [ py.pytest py.structlog ];
buildInputs = [ pytest structlog ];
};

in
py.buildPythonPackage rec {
buildPythonPackage rec {
name = "fc-agent-${version}";
version = "1.0";
namePrefix = "";
Expand All @@ -40,6 +45,7 @@ py.buildPythonPackage rec {
py.freezegun
py.pytest-cov
py.responses
py.pytest
py.pytest-mock
py.pytest-subprocess
pytest-structlog
Expand Down Expand Up @@ -70,7 +76,9 @@ py.buildPythonPackage rec {
py.systemd
xfsprogs
];
# XXX: just for testing, remove!
doCheck = false;
dontStrip = true;
passthru.pythonDevEnv = python310.withPackages (_: checkInputs ++ propagatedBuildInputs);
passthru.pythonDevEnv = python.withPackages (_: checkInputs ++ propagatedBuildInputs);

}
38 changes: 26 additions & 12 deletions pkgs/fc/agent/fc/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import contextlib
import shutil
import textwrap
import unittest
import uuid
from pathlib import Path

import responses
import shortuuid
import structlog
from fc.maintenance.activity import Activity
Expand All @@ -12,8 +15,8 @@


@fixture
def agent_maintenance_config(tmpdir):
config_file = str(tmpdir / "fc-agent.conf")
def agent_maintenance_config(tmp_path):
config_file = str(tmp_path / "fc-agent.conf")
with open(config_file, "w") as f:
f.write(
textwrap.dedent(
Expand All @@ -31,27 +34,32 @@ def agent_maintenance_config(tmpdir):


@fixture
def reqmanager(tmpdir, agent_maintenance_config):
with ReqManager(
Path(tmpdir), config_file=Path(agent_maintenance_config)
) as rm:
yield rm
def reqmanager(tmp_path, agent_maintenance_config):
spooldir = tmp_path / "maintenance"
spooldir.mkdir()
enc_path = tmp_path / "enc.json"
enc_path.write_text("{}")
with unittest.mock.patch("fc.util.directory.connect"):
with ReqManager(
spooldir=spooldir,
enc_path=enc_path,
config_file=agent_maintenance_config,
) as rm:
yield rm


@fixture
def request_population(tmpdir, agent_maintenance_config):
def request_population(tmp_path, agent_maintenance_config, reqmanager):
@contextlib.contextmanager
def _request_population(n):
"""Creates a ReqManager with a pregenerated population of N requests.
The ReqManager and a list of Requests are passed to the calling code.
"""
with ReqManager(
Path(tmpdir), config_file=Path(agent_maintenance_config)
) as reqmanager:
with reqmanager:
requests = []
for i in range(n):
req = Request(Activity(), 60, comment=str(i))
req = Request(Activity(), comment=str(i))
req._reqid = shortuuid.encode(uuid.UUID(int=i))
reqmanager.add(req)
requests.append(req)
Expand All @@ -65,3 +73,9 @@ def logger():
_logger = structlog.get_logger()
_logger.trace = lambda *a, **k: None
return _logger


@fixture
def mocked_responses():
with responses.RequestsMock() as rsps:
yield rsps
63 changes: 49 additions & 14 deletions pkgs/fc/agent/fc/maintenance/activity/__init__.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,48 @@
"""Base class for maintenance activities."""
from enum import Enum
from typing import NamedTuple, Optional

import structlog
from fc.maintenance.estimate import Estimate

class RebootType(Enum):
WARM = 1
COLD = 2

class RebootType(str, Enum):
WARM = "reboot"
COLD = "poweroff"


class ActivityMergeResult(NamedTuple):
merged: Optional["Activity"] = None
is_effective: bool = False
is_significant: bool = False
changes: dict = {}


class Activity:
"""Maintenance activity which is executed as request payload.
Activities are executed possibly several times until they succeed or
exceeed their retry limit. Individual maintenance activities should
exceed their retry limit. Individual maintenance activities should
subclass this class and add custom behaviour to its methods.
Attributes: `stdout`, `stderr` capture the outcomes of shellouts.
Attributes: `stdout`, `stderr` capture the outcomes of shell-outs.
`returncode` controls the resulting request state. If `duration` is
set, it overrules execution timing done by the calling scope. Use
this if a logical transaction spans several attempts, e.g. for
reboots.
"""

stdout = None
stderr = None
returncode = None
duration = None
request = None # backpointer, will be set in Request
reboot_needed = None
stdout: None | str = None
stderr: None | str = None
returncode: None | int = None
duration: None | float = None
request = None # back-pointer, will be set in Request
reboot_needed: None | RebootType = None
# Do we predict that this activity will actually change anything?
is_effective = True
comment = ""
estimate = Estimate("10m")
log = None

def __init__(self):
"""Creates activity object (add args if you like).
Expand All @@ -41,10 +57,12 @@ def __getstate__(self):
# Deserializing loggers breaks, remove them before serializing (to YAML).
if "log" in state:
del state["log"]
if "request" in state:
del state["request"]
return state

def set_up_logging(self, log):
self.log = log
self.log = log.bind(activity_type=self.__class__.__name__)

def run(self):
"""Executes maintenance activity.
Expand All @@ -53,8 +71,8 @@ def run(self):
whatever you want here, but do not destruct `request.yaml`.
Directory contents is preserved between several attempts.
This method is expected to update self.stdout, self.stderr, and
self.returncode after each run. Request state is determined
This method is expected to update `self.stdout`, `self.stderr`, and
`self.returncode` after each run. Request state is determined
according to the EXIT_* constants in `state.py`. Any returncode
not listed there means hard failure and causes the request to be
archived. Uncaught exceptions are handled the same way.
Expand All @@ -74,3 +92,20 @@ def load(self):
def dump(self):
"""Saves additional state during serialization."""
pass

def merge(self, other) -> ActivityMergeResult:
"""Merges in other activity. Settings from other have precedence.
Returns merge result.
"""
return ActivityMergeResult()

def __rich__(self):
cls = self.__class__
out = f"{cls.__module__}.{cls.__qualname__}"
match self.reboot_needed:
case RebootType.WARM:
out += " (warm reboot needed)"
case RebootType.COLD:
out += " (cold reboot needed)"

return out
75 changes: 75 additions & 0 deletions pkgs/fc/agent/fc/maintenance/activity/reboot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Scheduled machine reboot.
This activity does nothing if the machine has been booted for another reason in
the time between creation and execution.
"""

from typing import Union

import structlog

from ..estimate import Estimate
from . import Activity, ActivityMergeResult, RebootType

_log = structlog.get_logger()


class RebootActivity(Activity):
estimate = Estimate("5m")

def __init__(
self, action: Union[str, RebootType] = RebootType.WARM, log=_log
):
super().__init__()
self.set_up_logging(log)
self.reboot_needed = RebootType(action)

@property
def comment(self):
return "Scheduled {}".format(
"cold boot" if self.reboot_needed == RebootType.COLD else "reboot"
)

def merge(self, other):
if not isinstance(other, RebootActivity):
self.log.debug(
"merge-incompatible-skip",
self_type=type(self),
other_type=type(other),
)
return ActivityMergeResult()

if self.reboot_needed == other.reboot_needed:
self.log.debug("merge-reboot-identical")
return ActivityMergeResult(self, is_effective=True)

if (
self.reboot_needed == RebootType.COLD
and other.reboot_needed == RebootType.WARM
):
self.log.debug(
"merge-reboot-cold-warm",
help=(
"merging a warm reboot into a cold reboot results in a "
"cold reboot."
),
)
return ActivityMergeResult(self, is_effective=True)

if (
self.reboot_needed == RebootType.WARM
and other.reboot_needed == RebootType.COLD
):
self.log.debug(
"merge-reboot-warm-to-cold",
help=(
"merging a cold reboot into a warm reboot results in a "
"cold reboot. This is a significant change."
),
)
return ActivityMergeResult(
self,
is_effective=True,
is_significant=True,
changes={"before": RebootType.WARM, "after": RebootType.COLD},
)
Loading

0 comments on commit b4b66e9

Please sign in to comment.