From a4deaaa07f6af17574ed40b8a2fe43b16279603c Mon Sep 17 00:00:00 2001 From: Tobias Stenzel Date: Tue, 21 Mar 2023 23:50:35 +0100 Subject: [PATCH] wip update CLI --- nixos/platform/agent.nix | 14 +- pkgs/fc/agent/default.nix | 2 + pkgs/fc/agent/fc/conftest.py | 7 + .../agent/fc/maintenance/activity/__init__.py | 4 +- .../maintenance/activity/tests/test_update.py | 87 ++++++- .../activity/tests/test_vm_change.py | 16 +- .../agent/fc/maintenance/activity/update.py | 219 ++++++++++-------- pkgs/fc/agent/fc/maintenance/cli.py | 112 +++------ pkgs/fc/agent/fc/maintenance/estimate.py | 5 +- .../{system_properties.py => maintenance.py} | 58 ++++- pkgs/fc/agent/fc/maintenance/request.py | 49 ++-- .../fc/agent/fc/maintenance/tests/test_cli.py | 16 +- .../fc/maintenance/tests/test_estimate.py | 5 +- ...stem_properties.py => test_maintenance.py} | 61 +++-- pkgs/fc/agent/fc/manage/cli.py | 4 + pkgs/fc/agent/fc/manage/manage.py | 207 ++--------------- pkgs/fc/agent/fc/manage/tests/test_manage.py | 113 +-------- pkgs/fc/agent/fc/util/channel.py | 141 +++++++++++ pkgs/fc/agent/fc/util/nixos.py | 28 ++- pkgs/fc/agent/shell.nix | 7 +- 20 files changed, 572 insertions(+), 583 deletions(-) rename pkgs/fc/agent/fc/maintenance/{system_properties.py => maintenance.py} (66%) rename pkgs/fc/agent/fc/maintenance/tests/{test_system_properties.py => test_maintenance.py} (68%) create mode 100644 pkgs/fc/agent/fc/util/channel.py diff --git a/nixos/platform/agent.nix b/nixos/platform/agent.nix index e4f08f7e9..004361cdc 100644 --- a/nixos/platform/agent.nix +++ b/nixos/platform/agent.nix @@ -124,6 +124,11 @@ in }; config = mkMerge [ + { + environment.etc."fc_agent_deprecation_warnings".text = + lib.optionalString (config.warnings != []) + ((lib.concatStringsSep "\n" config.warnings) + "\n"); + } (mkIf cfg.agent.install { environment.systemPackages = [ pkgs.fc.agent @@ -196,7 +201,7 @@ in script = let - verbose = lib.optionalString cfg.agent.verbose "--verbose"; + verbose = lib.optionalString cfg.agent.verbose "--show-caller-info"; options = "--enc-path=${cfg.encPath} ${verbose}"; wrappedExtraCommands = lib.optionalString (cfg.agent.extraCommands != "") '' ( @@ -241,11 +246,12 @@ in IOWeight = 10; # 1-10000 ExecStart = let - verbose = lib.optionalString cfg.agent.verbose "--verbose"; + verbose = lib.optionalString cfg.agent.verbose "--show-caller-info"; options = "--enc-path=${cfg.encPath} ${verbose}"; - runNow = lib.optionalString (!cfg.agent.updateInMaintenance) "--run-now"; in - "${pkgs.fc.agent}/bin/fc-maintenance ${options} request update ${runNow}"; + if cfg.agent.updateInMaintenance + then "${pkgs.fc.agent}/bin/fc-maintenance ${options} request update" + else "${pkgs.fc.agent}/bin/fc-manage ${options} switch --update-channel --lazy"; }; path = commonEnvPath; diff --git a/pkgs/fc/agent/default.nix b/pkgs/fc/agent/default.nix index 6a1bab3eb..1ee64d603 100644 --- a/pkgs/fc/agent/default.nix +++ b/pkgs/fc/agent/default.nix @@ -87,6 +87,8 @@ buildPythonPackage rec { py.systemd xfsprogs ]; + # XXX: just for testing, remove! + doCheck = false; dontStrip = true; passthru.pythonDevEnv = py.withPackages (_: checkInputs ++ propagatedBuildInputs); diff --git a/pkgs/fc/agent/fc/conftest.py b/pkgs/fc/agent/fc/conftest.py index d30eadd6e..429b868e9 100644 --- a/pkgs/fc/agent/fc/conftest.py +++ b/pkgs/fc/agent/fc/conftest.py @@ -3,6 +3,7 @@ import uuid from pathlib import Path +import responses import shortuuid import structlog from fc.maintenance.activity import Activity @@ -65,3 +66,9 @@ def logger(): _logger = structlog.get_logger() _logger.trace = lambda *a, **k: None return _logger + + +@fixture +def mocked_responses(): + with responses.RequestsMock() as rsps: + yield rsps diff --git a/pkgs/fc/agent/fc/maintenance/activity/__init__.py b/pkgs/fc/agent/fc/maintenance/activity/__init__.py index 3df5b3db0..f094c6bda 100644 --- a/pkgs/fc/agent/fc/maintenance/activity/__init__.py +++ b/pkgs/fc/agent/fc/maintenance/activity/__init__.py @@ -37,10 +37,12 @@ class Activity: duration = None request = None # backpointer, will be set in Request reboot_needed = None + # XXX: move to init? # Based on current knowledge, do we predict that this activity will actually change anything? is_effective = True comment = "" estimate = Estimate("10m") + log = None def __init__(self): """Creates activity object (add args if you like). @@ -91,7 +93,7 @@ def dump(self): """Saves additional state during serialization.""" pass - def merge(self, activity) -> ActivityMergeResult: + def merge(self, other) -> ActivityMergeResult: """Merges in other activity. Settings from other have precedence. Returns merge result. """ diff --git a/pkgs/fc/agent/fc/maintenance/activity/tests/test_update.py b/pkgs/fc/agent/fc/maintenance/activity/tests/test_update.py index f758f6c48..70ab50046 100644 --- a/pkgs/fc/agent/fc/maintenance/activity/tests/test_update.py +++ b/pkgs/fc/agent/fc/maintenance/activity/tests/test_update.py @@ -1,10 +1,12 @@ import textwrap -from unittest.mock import create_autospec +from unittest.mock import MagicMock, create_autospec -import structlog +import pytest +import responses import yaml from fc.maintenance.activity import Activity, RebootType from fc.maintenance.activity.update import UpdateActivity +from fc.util.channel import Channel from fc.util.nixos import ( BuildFailed, ChannelException, @@ -55,7 +57,6 @@ current_kernel: 5.10.45 current_system: {CURRENT_SYSTEM_PATH} current_version: 21.05.1233.a9cc58d -equivalent_planned_activities: [] next_channel_url: https://hydra.flyingcircus.io/build/93222/download/1/nixexprs.tar.xz next_environment: fc-21.05-production next_kernel: 5.10.50 @@ -120,7 +121,7 @@ def test_update_merge_additional_reload_is_an_insignificant_update(activity): other = UpdateActivity(channel_url) other.unit_changes = { **UNIT_CHANGES, - "reload": set(["nginx.service", "dbus.service"]), + "reload": {"nginx.service", "dbus.service"}, } result = activity.merge(other) # Then the merge result should be a new activity and the change is @@ -143,7 +144,7 @@ def test_update_merge_more_unit_changes_is_a_significant_update(activity): ) other = UpdateActivity(channel_url) - other.unit_changes = {**UNIT_CHANGES, "restart": set(["mysql.service"])} + other.unit_changes = {**UNIT_CHANGES, "restart": {"mysql.service"}} result = activity.merge(other) # Then the merge result should be a new activity and the change is # significant. @@ -188,6 +189,7 @@ def fake_changed_kernel_version(path): RegisterFailed=RegisterFailed, ) + mocked.format_unit_change_lines = fc.util.nixos.format_unit_change_lines mocked.get_fc_channel_build = fake_get_fc_channel_build mocked.channel_version = fake_channel_version mocked.kernel_version = fake_changed_kernel_version @@ -203,10 +205,8 @@ def fake_changed_kernel_version(path): return mocked -def test_update_activity_from_system_changed(nixos_mock): - activity = UpdateActivity.from_system_if_changed( - NEXT_CHANNEL_URL, ENVIRONMENT - ) +def test_update_activity(nixos_mock): + activity = UpdateActivity(NEXT_CHANNEL_URL, ENVIRONMENT) assert activity assert activity.current_version == CURRENT_VERSION @@ -310,3 +310,72 @@ def test_update_activity_switch_to_system_fails(log, nixos_mock, activity): assert activity.returncode == 3 log.has("update-run-failed", returncode=3) + + +def test_update_activity_from_enc( + log, mocked_responses, nixos_mock, logger, monkeypatch +): + environment = "fc-21.05-dev" + current_channel_url = ( + "https://hydra.flyingcircus.io/build/93000/download/1/nixexprs.tar.xz" + ) + next_channel_url = ( + "https://hydra.flyingcircus.io/build/93222/download/1/nixexprs.tar.xz" + ) + current_version = "21.05.1233.a9cc58d" + next_version = "21.05.1235.bacc11d" + + enc = { + "parameters": { + "environment_url": next_channel_url, + "environment": environment, + } + } + + mocked_responses.add(responses.HEAD, current_channel_url) + mocked_responses.add(responses.HEAD, next_channel_url) + monkeypatch.setattr( + "fc.util.nixos.channel_version", (lambda c: next_version) + ) + + current_channel = Channel(logger, current_channel_url) + current_channel.version = lambda *a: current_version + monkeypatch.setattr( + "fc.manage.manage.Channel.current", lambda *a: current_channel + ) + activity = UpdateActivity.from_enc(logger, enc, current_requests=[]) + assert activity + + +# current="", +# next="", + + +def test_update_activity_from_enc_unchanged( + log, mocked_responses, nixos_mock, logger, monkeypatch +): + """Given an unchanged channel url, should not prepare an update activity""" + environment = "fc-21.05-dev" + + enc = { + "parameters": { + "environment_url": CURRENT_CHANNEL_URL, + "environment": ENVIRONMENT, + } + } + + mocked_responses.add(responses.HEAD, CURRENT_CHANNEL_URL) + monkeypatch.setattr( + "fc.util.nixos.channel_version", (lambda c: CURRENT_VERSION) + ) + + current_channel = Channel(logger, CURRENT_CHANNEL_URL) + current_channel.version = lambda *a: CURRENT_VERSION + monkeypatch.setattr( + "fc.manage.manage.Channel.current", lambda *a: current_channel + ) + + activity = UpdateActivity.from_enc(logger, enc, current_requests=[]) + assert activity is None diff --git a/pkgs/fc/agent/fc/maintenance/activity/tests/test_vm_change.py b/pkgs/fc/agent/fc/maintenance/activity/tests/test_vm_change.py index b928c1039..4dd0a1920 100644 --- a/pkgs/fc/agent/fc/maintenance/activity/tests/test_vm_change.py +++ b/pkgs/fc/agent/fc/maintenance/activity/tests/test_vm_change.py @@ -1,20 +1,6 @@ -import textwrap -from unittest.mock import create_autospec - -import pytest -import structlog import yaml -from fc.maintenance.activity import Activity, RebootType -from fc.maintenance.activity.reboot import RebootActivity -from fc.maintenance.activity.update import UpdateActivity +from fc.maintenance.activity import Activity from fc.maintenance.activity.vm_change import VMChangeActivity -from fc.util.nixos import ( - BuildFailed, - ChannelException, - ChannelUpdateFailed, - RegisterFailed, - SwitchFailed, -) from pytest import fixture SERIALIZED_ACTIVITY = f"""\ diff --git a/pkgs/fc/agent/fc/maintenance/activity/update.py b/pkgs/fc/agent/fc/maintenance/activity/update.py index 2272d37a0..d3cde9982 100644 --- a/pkgs/fc/agent/fc/maintenance/activity/update.py +++ b/pkgs/fc/agent/fc/maintenance/activity/update.py @@ -10,6 +10,7 @@ from fc.util import nixos from fc.util.nixos import UnitChanges +from ...util.channel import Channel from . import Activity, ActivityMergeResult, RebootType _log = structlog.get_logger() @@ -21,6 +22,13 @@ class UpdateActivity(Activity): + """ + Updates the NixOS system to a different channel URL. + The new system resulting from the channel URL is already pre-built + in UpdateActivity.prepare which means that a run of this activity usually + only has to set the new system link and switch to it. + """ + def __init__( self, next_channel_url: str, next_environment: str = None, log=_log ): @@ -32,7 +40,6 @@ def __init__( self.current_channel_url = None self.current_version = None self.next_version = None - self.equivalent_planned_activities = [] self.current_environment = None self.unit_changes: UnitChanges = {} self.current_kernel = None @@ -40,6 +47,11 @@ def __init__( self.set_up_logging(log) self._detect_current_state() self._detect_next_version() + self.log.debug( + "update-init", + next_channel_url=next_channel_url, + next_environment=next_environment, + ) def __eq__(self, other): return ( @@ -48,10 +60,55 @@ def __eq__(self, other): ) @classmethod - def from_system_if_changed( - cls, next_channel_url: str, next_environment: str = None - ): - activity = cls(next_channel_url, next_environment) + def from_enc(cls, log, enc, current_requests): + """ + Create a new UpdateActivity from ENC data or None, if nothing would + change. + + """ + if not enc or not enc.get("parameters"): + log.warning("enc-data-missing", msg="No ENC data.") + return + + env_name = enc["parameters"]["environment"] + channel_url = enc["parameters"]["environment_url"] + + next_channel = Channel( + log, + channel_url, + name="next", + environment=env_name, + ) + + if not next_channel or next_channel.is_local: + log.warn( + "update-from-enc-local-channel", + _replace_msg=( + "UpdateActivity is incompatible with local checkouts." + ), + ) + return + + equivalent_planned_requests = [ + req + for req in current_requests + if isinstance(req.activity, UpdateActivity) + and req.activity.next_channel_url == next_channel.resolved_url + ] + + if equivalent_planned_requests: + log.info( + "update-from-enc-found-existing", + _replace_msg=( + "Existing request {request} with same channel URL: {channel_url}" + ), + request=equivalent_planned_requests[0].id, + channel_url=next_channel.resolved_url, + ) + return + + activity = cls(channel_url, env_name) + if activity.is_effective: return activity @@ -60,13 +117,11 @@ def is_effective(self): """Does this actually change anything?""" if self.next_channel_url == self.current_channel_url: return False - if self.equivalent_planned_activities: - return False if self.current_system == self.next_system: return False return True - def prepare(self, dry_run=False, current_requests=[]): + def prepare(self, dry_run=False): self.log.debug( "update-prepare-start", current_version=self.current_version, @@ -82,55 +137,53 @@ def prepare(self, dry_run=False, current_requests=[]): else: out_link = NEXT_SYSTEM - self.equivalent_planned_activities = [ - r.activity - for r in current_requests - if isinstance(r.activity, UpdateActivity) - and r.activity.next_channel_url == self.next_channel_url - ] + self.next_system = nixos.build_system( + self.next_channel_url, out_link=out_link, log=self.log + ) + self.unit_changes = nixos.dry_activate_system( + self.next_system, self.log + ) - if self.equivalent_planned_activities: - self.log.info( - "update-prepare-found-existing", - _replace_msg="Found existing planned activity with same channel URL", - ) + self._register_reboot_for_kernel() + + if self.reboot_needed: + self.estimate = Estimate("10m") + elif ( + self.unit_changes["restart"] + or self.unit_changes["stop"] + or self.unit_changes["start"] + ): + self.estimate = Estimate("5m") else: - self.next_system = nixos.build_system( - self.next_channel_url, out_link=out_link, log=self.log - ) - self.unit_changes = nixos.dry_activate_system( - self.next_system, self.log - ) + # Only reloads or no unit changes, this should not take long + self.estimate = Estimate("2m") - self._register_reboot_for_kernel() + def update_system_channel(self): + nixos.update_system_channel(self.next_channel_url, self.log) + + @property + def is_identical_to_running_system(self): + if nixos.current_system() == self.next_system: + self.log.info( + "update-identical", + current_version=self.next_version, + next_version=self.next_version, + system=self.next_system.removeprefix("/nix/store/"), + _replace_msg=( + "Running system {system} is already the wanted system." + ), + ) + return True - if self.reboot_needed: - self.estimate = Estimate("10m") - elif ( - self.unit_changes["restart"] - or self.unit_changes["stop"] - or self.unit_changes["start"] - ): - self.estimate = Estimate("5m") - else: - # Only reloads or no unit changes, this should not take long - self.estimate = Estimate("2m") + return False def run(self): """Do the update""" + step = 1 try: - step = 1 - nixos.update_system_channel(self.next_channel_url, self.log) - - if nixos.running_system_version(self.log) == self.next_version: - self.log.info( - "update-run-skip", - current_version=self.next_version, - _replace_msg=( - "Running version is already the wanted version " - "{current_version}, skip update." - ), - ) + self.update_system_channel() + + if self.is_identical_to_running_system: self.returncode = 0 return @@ -174,29 +227,12 @@ def run(self): @property def changelog(self): - # Clean up raw unit changes: usually, units are stopped and - # started shortly after for updates. They get their own category - # "Start/Stop" to separate them from permanent stops and starts. - pretty_unit_changes = {} - start_units = set(self.unit_changes["start"]) - stop_units = set(self.unit_changes["stop"]) - reload_units = set(self.unit_changes["reload"]) - start_stop_units = start_units.intersection(stop_units) - pretty_unit_changes["Start/Stop"] = start_stop_units - pretty_unit_changes["Restart"] = set(self.unit_changes["restart"]) - pretty_unit_changes["Start"] = start_units - start_stop_units - pretty_unit_changes["Stop"] = stop_units - start_stop_units - pretty_unit_changes["Reload"] = reload_units - {"dbus.service"} - - unit_change_lines = [] - - for cat, units in pretty_unit_changes.items(): - if units: - unit_str = ", ".join( - u.replace(".service", "") for u in sorted(units) - ) - unit_change_lines.append(f"{cat}: {unit_str}") - + """ + A human readable summary of what will be changed by this update. + Includes possible reboots, significant unit state changes (start, stop, + restart) as well as changes of build number, environment ( + fc-22.11-staging, for example) and channel URL. + """ msg = [f"System update: {self.current_version} -> {self.next_version}"] current_build = nixos.get_fc_channel_build( @@ -221,6 +257,8 @@ def changelog(self): if self.reboot_needed: msg.append("Will reboot after the update.") + unit_change_lines = nixos.format_unit_change_lines(self.unit_changes) + if unit_change_lines: msg.extend(unit_change_lines) msg.append("") @@ -261,31 +299,27 @@ def merge(self, other: Activity) -> ActivityMergeResult: new=other_state, ) - added_unit_changes = { - category: diff - for category, self_changes, other_changes in zip( - self.unit_changes.keys(), - self.unit_changes.values(), - other.unit_changes.values(), - ) - if (diff := set(other_changes) - set(self_changes)) - } + added_unit_changes = {} + removed_unit_changes = {} + + for category, changes in self.unit_changes.items(): + other_changes = other.unit_changes[category] + added = set(other_changes) - set(changes) + + if added: + added_unit_changes[category] = added + + removed = set(changes) - set(other_changes) + + if removed: + removed_unit_changes[category] = removed - removed_unit_changes = { - category: diff - for category, self_changes, other_changes in zip( - self.unit_changes.keys(), - self.unit_changes.values(), - other.unit_changes.values(), - ) - if (diff := set(self_changes) - set(other_changes)) - } changes = { "added_unit_changes": added_unit_changes, "removed_unit_changes": removed_unit_changes, } - # Additional starts, stops and restart of units are considered an + # Additional starts, stops and restart of units are considered a # significant change of the activity. Reloads are harmless and can be # ignored. @@ -310,6 +344,7 @@ def _register_reboot_for_kernel(self): if current_kernel == next_kernel: self.log.debug("update-kernel-unchanged") + self.reboot_needed = None else: self.log.info( "update-kernel-changed", @@ -327,7 +362,7 @@ def _detect_current_state(self): self.current_environment = nixos.current_fc_environment_name() self.current_system = nixos.current_system() self.log.debug( - "update-activity-update-current-state", + "update-activity-current-state", current_version=self.current_version, current_channel_url=self.current_channel_url, current_environment=self.current_environment, diff --git a/pkgs/fc/agent/fc/maintenance/cli.py b/pkgs/fc/agent/fc/maintenance/cli.py index 7e86f8f91..6d1cfe817 100644 --- a/pkgs/fc/agent/fc/maintenance/cli.py +++ b/pkgs/fc/agent/fc/maintenance/cli.py @@ -1,25 +1,23 @@ -import sys from pathlib import Path from typing import NamedTuple, Optional import structlog import typer from fc.maintenance.activity.reboot import RebootActivity -from fc.maintenance.activity.update import UpdateActivity from fc.maintenance.lib.shellscript import ShellScriptActivity +from fc.maintenance.maintenance import ( + request_reboot_for_cpu, + request_reboot_for_kernel, + request_reboot_for_memory, + request_reboot_for_qemu, + request_update, +) from fc.maintenance.reqmanager import ( DEFAULT_CONFIG_FILE, DEFAULT_SPOOLDIR, ReqManager, ) from fc.maintenance.request import Request -from fc.maintenance.system_properties import ( - request_reboot_for_cpu, - request_reboot_for_kernel, - request_reboot_for_memory, - request_reboot_for_qemu, -) -from fc.manage.manage import prepare_switch_in_maintenance, switch_with_update from fc.util.enc import load_enc from fc.util.lock import locked from fc.util.logging import ( @@ -227,7 +225,7 @@ def reboot(comment: Optional[str] = None, cold_reboot: bool = False): @request_app.command() def system_properties(): - """Request reboot for changed sys properties. + """Request reboot for changed system properties. Runs applicable checks for the machine type (virtual/physical). * Physical: kernel @@ -242,98 +240,44 @@ def system_properties(): rm.scan() if enc["parameters"]["machine"] == "virtual": - rm.add(request_reboot_for_memory(enc)) - rm.add(request_reboot_for_cpu(enc)) - rm.add(request_reboot_for_qemu()) + rm.add(request_reboot_for_memory(log, enc)) + rm.add(request_reboot_for_cpu(log, enc)) + rm.add(request_reboot_for_qemu(log)) - rm.add(request_reboot_for_kernel()) + rm.add(request_reboot_for_kernel(log)) log.info("fc-maintenance-system-properties-finished") @request_app.command() -def update( - run_now: bool = Option( - default=False, help="do update now instead of scheduling a request" - ) -): +def update(): """Request a system update. Builds the system and prepares the update to be run in a maintenance - window by default. To activate the update immediately, pass the - --run-now option. + window by default. Acquires an exclusive lock because this shouldn't be run concurrently with more invocations of the update command or other commands (from - fc-manage) that - potentially modify the system.""" - init_command_logging(log, context.logdir) + fc-manage) that potentially modify the system. + """ log.info("fc-maintenance-update-start") enc = load_enc(log, context.enc_path) + init_command_logging(log, context.logdir) + + with rm: + rm.scan() + current_requests = rm.requests.values() with locked(log, context.lock_dir): - if run_now: - keep_cmd_output = switch_with_update(log, enc, lazy=True) - else: - keep_cmd_output = prepare_switch_in_maintenance(log, enc) + request = request_update(log, enc, current_requests) - if not keep_cmd_output: + with rm: + request = rm.add(request) + + if request is None: drop_cmd_output_logfile(log) log.info("fc-maintenance-update-finished") -@request_app.command() -def update_with_update_activity( - channel_url: str = Argument(..., help="channel URL to update to"), - run_now: bool = Option( - default=False, help="do update now instead of scheduling a request" - ), - dry_run: bool = Option( - default=False, help="do nothing, just show activity" - ), -): - """(Experimental) Prepare an UpdateActivity or execute it now.""" - - activity = UpdateActivity.from_system_if_changed(channel_url) - - if activity is None: - log.warn( - "update-skip", - _replace_msg="Channel URL unchanged, skipped.", - activity=activity, - ) - sys.exit(1) - - activity.prepare(dry_run) - - # possible short-cut: built system is the same - # => we can skip requesting maintenance and set the new channel directly - - if run_now: - log.info( - "update-run-now", - _replace_msg="Run-now mode requested, running the update now.", - ) - activity.run() - - elif dry_run: - log.info( - "update-dry-run", - _replace_msg=( - "Update prediction was successful. This would be applied by " - "the update:" - ), - _output=activity.changelog, - ) - else: - with rm: - rm.scan() - rm.add(Request(activity)) - log.info( - "update-prepared", - _replace_msg=( - "Update preparation was successful. This will be applied in a " - "maintenance window:" - ), - _output=activity.changelog, - ) +if __name__ == "__main__": + app() diff --git a/pkgs/fc/agent/fc/maintenance/estimate.py b/pkgs/fc/agent/fc/maintenance/estimate.py index 1716f7b4c..a4330cfaa 100644 --- a/pkgs/fc/agent/fc/maintenance/estimate.py +++ b/pkgs/fc/agent/fc/maintenance/estimate.py @@ -27,7 +27,7 @@ def __init__(self, spec): def __str__(self): if self.value == 0: return "0s" - elif self.value > 0 and self.value < 1: + elif 0 < self.value < 1: return "1s" out = [] remainder = self.value @@ -51,3 +51,6 @@ def __int__(self): def __float__(self): return float(self.value) + + def __lt__(self, other): + return self.value < other.value diff --git a/pkgs/fc/agent/fc/maintenance/system_properties.py b/pkgs/fc/agent/fc/maintenance/maintenance.py similarity index 66% rename from pkgs/fc/agent/fc/maintenance/system_properties.py rename to pkgs/fc/agent/fc/maintenance/maintenance.py index cf307b11e..462f18501 100644 --- a/pkgs/fc/agent/fc/maintenance/system_properties.py +++ b/pkgs/fc/agent/fc/maintenance/maintenance.py @@ -1,18 +1,18 @@ import os.path import shutil +from typing import Optional import fc.util import fc.util.dmi_memory import fc.util.nixos import fc.util.vm -import structlog +from fc.maintenance import Request from fc.maintenance.activity.reboot import RebootActivity +from fc.maintenance.activity.update import UpdateActivity from fc.maintenance.activity.vm_change import VMChangeActivity -log = structlog.get_logger() - -def request_reboot_for_memory(enc): +def request_reboot_for_memory(log, enc) -> Optional[Request]: """Schedules reboot if the memory size has changed.""" wanted_memory = int(enc["parameters"].get("memory", 0)) if not wanted_memory: @@ -23,10 +23,10 @@ def request_reboot_for_memory(enc): ) if activity: - return fc.maintenance.Request(activity) + return Request(activity) -def request_reboot_for_cpu(enc): +def request_reboot_for_cpu(log, enc) -> Optional[Request]: """Schedules reboot if the number of cores has changed.""" wanted_cores = int(enc["parameters"].get("cores", 0)) if not wanted_cores: @@ -37,10 +37,10 @@ def request_reboot_for_cpu(enc): ) if activity: - return fc.maintenance.Request(activity) + return Request(activity) -def request_reboot_for_qemu(): +def request_reboot_for_qemu(log) -> Optional[Request]: """Schedules a reboot if the Qemu binary environment has changed.""" # Update the -booted marker if necessary. We need to store the marker # in a place where it does not get removed after _internal_ reboots @@ -79,14 +79,14 @@ def request_reboot_for_qemu(): if booted_generation >= current_generation: # We do not automatically downgrade. If we ever want that then I - # want us to reconsider the side-effects. + # want us to reconsider the side effects. return msg = "Cold restart because the Qemu binary environment has changed" - return fc.maintenance.Request(RebootActivity("poweroff"), comment=msg) + return Request(RebootActivity("poweroff"), comment=msg) -def request_reboot_for_kernel(): +def request_reboot_for_kernel(log) -> Optional[Request]: """Schedules a reboot if the kernel has changed.""" booted = fc.util.nixos.kernel_version("/run/booted-system/kernel") current = fc.util.nixos.kernel_version("/run/current-system/kernel") @@ -100,7 +100,41 @@ def request_reboot_for_kernel(): booted=booted, current=current, ) - return fc.maintenance.Request( + return Request( RebootActivity("reboot"), comment=f"Reboot to activate changed kernel ({booted} to {current})", ) + + +def request_update(log, enc, current_requests) -> Optional[Request]: + """Schedule a system update if the channel has changed and the + resulting system is different from the running system. + + As a shortcut, switches to the new channel if the new system is still the + same. + """ + activity = UpdateActivity.from_enc(log, enc, current_requests) + + if activity is None: + log.info("update-unchanged", _replace_msg="No update needed.") + return + + activity.prepare() + + if activity.is_identical_to_running_system: + log.info("update-system-unchanged") + activity.update_system_channel() + return + + log.info( + "update-system-changed", + _replace_msg=( + "Update preparation was successful. This update will apply " + "changes to the system." + ), + _output=activity.changelog, + current_channel=activity.current_channel_url, + next_channel=activity.next_channel_url, + ) + + return Request(activity) diff --git a/pkgs/fc/agent/fc/maintenance/request.py b/pkgs/fc/agent/fc/maintenance/request.py index 3b0989dae..c84406cc8 100644 --- a/pkgs/fc/agent/fc/maintenance/request.py +++ b/pkgs/fc/agent/fc/maintenance/request.py @@ -14,7 +14,7 @@ import yaml from fc.util.time_date import ensure_timezone_present, format_datetime, utcnow -from .activity import ActivityMergeResult +from .activity import Activity, ActivityMergeResult from .estimate import Estimate from .state import State, evaluate_state @@ -42,7 +42,7 @@ class Request: MAX_RETRIES = 48 _reqid = None - activity = None + activity: Activity = None _comment = None _estimate: Optional[Estimate] = None next_due: Optional[datetime.datetime] = None @@ -50,7 +50,7 @@ class Request: added_at = None updated_at = None state = State.pending - _reqmanager = None # backpointer, will be set in ReqManager + _reqmanager = None # back-pointer, will be set in ReqManager def __init__( self, activity, estimate=None, comment=None, dir=None, log=_log @@ -306,25 +306,34 @@ def merge(self, other): activity_merge_result, ActivityMergeResult ), f"{activity_merge_result} has wrong type, must be ActivityMergeResult" - if activity_merge_result.merged: - # XXX: get rid of request estimate? - if self._estimate is not None and other._estimate is not None: - self._estimate = max(self._estimate, other._estimate) - if other._comment is not None and self._comment != other._comment: - self._comment += " " + other._comment - - if activity_merge_result.is_effective: - self.updated_at = utcnow() - self.save() - if activity_merge_result.is_significant: - return RequestMergeResult.SIGNIFICANT_UPDATE - else: - return RequestMergeResult.UPDATE - else: - return RequestMergeResult.REMOVE - else: + if not activity_merge_result.merged: return RequestMergeResult.NO_MERGE + self.activity = activity_merge_result.merged + + # XXX: get rid of request estimate? + if self._estimate is not None and other._estimate is not None: + self._estimate = max(self._estimate, other._estimate) + if other._comment is not None and self._comment != other._comment: + self._comment += " " + other._comment + + if not activity_merge_result.is_effective: + return RequestMergeResult.REMOVE + + self.log.debug( + "request-merge-update", + is_significant=activity_merge_result.is_significant, + changes=activity_merge_result.changes, + activity=activity_merge_result.merged, + ) + self.updated_at = utcnow() + self.save() + + if activity_merge_result.is_significant: + return RequestMergeResult.SIGNIFICANT_UPDATE + else: + return RequestMergeResult.UPDATE + def other_requests(self): """Lists other requests currently active in the ReqManager.""" return [ diff --git a/pkgs/fc/agent/fc/maintenance/tests/test_cli.py b/pkgs/fc/agent/fc/maintenance/tests/test_cli.py index 7e2e7ae0c..689a6c7e6 100644 --- a/pkgs/fc/agent/fc/maintenance/tests/test_cli.py +++ b/pkgs/fc/agent/fc/maintenance/tests/test_cli.py @@ -1,5 +1,6 @@ import json import unittest.mock +from unittest.mock import MagicMock import fc.maintenance.cli import pytest @@ -140,13 +141,10 @@ def test_invoke_request_system_properties_virtual( kernel.assert_called_once() -@unittest.mock.patch("fc.maintenance.cli.prepare_switch_in_maintenance") -def test_invoke_request_update(prepare_switch, invoke_app): +@unittest.mock.patch("fc.maintenance.cli.request_update") +@unittest.mock.patch("fc.maintenance.cli.load_enc") +def test_invoke_request_update(load_enc, request_update, invoke_app): invoke_app("request", "update") - prepare_switch.assert_called_once() - - -@unittest.mock.patch("fc.maintenance.cli.switch_with_update") -def test_invoke_request_update_run_now(switch, invoke_app): - invoke_app("request", "update", "--run-now") - switch.assert_called_once() + load_enc.assert_called_once() + request_update.assert_called_once() + fc.maintenance.cli.rm.add.assert_called_once() diff --git a/pkgs/fc/agent/fc/maintenance/tests/test_estimate.py b/pkgs/fc/agent/fc/maintenance/tests/test_estimate.py index 19f20a7fa..021be36a0 100644 --- a/pkgs/fc/agent/fc/maintenance/tests/test_estimate.py +++ b/pkgs/fc/agent/fc/maintenance/tests/test_estimate.py @@ -1,7 +1,6 @@ import datetime import pytest - from fc.maintenance.estimate import Estimate @@ -42,3 +41,7 @@ def test_zero_duration(): def test_repr(): assert repr(Estimate(42)) == "" + + +def test_comparison(): + assert Estimate("10m") < Estimate("1h") diff --git a/pkgs/fc/agent/fc/maintenance/tests/test_system_properties.py b/pkgs/fc/agent/fc/maintenance/tests/test_maintenance.py similarity index 68% rename from pkgs/fc/agent/fc/maintenance/tests/test_system_properties.py rename to pkgs/fc/agent/fc/maintenance/tests/test_maintenance.py index 5f7d103ef..d245ed74e 100644 --- a/pkgs/fc/agent/fc/maintenance/tests/test_system_properties.py +++ b/pkgs/fc/agent/fc/maintenance/tests/test_maintenance.py @@ -1,7 +1,8 @@ import contextlib import unittest.mock +from unittest.mock import MagicMock -import fc.maintenance.system_properties +import fc.maintenance.maintenance from fc.maintenance.activity import RebootType from fc.maintenance.activity.reboot import RebootActivity from fc.maintenance.activity.vm_change import VMChangeActivity @@ -18,7 +19,7 @@ @unittest.mock.patch("fc.util.vm.count_cores") def test_request_reboot_for_memory(count_cores, get_memory, logger): get_memory.return_value = 1024 - request = fc.maintenance.system_properties.request_reboot_for_memory(ENC) + request = fc.maintenance.maintenance.request_reboot_for_memory(logger, ENC) assert "1024 MiB -> 2048" in request.comment activity = request.activity assert isinstance(activity, VMChangeActivity) @@ -29,9 +30,11 @@ def test_request_reboot_for_memory(count_cores, get_memory, logger): @unittest.mock.patch("fc.util.dmi_memory.main") @unittest.mock.patch("fc.util.vm.count_cores") -def test_do_not_request_reboot_for_unchanged_memory(count_cores, get_memory): +def test_do_not_request_reboot_for_unchanged_memory( + count_cores, get_memory, logger +): get_memory.return_value = 2048 - request = fc.maintenance.system_properties.request_reboot_for_memory(ENC) + request = fc.maintenance.maintenance.request_reboot_for_memory(logger, ENC) assert request is None @@ -39,7 +42,7 @@ def test_do_not_request_reboot_for_unchanged_memory(count_cores, get_memory): @unittest.mock.patch("fc.util.vm.count_cores") def test_request_reboot_for_cpu(count_cores, get_memory, logger): count_cores.return_value = 1 - request = fc.maintenance.system_properties.request_reboot_for_cpu(ENC) + request = fc.maintenance.maintenance.request_reboot_for_cpu(logger, ENC) assert "1 -> 2" in request.comment activity = request.activity assert isinstance(activity, VMChangeActivity) @@ -50,9 +53,11 @@ def test_request_reboot_for_cpu(count_cores, get_memory, logger): @unittest.mock.patch("fc.util.dmi_memory.main") @unittest.mock.patch("fc.util.vm.count_cores") -def test_do_not_request_reboot_for_unchanged_cpu(count_cores, get_memory): +def test_do_not_request_reboot_for_unchanged_cpu( + count_cores, get_memory, logger +): count_cores.return_value = 2 - request = fc.maintenance.system_properties.request_reboot_for_cpu(ENC) + request = fc.maintenance.maintenance.request_reboot_for_cpu(logger, ENC) assert request is None @@ -63,6 +68,7 @@ def test_request_reboot_for_qemu_change( shutil_move, path_exists, path_isdir, + logger, ): path_isdir.return_value = True path_exists.return_value = True @@ -79,7 +85,7 @@ def fake_open_qemu_files(filename, encoding): with unittest.mock.patch( "builtins.open", contextlib.contextmanager(fake_open_qemu_files) ): - request = fc.maintenance.system_properties.request_reboot_for_qemu() + request = fc.maintenance.maintenance.request_reboot_for_qemu(logger) assert "Qemu binary environment has changed" in request.comment activity = request.activity @@ -94,6 +100,7 @@ def test_do_not_request_reboot_for_unchanged_qemu( shutil_move, path_exists, path_isdir, + logger, ): path_isdir.return_value = True path_exists.return_value = True @@ -110,12 +117,12 @@ def fake_open_qemu_files(filename, encoding): with unittest.mock.patch( "builtins.open", contextlib.contextmanager(fake_open_qemu_files) ): - request = fc.maintenance.system_properties.request_reboot_for_qemu() + request = fc.maintenance.maintenance.request_reboot_for_qemu(logger) assert request is None -def test_request_reboot_for_kernel_change(): +def test_request_reboot_for_kernel_change(logger): def fake_changed_kernel_version(path): if path == "/run/booted-system/kernel": return "5.10.45" @@ -125,7 +132,7 @@ def fake_changed_kernel_version(path): with unittest.mock.patch( "fc.util.nixos.kernel_version", fake_changed_kernel_version ): - request = fc.maintenance.system_properties.request_reboot_for_kernel() + request = fc.maintenance.maintenance.request_reboot_for_kernel(logger) assert "kernel (5.10.45 to 5.10.50)" in request.comment activity = request.activity @@ -133,7 +140,7 @@ def fake_changed_kernel_version(path): assert activity.reboot_needed == RebootType.WARM -def test_do_not_request_reboot_for_unchanged_kernel(): +def test_do_not_request_reboot_for_unchanged_kernel(logger): def fake_changed_kernel_version(path): if path == "/run/booted-system/kernel": return "5.10.50" @@ -143,6 +150,34 @@ def fake_changed_kernel_version(path): with unittest.mock.patch( "fc.util.nixos.kernel_version", fake_changed_kernel_version ): - request = fc.maintenance.system_properties.request_reboot_for_kernel() + request = fc.maintenance.maintenance.request_reboot_for_kernel(logger) + + assert request is None + + +def test_request_update(log, logger, monkeypatch): + enc = {} + from_enc_mock = MagicMock() + from_enc_mock.return_value.is_identical_to_running_system = False + monkeypatch.setattr( + "fc.maintenance.maintenance.UpdateActivity.from_enc", from_enc_mock + ) + + request = fc.maintenance.maintenance.request_update(logger, enc, []) + + assert request + log.has("update-system-changed") + + +def test_request_update_unchanged(log, logger, monkeypatch): + enc = {} + from_enc_mock = MagicMock() + from_enc_mock.return_value.is_identical_to_running_system = True + monkeypatch.setattr( + "fc.maintenance.maintenance.UpdateActivity.from_enc", from_enc_mock + ) + + request = fc.maintenance.maintenance.request_update(logger, enc, []) assert request is None + log.has("update-system-unchanged") diff --git a/pkgs/fc/agent/fc/manage/cli.py b/pkgs/fc/agent/fc/manage/cli.py index c05e33166..411d4be9e 100644 --- a/pkgs/fc/agent/fc/manage/cli.py +++ b/pkgs/fc/agent/fc/manage/cli.py @@ -306,3 +306,7 @@ def main(): command = typer.main.get_command(app) result = command(standalone_mode=False) sys.exit(result) + + +if __name__ == "__main__": + main() diff --git a/pkgs/fc/agent/fc/manage/manage.py b/pkgs/fc/agent/fc/manage/manage.py index 3640d65cc..d62ee07ff 100644 --- a/pkgs/fc/agent/fc/manage/manage.py +++ b/pkgs/fc/agent/fc/manage/manage.py @@ -1,19 +1,13 @@ """Update NixOS system configuration from infrastructure or local sources.""" import os -import os.path as p import re -import subprocess -from dataclasses import dataclass from pathlib import Path -import fc.maintenance -import fc.util.logging -from fc.maintenance.activity.update import UpdateActivity from fc.util import nixos +from fc.util.channel import Channel from fc.util.checks import CheckResult from fc.util.enc import STATE_VERSION_FILE -from fc.util.nixos import RE_FC_CHANNEL # Other platform code can also check the presence of this marker file to # change behaviour before/during the first agent run. @@ -40,142 +34,6 @@ """ -class Channel: - is_local = False - - def __init__(self, log, url, name="", environment=None, resolve_url=True): - self.url = url - self.name = name - self.environment = environment - self.system_path = None - - if url.startswith("file://"): - self.is_local = True - self.resolved_url = url.replace("file://", "") - elif resolve_url: - self.resolved_url = nixos.resolve_url_redirects(url) - else: - self.resolved_url = url - - self.log = log - - self.log_with_context = log.bind( - url=self.resolved_url, - name=name, - environment=environment, - is_local=self.is_local, - ) - - def version(self): - if self.is_local: - return "local-checkout" - label_comp = [ - "/root/.nix-defexpr/channels/{}/{}".format(self.name, c) - for c in [".version", ".version-suffix"] - ] - if all(p.exists(f) for f in label_comp): - return "".join(open(f).read() for f in label_comp) - - def __str__(self): - v = self.version() or "unknown" - return "".format( - self.name, v, self.resolved_url - ) - - def __eq__(self, other): - if isinstance(other, Channel): - return self.resolved_url == other.resolved_url - return NotImplemented - - @classmethod - def current(cls, log, channel_name): - """Looks up existing channel by name. - The URL found is usually already resolved (no redirects) - so we don't do it again here. It can still be enabled with - `resolve_url`, when needed. - """ - if not p.exists("/root/.nix-channels"): - log.debug("channel-current-no-nix-channels-dir") - return - with open("/root/.nix-channels") as f: - for line in f.readlines(): - url, name = line.strip().split(" ", 1) - if name == channel_name: - # We don't have to resolve the URL if it's a direct link - # to a Hydra build product. This is the normal case for - # running VMs because the nixos channel is set to an - # already resolved URL. - # Resolve all other URLs, for example initial URLs used - # during VM bootstrapping. - resolve_url = RE_FC_CHANNEL.match(url) is None - log.debug( - "channel-current", - url=url, - name=name, - resolve_url=resolve_url, - ) - return Channel(log, url, name, resolve_url=resolve_url) - - log.debug("channel-current-not-found", name=name) - - def load_nixos(self): - self.log_with_context.debug("channel-load-nixos") - - if self.is_local: - raise RuntimeError("`load` not applicable for local channels") - - nixos.update_system_channel(self.resolved_url, self.log) - - def check_local_channel(self): - if not p.exists(p.join(self.resolved_url, "fc")): - self.log_with_context.error( - "local-channel-nix-path-invalid", - _replace_msg="Expected NIX_PATH element 'fc' not found. Did you " - "create a 'channels' directory via `dev-setup` and point " - "the channel URL towards that directory?", - ) - - def switch(self, lazy=True, show_trace=False): - """ - Build system with this channel and switch to it. - Replicates the behaviour of nixos-rebuild switch and adds - a "lazy mode" which only switches to the built system if it actually - changed. - """ - self.log_with_context.debug("channel-switch-start") - # Put a temporary result link in /run to avoid a race condition - # with the garbage collector which may remove the system we just built. - # If register fails, we still hold a GC root until the next reboot. - out_link = "/run/fc-agent-built-system" - self.build(out_link, show_trace) - nixos.register_system_profile(self.system_path, self.log) - # New system is registered, delete the temporary result link. - os.unlink(out_link) - return nixos.switch_to_system(self.system_path, lazy, self.log) - - def build(self, out_link=None, show_trace=False): - """ - Build system with this channel. Works like nixos-rebuild build. - Does not modify the running system. - """ - self.log_with_context.debug("channel-build-start") - - if show_trace: - build_options = ["--show-trace"] - else: - build_options = [] - - if self.is_local: - self.check_local_channel() - system_path = nixos.build_system( - self.resolved_url, build_options, out_link, self.log - ) - self.system_path = system_path - - def dry_activate(self): - return nixos.dry_activate_system(self.system_path, self.log) - - class SwitchFailed(Exception): pass @@ -218,7 +76,7 @@ def check(log, enc) -> CheckResult: errors.append("ENC: environment URL is missing.") uses_local_checkout = ( - environment_url.startswith("file://") if environment_url else None + environment_url.startswith("file:") if environment_url else None ) if production_flag and uses_local_checkout: @@ -250,58 +108,19 @@ def check(log, enc) -> CheckResult: else: errors.append("`nixos` channel not set.") - return CheckResult(errors, warnings) + nixos_warnings_file = Path("/etc/fc_agent_deprecation_warnings") + if nixos_warnings_file.exists(): + nixos_warnings_content = nixos_warnings_file.read_text() + if nixos_warnings_content: + nixos_warnings = nixos_warnings_content.splitlines() + nixos_warnings_line = " ".join(nixos_warnings) + warnings.append( + f"NixOS deprecation warnings found ({len(nixos_warnings)}), " + f"should be fixed before upgrading: {nixos_warnings_line}" + ) -def prepare_switch_in_maintenance(log, enc): - if not enc or not enc.get("parameters"): - log.warning( - "enc-data-missing", msg="No ENC data. Not building channel." - ) - return False - - env_name = enc["parameters"]["environment"] - - # scheduled update available? - next_channel = Channel( - log, - enc["parameters"]["environment_url"], - name="next", - environment=env_name, - ) - - if not next_channel or next_channel.is_local: - log.warn( - "maintenance-error-local-channel", - _replace_msg="Switch-in-maintenance incompatible with local checkout, abort.", - ) - return False - - request_manager = fc.maintenance.ReqManager() - request_manager.scan() - current_requests = request_manager.requests.values() - - current_channel = Channel.current(log, "nixos") - if next_channel != current_channel: - log.info( - "maintenance-prepare-changed", - current=str(current_channel), - next=str(next_channel), - ) - - activity = UpdateActivity(next_channel.resolved_url, env_name) - activity.prepare(current_requests=current_requests) - - with fc.maintenance.ReqManager() as rm: - request = rm.add(fc.maintenance.Request(activity)) - - if request is None: - return - - return activity - else: - log.info("maintenance-prepare-unchanged") - return False + return CheckResult(errors, warnings) def dry_activate(log, channel_url, show_trace=False): diff --git a/pkgs/fc/agent/fc/manage/tests/test_manage.py b/pkgs/fc/agent/fc/manage/tests/test_manage.py index db493cb40..901a3d82f 100644 --- a/pkgs/fc/agent/fc/manage/tests/test_manage.py +++ b/pkgs/fc/agent/fc/manage/tests/test_manage.py @@ -1,18 +1,11 @@ from unittest.mock import MagicMock, Mock +import fc.manage.manage import responses +from fc.manage.manage import Channel from pytest import fixture, raises from requests import HTTPError -import fc.manage.manage -from fc.manage.manage import Channel - - -@fixture -def mocked_responses(): - with responses.RequestsMock() as rsps: - yield rsps - def expr_url(url): return url + "nixexprs.tar.xz" @@ -67,105 +60,3 @@ def test_channel_wrong_url_should_raise(logger, mocked_responses): with raises(HTTPError): Channel(logger, url) - - -def test_channel_prepare_maintenance_changed( - log, mocked_responses, logger, monkeypatch -): - environment = "fc-21.05-dev" - current_channel_url = ( - "https://hydra.flyingcircus.io/build/93000/download/1/nixexprs.tar.xz" - ) - next_channel_url = ( - "https://hydra.flyingcircus.io/build/93222/download/1/nixexprs.tar.xz" - ) - current_version = "21.05.1233.a9cc58d" - next_version = "21.05.1235.bacc11d" - - enc = { - "parameters": { - "environment_url": next_channel_url, - "environment": environment, - } - } - - mocked_responses.add(responses.HEAD, current_channel_url) - mocked_responses.add(responses.HEAD, next_channel_url) - monkeypatch.setattr( - "fc.util.nixos.channel_version", (lambda c: next_version) - ) - - activity_factory_mock = MagicMock() - activity_mock = MagicMock() - activity_factory_mock.return_value = activity_mock - monkeypatch.setattr( - "fc.manage.manage.UpdateActivity", activity_factory_mock - ) - - current_channel = Channel(logger, current_channel_url) - current_channel.version = lambda *a: current_version - monkeypatch.setattr( - "fc.manage.manage.Channel.current", lambda *a: current_channel - ) - - req_manager_mock = MagicMock() - req_manager_mock.return_value.__enter__.return_value.add = ( - rm_add_mock - ) = Mock() - monkeypatch.setattr("fc.maintenance.ReqManager", req_manager_mock) - - fc.manage.manage.prepare_switch_in_maintenance(logger, enc) - - activity_factory_mock.assert_called_once_with(next_channel_url, environment) - activity_mock.prepare.assert_called_once() - request = rm_add_mock.call_args.args[0] - assert request.activity == activity_mock - log.has( - "maintenance-prepare-changed", - current="", - next="", - ) - - -def test_channel_prepare_maintenance_unchanged( - log, mocked_responses, logger, monkeypatch -): - """Given an unchanged channel url, should not prepare an update activity""" - environment = "fc-21.05-dev" - current_channel_url = ( - "https://hydra.flyingcircus.io/build/93000/download/1/nixexprs.tar.xz" - ) - current_version = "21.05.1233.a9cc58d" - - enc = { - "parameters": { - "environment_url": current_channel_url, - "environment": environment, - } - } - - mocked_responses.add(responses.HEAD, current_channel_url) - monkeypatch.setattr( - "fc.util.nixos.channel_version", (lambda c: current_version) - ) - - activity_factory_mock = MagicMock() - monkeypatch.setattr( - "fc.manage.manage.UpdateActivity", activity_factory_mock - ) - - current_channel = Channel(logger, current_channel_url) - current_channel.version = lambda *a: current_version - monkeypatch.setattr( - "fc.manage.manage.Channel.current", lambda *a: current_channel - ) - - req_manager_mock = MagicMock() - req_manager_mock.return_value.__enter__.return_value.add = ( - rm_add_mock - ) = Mock() - monkeypatch.setattr("fc.maintenance.ReqManager", req_manager_mock) - - fc.manage.manage.prepare_switch_in_maintenance(logger, enc) - - activity_factory_mock.assert_not_called() diff --git a/pkgs/fc/agent/fc/util/channel.py b/pkgs/fc/agent/fc/util/channel.py new file mode 100644 index 000000000..2a6811cc1 --- /dev/null +++ b/pkgs/fc/agent/fc/util/channel.py @@ -0,0 +1,141 @@ +import os +import os.path as p + +from fc.util import nixos +from fc.util.nixos import RE_FC_CHANNEL + + +class Channel: + is_local = False + + def __init__(self, log, url, name="", environment=None, resolve_url=True): + self.url = url + self.name = name + self.environment = environment + self.system_path = None + + if url.startswith("file://"): + self.is_local = True + self.resolved_url = url.replace("file://", "") + elif resolve_url: + self.resolved_url = nixos.resolve_url_redirects(url) + else: + self.resolved_url = url + + self.log = log + + self.log_with_context = log.bind( + url=self.resolved_url, + name=name, + environment=environment, + is_local=self.is_local, + ) + + def version(self): + if self.is_local: + return "local-checkout" + label_comp = [ + "/root/.nix-defexpr/channels/{}/{}".format(self.name, c) + for c in [".version", ".version-suffix"] + ] + if all(p.exists(f) for f in label_comp): + return "".join(open(f).read() for f in label_comp) + + def __str__(self): + v = self.version() or "unknown" + return "".format( + self.name, v, self.resolved_url + ) + + def __eq__(self, other): + if isinstance(other, Channel): + return self.resolved_url == other.resolved_url + return NotImplemented + + @classmethod + def current(cls, log, channel_name): + """Looks up existing channel by name. + The URL found is usually already resolved (no redirects) + so we don't do it again here. It can still be enabled with + `resolve_url`, when needed. + """ + if not p.exists("/root/.nix-channels"): + log.debug("channel-current-no-nix-channels-dir") + return + with open("/root/.nix-channels") as f: + for line in f.readlines(): + url, name = line.strip().split(" ", 1) + if name == channel_name: + # We don't have to resolve the URL if it's a direct link + # to a Hydra build product. This is the normal case for + # running VMs because the nixos channel is set to an + # already resolved URL. + # Resolve all other URLs, for example initial URLs used + # during VM bootstrapping. + resolve_url = RE_FC_CHANNEL.match(url) is None + log.debug( + "channel-current", + url=url, + name=name, + resolve_url=resolve_url, + ) + return Channel(log, url, name, resolve_url=resolve_url) + + log.debug("channel-current-not-found", name=name) + + def load_nixos(self): + self.log_with_context.debug("channel-load-nixos") + + if self.is_local: + raise RuntimeError("`load` not applicable for local channels") + + nixos.update_system_channel(self.resolved_url, self.log) + + def check_local_channel(self): + if not p.exists(p.join(self.resolved_url, "fc")): + self.log_with_context.error( + "local-channel-nix-path-invalid", + _replace_msg="Expected NIX_PATH element 'fc' not found. Did you " + "create a 'channels' directory via `dev-setup` and point " + "the channel URL towards that directory?", + ) + + def switch(self, lazy=True, show_trace=False): + """ + Build system with this channel and switch to it. + Replicates the behaviour of nixos-rebuild switch and adds + a "lazy mode" which only switches to the built system if it actually + changed. + """ + self.log_with_context.debug("channel-switch-start") + # Put a temporary result link in /run to avoid a race condition + # with the garbage collector which may remove the system we just built. + # If register fails, we still hold a GC root until the next reboot. + out_link = "/run/fc-agent-built-system" + self.build(out_link, show_trace) + nixos.register_system_profile(self.system_path, self.log) + # New system is registered, delete the temporary result link. + os.unlink(out_link) + return nixos.switch_to_system(self.system_path, lazy, self.log) + + def build(self, out_link=None, show_trace=False): + """ + Build system with this channel. Works like nixos-rebuild build. + Does not modify the running system. + """ + self.log_with_context.debug("channel-build-start") + + if show_trace: + build_options = ["--show-trace"] + else: + build_options = [] + + if self.is_local: + self.check_local_channel() + system_path = nixos.build_system( + self.resolved_url, build_options, out_link, self.log + ) + self.system_path = system_path + + def dry_activate(self): + return nixos.dry_activate_system(self.system_path, self.log) diff --git a/pkgs/fc/agent/fc/util/nixos.py b/pkgs/fc/agent/fc/util/nixos.py index df39b2ae4..06eb03171 100644 --- a/pkgs/fc/agent/fc/util/nixos.py +++ b/pkgs/fc/agent/fc/util/nixos.py @@ -29,11 +29,6 @@ UnitChanges = dict[str, list[str]] -class Channel: - def __init__(self, url) -> None: - self.url = url - - class ChannelException(Exception): def __init__(self, msg=None, stdout=None, stderr=None): self.stdout = stdout @@ -221,25 +216,28 @@ def detect_systemd_unit_changes(dry_activate_lines): def format_unit_change_lines(unit_changes): - units_by_category = {} + # Clean up raw unit changes: usually, units are stopped and + # started shortly after for updates. They get their own category + # "Start/Stop" to separate them from permanent stops and starts. + pretty_unit_changes = {} start_units = set(unit_changes["start"]) stop_units = set(unit_changes["stop"]) reload_units = set(unit_changes["reload"]) start_stop_units = start_units.intersection(stop_units) - units_by_category["Start/Stop"] = start_stop_units - units_by_category["Restart"] = set(unit_changes["restart"]) - units_by_category["Start"] = start_units - start_stop_units - units_by_category["Stop"] = stop_units - start_stop_units - units_by_category["Reload"] = reload_units - {"dbus.service"} + pretty_unit_changes["Start/Stop"] = start_stop_units + pretty_unit_changes["Restart"] = set(unit_changes["restart"]) + pretty_unit_changes["Start"] = start_units - start_stop_units + pretty_unit_changes["Stop"] = stop_units - start_stop_units + pretty_unit_changes["Reload"] = reload_units - {"dbus.service"} unit_change_lines = [] - for cat, units in units_by_category.items(): + for cat, units in pretty_unit_changes.items(): if units: - unit_change_lines.append(f"{cat}:") - unit_change_lines.extend( - [" - " + u.replace(".service", "") for u in sorted(units)] + unit_str = ", ".join( + u.replace(".service", "") for u in sorted(units) ) + unit_change_lines.append(f"{cat}: {unit_str}") return unit_change_lines diff --git a/pkgs/fc/agent/shell.nix b/pkgs/fc/agent/shell.nix index 871e8341c..8634f37fe 100644 --- a/pkgs/fc/agent/shell.nix +++ b/pkgs/fc/agent/shell.nix @@ -1,4 +1,7 @@ let pkgs = import {}; - -in pkgs.python310Packages.callPackage ./. {} + fcagent = pkgs.python310Packages.callPackage ./. {}; +in +fcagent.overridePythonAttrs(_: { + doCheck = true; +})