diff --git a/tests/e2e-test-framework/conftest.py b/tests/e2e-test-framework/conftest.py index f52c0977b..8498f5b95 100644 --- a/tests/e2e-test-framework/conftest.py +++ b/tests/e2e-test-framework/conftest.py @@ -1,5 +1,6 @@ import logging from datetime import datetime +from typing import Generator import pytest from framework.description_plugin import DescriptionPlugin from framework.qtest_helper import QTestHelper @@ -91,8 +92,8 @@ def get_utils(request) -> Utils: def get_ssh_executors(request) -> dict[str, SSHCommandExecutor]: utils = get_utils(request) - ips = utils.get_controlplane_ips() + utils.get_worker_ips() - executors = {ip: SSHCommandExecutor(ip_address=ip, username=utils.vm_user, password=utils.vm_cred) for ip in ips} + worker_ips = utils.get_worker_ips() + executors = {ip: SSHCommandExecutor(ip_address=ip, username=utils.vm_user, password=utils.vm_cred) for ip in worker_ips} return executors @pytest.fixture(scope="session") @@ -115,6 +116,20 @@ def link_requirements_in_background(request): requirements_thread.start() pytest.threads.append(requirements_thread) +@pytest.fixture(autouse=True) +def keep_drive_count(drive_utils_executors: dict[str, DriveUtils]) -> Generator[None, None, None]: + hosts_per_node_before = {ip: drive_utils.get_all_hosts() for ip, drive_utils in drive_utils_executors.items()} + yield + hosts_per_node_after = {ip: drive_utils.get_all_hosts() for ip, drive_utils in drive_utils_executors.items()} + for ip, drive_utils in drive_utils_executors.items(): + drive_utils.rescan_missing_hosts(before=hosts_per_node_before[ip], after=hosts_per_node_after[ip]) + +@pytest.fixture(autouse=True) +def wipe_drives(drive_utils_executors: dict[str, DriveUtils]) -> Generator[None, None, None]: + yield + for _, drive_utils in drive_utils_executors.items(): + drive_utils.wipe_drives() + def link_requirements(request): for marker in request.node.iter_markers(): if marker.name == "requirements": diff --git a/tests/e2e-test-framework/framework/drive.py b/tests/e2e-test-framework/framework/drive.py index f6afd2686..f8e1f30f0 100644 --- a/tests/e2e-test-framework/framework/drive.py +++ b/tests/e2e-test-framework/framework/drive.py @@ -1,6 +1,6 @@ import json import logging -from typing import Any, List, TypedDict +from typing import Any, Dict, List, TypedDict from framework.ssh import SSHCommandExecutor @@ -48,6 +48,44 @@ def restore(self, host_num: int) -> None: ) self._handle_errors(errors) + def get_all_hosts(self) -> Dict[str, int]: + """ + Retrieves a dictionary of all SCSI hosts in the system. + + Returns: + dict: A dictionary mapping the SCSI ID to the host number. + """ + param = "'{print $9}'" + output, errors = self.executor.exec( + f"ls -l /sys/class/scsi_device | awk {param}" + ) + self._handle_errors(errors) + scsi_ids = output.splitlines() + return { + scsi_id: int(scsi_id.split(":")[0]) + for scsi_id in scsi_ids + if scsi_id + } + + def rescan_missing_hosts( + self, before: Dict[str, int], after: Dict[str, int] + ) -> None: + """ + Rescans for missing hosts in the system. + + Args: + before (Dict[str, int]): A dictionary mapping the SCSI ID to the host number before rescanning. + after (Dict[str, int]): A dictionary mapping the SCSI ID to the host number after rescanning. + + Example: + >>> drive = DriveUtils(executor=executor) + >>> drive.rescan_missing_hosts(before={ '18:0:0:0': 0, '18:0:0:1': 0 }, after={ '18:0:0:0': 0 }) + """ + for scsi_id, host_num in before.items(): + if after.get(scsi_id) is None: + self.restore(host_num=host_num) + logging.info(f"host{host_num} was restored") + def get_host_num(self, drive_path_or_name: str) -> int: """ Retrieves the host number associated with the specified drive path or name. @@ -87,22 +125,31 @@ def _get_drives_to_wipe(self, lsblk_out: dict) -> dict[str, DriveChild]: """ to_wipe = {} for drive in lsblk_out["blockdevices"]: - if drive['type'] == 'disk': + if drive["type"] == "disk": children = drive.get("children") drive_mountpoints = drive.get("mountpoints", []) drive_mountpoints = [ - mountpoint for mountpoint in drive_mountpoints if mountpoint + mountpoint + for mountpoint in drive_mountpoints + if mountpoint ] if len(drive_mountpoints) != 0: - logging.warning(f"found drive with drive mountpoints: \"/dev/{drive['name']}\", skipping...") + logging.warning( + f"found drive with drive mountpoints: \"/dev/{drive['name']}\", skipping..." + ) continue if children: for child in children: child_mountpoints = child.get("mountpoints", []) child_mountpoints = [ - mountpoint for mountpoint in child_mountpoints if mountpoint + mountpoint + for mountpoint in child_mountpoints + if mountpoint ] - if len(child_mountpoints) == 0 and child['type'] in ["part", "lvm"]: + if len(child_mountpoints) == 0 and child["type"] in [ + "part", + "lvm", + ]: logging.info( f"found drive \"/dev/{drive['name']}\" with child \"{child['name']}\" with no mountpoints." ) @@ -187,7 +234,9 @@ def wipe_drives(self) -> None: self._handle_errors(errors) output = json.loads(output) drives_to_wipe = self._get_drives_to_wipe(lsblk_out=output) - logging.warning(f"drives to wipe: {drives_to_wipe}") + logging.warning( + f"drives to wipe on node {self.executor.ip_address}: {drives_to_wipe}" + ) for drive, children in drives_to_wipe.items(): if children["type"] == "part": diff --git a/tests/e2e-test-framework/framework/utils.py b/tests/e2e-test-framework/framework/utils.py index 81f9a4f4b..4a4243fa7 100644 --- a/tests/e2e-test-framework/framework/utils.py +++ b/tests/e2e-test-framework/framework/utils.py @@ -327,6 +327,20 @@ def get_drive_cr(self, volume_name: str, namespace: str) -> dict: ) return drive_cr + def get_pod_node_ip(self, pod_name: str, namespace: str) -> str: + """ + Retrieves the IP address of the node associated with the given pod name and namespace. + Args: + pod_name (str): The name of the pod. + namespace (str): The namespace of the pod. + Returns: + str: The IP address of the node associated with the pod. + """ + pod = self.list_pods(name_prefix=pod_name, namespace=namespace)[0] + node_name = pod.spec.node_name + node = self.core_v1_api.read_node(name=node_name) + return node.status.addresses[0].address + def get_events_by_reason( self, plural: str, @@ -663,17 +677,13 @@ def recreate_pod(self, name: str, namespace: str) -> V1Pod: Returns: V1Pod: The recreated Pod. """ - self.core_v1_api.delete_namespaced_pod( - name=name, namespace=namespace - ) + self.core_v1_api.delete_namespaced_pod(name=name, namespace=namespace) logging.info( f"pod {name} deleted, waiting for a new pod to be created" ) time.sleep(5) - pod = self.list_pods(name, namespace=namespace)[ - 0 - ] + pod = self.list_pods(name, namespace=namespace)[0] assert self.is_pod_ready( name, timeout=120 ), "pod not ready after 120 seconds timeout" diff --git a/tests/e2e-test-framework/tests/test_fake_attach.py b/tests/e2e-test-framework/tests/test_fake_attach.py index b53e5026b..1d64c4aea 100644 --- a/tests/e2e-test-framework/tests/test_fake_attach.py +++ b/tests/e2e-test-framework/tests/test_fake_attach.py @@ -27,12 +27,11 @@ def setup_class( cls.drive_utils = drive_utils_executors cls.sts = STS(cls.namespace, cls.name, cls.replicas) cls.sts.delete() - cls.sts.create(storage_classes=[const.SSD_SC]) + cls.sts.create(storage_classes=[const.HDD_SC]) yield cls.sts.delete() - # cls.utils.clear_cluster_resources(cls.namespace) @pytest.mark.hal def test_5808_fake_attach_without_dr(self): @@ -78,7 +77,9 @@ def test_5808_fake_attach_without_dr(self): ) logging.info(f"drive {drive_name} went {const.STATUS_OFFLINE}") - pod = self.utils.recreate_pod(pod) + pod = self.utils.recreate_pod( + name=pod.metadata.name, namespace=self.namespace + ) volume_name = volume["metadata"]["name"] assert self.event_in( plural="volumes", @@ -94,7 +95,9 @@ def test_5808_fake_attach_without_dr(self): name=drive_name, expected_status=const.STATUS_ONLINE ) - self.utils.recreate_pod(pod) + self.utils.recreate_pod( + name=pod.metadata.name, namespace=self.namespace + ) assert self.event_in( plural="volumes", resource_name=volume_name,