Skip to content

Commit

Permalink
azure: case-insensitive UUID to avoid new IID during kernel upgrade (#…
Browse files Browse the repository at this point in the history
…798)

Kernel's newer than 4.15 present /sys/dmi/id/product_uuid as a
lowercase value. Previously UUID was uppercase.

Azure datasource reads the product_uuid directly as their platform's
instance-id. This presents a problem if a kernel is either
upgraded or downgraded across the 4.15 kernel version boundary because
the case of the UUID will change, resulting in cloud-init seeing a
"new" instance id and re-running all modules.

Re-running cc_ssh in cloud-init deletes and regenerates ssh_host keys
on a system which can cause concern on long-running instances that
somethingnefarious has happened.

Also add:
- An integration test for this for Azure Bionic Ubuntu FIPS upgrading from
   a FIPS kernel with uppercase UUID to a lowercase UUID in linux-azure
- A new pytest.mark.sru_next to collect all integration tests related to our
   next SRU

LP: #1835584
  • Loading branch information
blackboxsw committed Feb 19, 2021
1 parent 08d8902 commit 66e2d42
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 9 deletions.
12 changes: 10 additions & 2 deletions cloudinit/sources/DataSourceAzure.py
Expand Up @@ -683,10 +683,18 @@ def check_instance_id(self, sys_cfg):
def _iid(self, previous=None):
prev_iid_path = os.path.join(
self.paths.get_cpath('data'), 'instance-id')
iid = dmi.read_dmi_data('system-uuid')
# Older kernels than 4.15 will have UPPERCASE product_uuid.
# We don't want Azure to react to an UPPER/lower difference as a new
# instance id as it rewrites SSH host keys.
# LP: #1835584
iid = dmi.read_dmi_data('system-uuid').lower()
if os.path.exists(prev_iid_path):
previous = util.load_file(prev_iid_path).strip()
if is_byte_swapped(previous, iid):
if previous.lower() == iid:
# If uppercase/lowercase equivalent, return the previous value
# to avoid new instance id.
return previous
if is_byte_swapped(previous.lower(), iid):
return previous
return iid

Expand Down
104 changes: 104 additions & 0 deletions tests/integration_tests/bugs/test_lp1835584.py
@@ -0,0 +1,104 @@
""" Integration test for LP #1835584
Upstream linux kernels prior to 4.15 providate DMI product_uuid in uppercase.
More recent kernels switched to lowercase for DMI product_uuid. Azure
datasource uses this product_uuid as the instance-id for cloud-init.
The linux-azure-fips kernel installed in PRO FIPs images, that product UUID is
uppercase whereas the linux-azure cloud-optimized kernel reports the UUID as
lowercase.
In cases where product_uuid changes case, ensure cloud-init doesn't
recreate ssh hostkeys across reboot (due to detecting an instance_id change).
This currently only affects linux-azure-fips -> linux-azure on Bionic.
This test won't run on Xenial because both linux-azure-fips and linux-azure
report uppercase product_uuids.
The test will launch a specific Bionic Ubuntu PRO FIPS image which has a
linux-azure-fips kernel known to report product_uuid as uppercase. Then upgrade
and reboot into linux-azure kernel which is known to report product_uuid as
lowercase.
Across the reboot, assert that we didn't re-run config_ssh by virtue of
seeing only one semaphore creation log entry of type:
Writing to /var/lib/cloud/instances/<UUID>/sem/config_ssh -
https://bugs.launchpad.net/cloud-init/+bug/1835584
"""
import re

import pytest

from tests.integration_tests.instances import IntegrationAzureInstance
from tests.integration_tests.clouds import (
ImageSpecification, IntegrationCloud
)
from tests.integration_tests.conftest import get_validated_source


IMG_AZURE_UBUNTU_PRO_FIPS_BIONIC = (
"Canonical:0001-com-ubuntu-pro-bionic-fips:pro-fips-18_04:18.04.202010201"
)


def _check_iid_insensitive_across_kernel_upgrade(
instance: IntegrationAzureInstance
):
uuid = instance.read_from_file("/sys/class/dmi/id/product_uuid")
assert uuid.isupper(), (
"Expected uppercase UUID on Ubuntu FIPS image {}".format(
uuid
)
)
orig_kernel = instance.execute("uname -r").strip()
assert "azure-fips" in orig_kernel
result = instance.execute("apt-get update")
# Install a 5.4+ kernel which provides lowercase product_uuid
result = instance.execute("apt-get install linux-azure --assume-yes")
if not result.ok:
pytest.fail("Unable to install linux-azure kernel: {}".format(result))
instance.restart()
new_kernel = instance.execute("uname -r").strip()
assert orig_kernel != new_kernel
assert "azure-fips" not in new_kernel
assert "azure" in new_kernel
new_uuid = instance.read_from_file("/sys/class/dmi/id/product_uuid")
assert (
uuid.lower() == new_uuid
), "Expected UUID on linux-azure to be lowercase of FIPS: {}".format(uuid)
log = instance.read_from_file("/var/log/cloud-init.log")
RE_CONFIG_SSH_SEMAPHORE = r"Writing.*sem/config_ssh "
ssh_runs = len(re.findall(RE_CONFIG_SSH_SEMAPHORE, log))
assert 1 == ssh_runs, "config_ssh ran too many times {}".format(ssh_runs)


@pytest.mark.azure
@pytest.mark.sru_next
def test_azure_kernel_upgrade_case_insensitive_uuid(
session_cloud: IntegrationCloud
):
cfg_image_spec = ImageSpecification.from_os_image()
if (cfg_image_spec.os, cfg_image_spec.release) != ("ubuntu", "bionic"):
pytest.skip(
"Test only supports ubuntu:bionic not {0.os}:{0.release}".format(
cfg_image_spec
)
)
source = get_validated_source(session_cloud)
if not source.installs_new_version():
pytest.skip(
"Provide CLOUD_INIT_SOURCE to install expected working cloud-init"
)
image_id = IMG_AZURE_UBUNTU_PRO_FIPS_BIONIC
with session_cloud.launch(
launch_kwargs={"image_id": image_id}
) as instance:
# We can't use setup_image fixture here because we want to avoid
# taking a snapshot or cleaning the booted machine after cloud-init
# upgrade.
instance.install_new_cloud_init(
source, take_snapshot=False, clean=False
)
_check_iid_insensitive_across_kernel_upgrade(instance)
6 changes: 4 additions & 2 deletions tests/integration_tests/instances.py
Expand Up @@ -116,7 +116,8 @@ def snapshot(self):
def install_new_cloud_init(
self,
source: CloudInitSource,
take_snapshot=True
take_snapshot=True,
clean=True,
):
if source == CloudInitSource.DEB_PACKAGE:
self.install_deb()
Expand All @@ -133,7 +134,8 @@ def install_new_cloud_init(
)
version = self.execute('cloud-init -v').split()[-1]
log.info('Installed cloud-init version: %s', version)
self.instance.clean()
if clean:
self.instance.clean()
if take_snapshot:
snapshot_id = self.snapshot()
self.cloud.snapshot_id = snapshot_id
Expand Down
39 changes: 34 additions & 5 deletions tests/unittests/test_datasource/test_azure.py
Expand Up @@ -201,6 +201,7 @@ def construct_valid_ovf_env(data=None, pubkeys=None,
}

MOCKPATH = 'cloudinit.sources.DataSourceAzure.'
EXAMPLE_UUID = 'd0df4c54-4ecb-4a4b-9954-5bdf3ed5c3b8'


class TestParseNetworkConfig(CiTestCase):
Expand Down Expand Up @@ -630,7 +631,7 @@ def _get_mockds(self):
return dsaz

def _get_ds(self, data, agent_command=None, distro='ubuntu',
apply_network=None):
apply_network=None, instance_id=None):

def dsdevs():
return data.get('dsdevs', [])
Expand Down Expand Up @@ -659,7 +660,10 @@ def _pubkeys_from_crt_files(flist):
self.m_ephemeral_dhcpv4 = mock.MagicMock()
self.m_ephemeral_dhcpv4_with_reporting = mock.MagicMock()

self.instance_id = 'D0DF4C54-4ECB-4A4B-9954-5BDF3ED5C3B8'
if instance_id:
self.instance_id = instance_id
else:
self.instance_id = EXAMPLE_UUID

def _dmi_mocks(key):
if key == 'system-uuid':
Expand Down Expand Up @@ -910,7 +914,7 @@ def test_crawl_metadata_returns_structured_data_and_caches_nothing(self):
'azure_data': {
'configurationsettype': 'LinuxProvisioningConfiguration'},
'imds': NETWORK_METADATA,
'instance-id': 'D0DF4C54-4ECB-4A4B-9954-5BDF3ED5C3B8',
'instance-id': EXAMPLE_UUID,
'local-hostname': u'myhost',
'random_seed': 'wild'}

Expand Down Expand Up @@ -1613,6 +1617,32 @@ def test_fabric_data_included_in_metadata(self):
self.assertTrue(ret)
self.assertEqual('value', dsrc.metadata['test'])

def test_instance_id_case_insensitive(self):
"""Return the previous iid when current is a case-insensitive match."""
lower_iid = EXAMPLE_UUID.lower()
upper_iid = EXAMPLE_UUID.upper()
# lowercase current UUID
ds = self._get_ds(
{'ovfcontent': construct_valid_ovf_env()}, instance_id=lower_iid
)
# UPPERCASE previous
write_file(
os.path.join(self.paths.cloud_dir, 'data', 'instance-id'),
upper_iid)
ds.get_data()
self.assertEqual(upper_iid, ds.metadata['instance-id'])

# UPPERCASE current UUID
ds = self._get_ds(
{'ovfcontent': construct_valid_ovf_env()}, instance_id=upper_iid
)
# lowercase previous
write_file(
os.path.join(self.paths.cloud_dir, 'data', 'instance-id'),
lower_iid)
ds.get_data()
self.assertEqual(lower_iid, ds.metadata['instance-id'])

def test_instance_id_endianness(self):
"""Return the previous iid when dmi uuid is the byteswapped iid."""
ds = self._get_ds({'ovfcontent': construct_valid_ovf_env()})
Expand All @@ -1628,8 +1658,7 @@ def test_instance_id_endianness(self):
os.path.join(self.paths.cloud_dir, 'data', 'instance-id'),
'644CDFD0-CB4E-4B4A-9954-5BDF3ED5C3B8')
ds.get_data()
self.assertEqual(
'D0DF4C54-4ECB-4A4B-9954-5BDF3ED5C3B8', ds.metadata['instance-id'])
self.assertEqual(self.instance_id, ds.metadata['instance-id'])

def test_instance_id_from_dmidecode_used(self):
ds = self._get_ds({'ovfcontent': construct_valid_ovf_env()})
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Expand Up @@ -184,5 +184,6 @@ markers =
instance_name: the name to be used for the test instance
sru_2020_11: test is part of the 2020/11 SRU verification
sru_2021_01: test is part of the 2021/01 SRU verification
sru_next: test is part of the next SRU verification
ubuntu: this test should run on Ubuntu
unstable: skip this test because it is flakey

0 comments on commit 66e2d42

Please sign in to comment.