Skip to content

Commit

Permalink
Azure VM Preprovisioning support.
Browse files Browse the repository at this point in the history
This change will enable Azure VMs to report provisioning has completed
twice: first to tell the fabric it has completed, then a second time to
enable customer settings. The datasource for the second provisioning is
the Instance Metadata Service (IMDS), and the VM will poll indefinitely for
the new ovf-env.xml from IMDS.

This branch introduces EphemeralDHCPv4, which encapsulates common logic
used by both DataSourceEc2 and DataSourceAzure for temporary DHCP
interactions without side-effects.

LP: #1734991
  • Loading branch information
Douglas Jordan authored and blackboxsw committed Jan 24, 2018
1 parent 30597f2 commit c03bdd3
Show file tree
Hide file tree
Showing 11 changed files with 397 additions and 53 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -10,3 +10,4 @@ parts
prime
stage
*.snap
*.cover
43 changes: 42 additions & 1 deletion cloudinit/net/dhcp.py
Expand Up @@ -10,7 +10,9 @@
import re
import signal

from cloudinit.net import find_fallback_nic, get_devicelist
from cloudinit.net import (
EphemeralIPv4Network, find_fallback_nic, get_devicelist)
from cloudinit.net.network_state import mask_and_ipv4_to_bcast_addr as bcip
from cloudinit import temp_utils
from cloudinit import util
from six import StringIO
Expand All @@ -29,6 +31,45 @@ class InvalidDHCPLeaseFileError(Exception):
pass


class NoDHCPLeaseError(Exception):
    """Signal that no usable DHCP lease could be obtained."""


class EphemeralDHCPv4(object):
    """Context manager wrapping DHCP discovery plus EphemeralIPv4Network.

    Entering performs DHCP discovery on ``iface`` and enters an
    EphemeralIPv4Network built from the most recent lease; exiting
    delegates to that network's ``__exit__``.
    """

    def __init__(self, iface=None):
        # iface is passed unchanged to maybe_perform_dhcp_discovery.
        self.iface = iface
        # Set only once the ephemeral network has been entered.
        self._ephipv4 = None

    def __enter__(self):
        """Obtain a lease and enter an ephemeral network configured from it.

        @return: The lease dict that was used (last entry of the leases
            returned by discovery).
        @raises: NoDHCPLeaseError when discovery returns no leases or
            raises InvalidDHCPLeaseFileError.
        """
        try:
            found = maybe_perform_dhcp_discovery(self.iface)
        except InvalidDHCPLeaseFileError:
            raise NoDHCPLeaseError()
        if not found:
            raise NoDHCPLeaseError()

        lease = found[-1]
        LOG.debug("Received dhcp lease on %s for %s/%s",
                  lease['interface'], lease['fixed-address'],
                  lease['subnet-mask'])

        # Translate lease option names into EphemeralIPv4Network keyword
        # arguments; options missing from the lease become None.
        net_kwargs = {
            'interface': lease.get('interface'),
            'ip': lease.get('fixed-address'),
            'prefix_or_mask': lease.get('subnet-mask'),
            'broadcast': lease.get('broadcast-address'),
            'router': lease.get('routers'),
        }
        if not net_kwargs['broadcast']:
            # The lease carried no broadcast address: derive it from the
            # netmask and the leased ip.
            net_kwargs['broadcast'] = bcip(
                net_kwargs['prefix_or_mask'], net_kwargs['ip'])

        network = EphemeralIPv4Network(**net_kwargs)
        network.__enter__()
        # Remember the network only after it was entered successfully so
        # that __exit__ never exits a network that was not entered.
        self._ephipv4 = network
        return lease

    def __exit__(self, excp_type, excp_value, excp_traceback):
        """Exit the ephemeral network, if one was entered."""
        if self._ephipv4:
            self._ephipv4.__exit__(excp_type, excp_value, excp_traceback)


def maybe_perform_dhcp_discovery(nic=None):
"""Perform dhcp discovery if nic valid and dhclient command exists.
Expand Down
12 changes: 12 additions & 0 deletions cloudinit/net/network_state.py
Expand Up @@ -961,4 +961,16 @@ def mask_to_net_prefix(mask):
return ipv4_mask_to_net_prefix(mask)


def mask_and_ipv4_to_bcast_addr(mask, ip):
    """Compute the IPv4 broadcast address for an address and its netmask.

    @param mask: Netmask; converted to a prefix length with
        ipv4_mask_to_net_prefix.
    @param ip: IPv4 address as a dotted string.
    @return: The broadcast address as a dotted string.

    Supports ipv4 only.
    """
    # Adding 256 to an octet forces a leading 1 bit, so dropping the first
    # binary digit leaves the octet zero-padded to 8 digits.
    bit_string = ''
    for octet in ip.split('.'):
        bit_string += format(int(octet) + 256, 'b')[1:]
    ip_int = int(bit_string, 2)

    prefix_len = ipv4_mask_to_net_prefix(mask)
    # Setting every host bit yields the broadcast address.
    host_bits = 2 ** (32 - prefix_len) - 1
    bcast_int = ip_int | host_bits

    octets = []
    for shift in (24, 16, 8, 0):
        octets.append(str((bcast_int >> shift) & 0xFF))
    return '.'.join(octets)


# vi: ts=4 expandtab
138 changes: 129 additions & 9 deletions cloudinit/sources/DataSourceAzure.py
Expand Up @@ -11,13 +11,16 @@
import os
import os.path
import re
from time import time
from xml.dom import minidom
import xml.etree.ElementTree as ET

from cloudinit import log as logging
from cloudinit import net
from cloudinit.net.dhcp import EphemeralDHCPv4
from cloudinit import sources
from cloudinit.sources.helpers.azure import get_metadata_from_fabric
from cloudinit.url_helper import readurl, wait_for_url, UrlError
from cloudinit import util

LOG = logging.getLogger(__name__)
Expand All @@ -44,6 +47,9 @@
DEFAULT_FS = 'ext4'
# DMI chassis-asset-tag is set static for all azure instances
AZURE_CHASSIS_ASSET_TAG = '7783-7084-3265-9085-8269-3286-77'
REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds"
IMDS_URL = "http://169.254.169.254/metadata/reprovisiondata"
IMDS_RETRIES = 5


def find_storvscid_from_sysctl_pnpinfo(sysctl_out, deviceid):
Expand Down Expand Up @@ -276,19 +282,20 @@ def bounce_network_with_azure_hostname(self):

with temporary_hostname(azure_hostname, self.ds_cfg,
hostname_command=hostname_command) \
as previous_hostname:
if (previous_hostname is not None and
as previous_hn:
if (previous_hn is not None and
util.is_true(self.ds_cfg.get('set_hostname'))):
cfg = self.ds_cfg['hostname_bounce']

# "Bouncing" the network
try:
perform_hostname_bounce(hostname=azure_hostname,
cfg=cfg,
prev_hostname=previous_hostname)
return perform_hostname_bounce(hostname=azure_hostname,
cfg=cfg,
prev_hostname=previous_hn)
except Exception as e:
LOG.warning("Failed publishing hostname: %s", e)
util.logexc(LOG, "handling set_hostname failed")
return False

def get_metadata_from_agent(self):
temp_hostname = self.metadata.get('local-hostname')
Expand Down Expand Up @@ -345,15 +352,20 @@ def _get_data(self):
ddir = self.ds_cfg['data_dir']

candidates = [self.seed_dir]
if os.path.isfile(REPROVISION_MARKER_FILE):
candidates.insert(0, "IMDS")
candidates.extend(list_possible_azure_ds_devs())
if ddir:
candidates.append(ddir)

found = None

reprovision = False
for cdev in candidates:
try:
if cdev.startswith("/dev/"):
if cdev == "IMDS":
ret = None
reprovision = True
elif cdev.startswith("/dev/"):
if util.is_FreeBSD():
ret = util.mount_cb(cdev, load_azure_ds_dir,
mtype="udf", sync=False)
Expand All @@ -370,6 +382,8 @@ def _get_data(self):
LOG.warning("%s was not mountable", cdev)
continue

if reprovision or self._should_reprovision(ret):
ret = self._reprovision()
(md, self.userdata_raw, cfg, files) = ret
self.seed = cdev
self.metadata = util.mergemanydict([md, DEFAULT_METADATA])
Expand Down Expand Up @@ -428,6 +442,83 @@ def setup(self, is_new_instance):
LOG.debug("negotiating already done for %s",
self.get_instance_id())

def _poll_imds(self, report_ready=True):
    """Poll IMDS for the new provisioning data until a response is read.

    Each attempt sets up an EphemeralDHCPv4 network, optionally reports
    ready to the fabric (once, on the first attempt that gets a lease),
    waits for the IMDS url and then reads it. On any exception the DHCP
    setup is retried, up to IMDS_RETRIES attempts in total.

    @param report_ready: When True, call self._report_ready with the
        first lease obtained before polling.
    @return: The IMDS response converted with str() (the caller parses
        it as ovf-env.xml content), or None if every attempt failed.
    """
    url = IMDS_URL + "?api-version=2017-04-02"
    headers = {"Metadata": "true"}
    LOG.debug("Start polling IMDS")

    def sleep_cb(response, loop_n):
        # Constant one second pause between polls.
        return 1

    def exception_cb(msg, exception):
        # A 404 is not treated as an error here: return so that
        # wait_for_url keeps polling.
        if isinstance(exception, UrlError) and exception.code == 404:
            return
        LOG.warning("Exception during polling. Will try DHCP.",
                    exc_info=True)

        # If we get an exception while trying to call IMDS, we
        # call DHCP and setup the ephemeral network to acquire the new IP.
        raise exception

    need_report = report_ready
    for i in range(IMDS_RETRIES):
        try:
            with EphemeralDHCPv4() as lease:
                if need_report:
                    self._report_ready(lease=lease)
                    need_report = False
                wait_for_url([url], max_wait=None, timeout=60,
                             status_cb=LOG.info,
                             headers_cb=lambda url: headers, sleep_time=1,
                             exception_cb=exception_cb,
                             sleep_time_cb=sleep_cb)
                return str(readurl(url, headers=headers))
        except Exception:
            # i is zero-based and this attempt has already been used, so
            # the number of attempts left is IMDS_RETRIES - i - 1.
            LOG.debug("Exception during polling-retrying dhcp"
                      " %d more time(s).", IMDS_RETRIES - i - 1,
                      exc_info=True)

    # Every attempt failed; make the fall-through result explicit.
    LOG.warning("Unable to read reprovisioning data from IMDS after"
                " %d attempt(s).", IMDS_RETRIES)
    return None

def _report_ready(self, lease):
    """Tell the fabric provisioning has completed before polling starts.

    Failures are logged and swallowed on purpose (best effort): any
    exception, including a lease without the 'unknown-245' option, only
    produces a warning.

    @param lease: DHCP lease dict; its 'unknown-245' value is passed to
        get_metadata_from_fabric (presumably the fabric endpoint taken
        from DHCP option 245 - confirm against the helper).
    """
    try:
        get_metadata_from_fabric(None, lease['unknown-245'])
    except Exception:
        LOG.warning(
            "Error communicating with Azure fabric; You may experience"
            " connectivity issues.", exc_info=True)

def _should_reprovision(self, ret):
    """Decide whether IMDS must be polled for reprovisioning data.

    Returns True when the parsed config has PreprovisionedVm set to True
    or when the marker file already exists; in that case the marker file
    is created if it is missing. Note that self.userdata_raw is assigned
    from ret as a side effect.

    Why a marker file: the VM boots into the polling loop, which is
    expected to run until the VM is picked. If the platform moves the VM
    to a new host (for instance because of a hardware issue), polling
    has to continue. Since the VM already reported ready to the fabric,
    the ISO will not be attached again, so cloud-init needs another way
    to know it must jump back into the polling loop to retrieve the
    ovf_env.

    @param ret: Falsy, or a 4-tuple (md, userdata_raw, cfg, files).
    @return: True if reprovisioning should happen, False otherwise.
    """
    if not ret:
        return False

    (_md, self.userdata_raw, cfg, _files) = ret
    marker = REPROVISION_MARKER_FILE

    # Neither flagged as preprovisioned nor previously marked: no polling.
    if cfg.get('PreprovisionedVm') is not True and \
            not os.path.isfile(marker):
        return False

    if not os.path.isfile(marker):
        LOG.info("Creating a marker file to poll imds")
        util.write_file(marker, "%s: %s\n" % (os.getpid(), time()))
    return True

def _reprovision(self):
    """Run the reprovisioning workflow.

    Polls IMDS for content, parses it with read_azure_ovf and returns
    the result in the (md, ud, cfg, files) tuple shape, with the raw
    content stored under the 'ovf-env.xml' key of files.
    """
    ovf_env = self._poll_imds()
    metadata, userdata, cfg = read_azure_ovf(ovf_env)
    files = {'ovf-env.xml': ovf_env}
    return (metadata, userdata, cfg, files)

def _negotiate(self):
"""Negotiate with fabric and return data from it.
Expand All @@ -453,7 +544,7 @@ def _negotiate(self):
"Error communicating with Azure fabric; You may experience."
"connectivity issues.", exc_info=True)
return False

util.del_file(REPROVISION_MARKER_FILE)
return fabric_data

def activate(self, cfg, is_new_instance):
Expand Down Expand Up @@ -595,6 +686,7 @@ def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120,
def perform_hostname_bounce(hostname, cfg, prev_hostname):
# set the hostname to 'hostname' if it is not already set to that.
# then, if policy is not off, bounce the interface using command
# Returns True if the network was bounced, False otherwise.
command = cfg['command']
interface = cfg['interface']
policy = cfg['policy']
Expand All @@ -614,14 +706,16 @@ def perform_hostname_bounce(hostname, cfg, prev_hostname):
else:
LOG.debug(
"Skipping network bounce: ifupdown utils aren't present.")
return # Don't bounce as networkd handles hostname DDNS updates
# Don't bounce as networkd handles hostname DDNS updates
return False
LOG.debug("pubhname: publishing hostname [%s]", msg)
shell = not isinstance(command, (list, tuple))
# capture=False, see comments in bug 1202758 and bug 1206164.
util.log_time(logfunc=LOG.debug, msg="publishing hostname",
get_uptime=True, func=util.subp,
kwargs={'args': command, 'shell': shell, 'capture': False,
'env': env})
return True


def crtfile_to_pubkey(fname, data=None):
Expand Down Expand Up @@ -838,9 +932,35 @@ def read_azure_ovf(contents):
if 'ssh_pwauth' not in cfg and password:
cfg['ssh_pwauth'] = True

cfg['PreprovisionedVm'] = _extract_preprovisioned_vm_setting(dom)

return (md, ud, cfg)


def _extract_preprovisioned_vm_setting(dom):
    """Read the preprovision flag from the ovf. It should not
    exist unless true.

    Walks documentElement -> PlatformSettingsSection -> PlatformSettings
    -> PreprovisionedVm, taking the first match at each level.

    @param dom: Parsed ovf document (provides documentElement).
    @return: util.translate_bool() of the PreprovisionedVm text, or
        False when any element on the path is missing or the
        PreprovisionedVm element has no content.
    """
    node = dom.documentElement
    for name in ("PlatformSettingsSection", "PlatformSettings",
                 "PreprovisionedVm"):
        # Bind name as a default so the matcher does not depend on the
        # loop variable's later values.
        matches = find_child(
            node, lambda n, name=name: n.localName == name)
        if not matches:
            LOG.debug("%s not found", name)
            return False
        node = matches[0]

    # An empty element (<PreprovisionedVm/>) has no child node to read.
    if node.firstChild is None:
        LOG.debug("PreprovisionedVm is empty")
        return False
    return util.translate_bool(node.firstChild.nodeValue)


def encrypt_pass(password, salt_id="$6$"):
    """Hash password with crypt.crypt using a freshly generated salt.

    @param password: The password to hash.
    @param salt_id: Prefix of the salt; 16 characters from util.rand_str
        are appended to it.
    @return: The value returned by crypt.crypt.
    """
    salt = salt_id + util.rand_str(strlen=16)
    return crypt.crypt(password, salt)

Expand Down
23 changes: 7 additions & 16 deletions cloudinit/sources/DataSourceEc2.py
Expand Up @@ -14,7 +14,7 @@
from cloudinit import ec2_utils as ec2
from cloudinit import log as logging
from cloudinit import net
from cloudinit.net import dhcp
from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError
from cloudinit import sources
from cloudinit import url_helper as uhelp
from cloudinit import util
Expand Down Expand Up @@ -102,22 +102,13 @@ def _get_data(self):
if util.is_FreeBSD():
LOG.debug("FreeBSD doesn't support running dhclient with -sf")
return False
dhcp_leases = dhcp.maybe_perform_dhcp_discovery(
self.fallback_interface)
if not dhcp_leases:
# DataSourceEc2Local failed in init-local stage. DataSourceEc2
# will still run in init-network stage.
try:
with EphemeralDHCPv4(self.fallback_interface):
return util.log_time(
logfunc=LOG.debug, msg='Crawl of metadata service',
func=self._crawl_metadata)
except NoDHCPLeaseError:
return False
dhcp_opts = dhcp_leases[-1]
net_params = {'interface': dhcp_opts.get('interface'),
'ip': dhcp_opts.get('fixed-address'),
'prefix_or_mask': dhcp_opts.get('subnet-mask'),
'broadcast': dhcp_opts.get('broadcast-address'),
'router': dhcp_opts.get('routers')}
with net.EphemeralIPv4Network(**net_params):
return util.log_time(
logfunc=LOG.debug, msg='Crawl of metadata service',
func=self._crawl_metadata)
else:
return self._crawl_metadata()

Expand Down

0 comments on commit c03bdd3

Please sign in to comment.