Skip to content

Commit

Permalink
main: avoid downloading full contents cmdline urls (#1606)
Browse files Browse the repository at this point in the history
In the case that the url content does not start with
`#cloud-config`, avoid downloading the full content.
Add deprecated logs to prefer `cloud-config-url` over
`url` on the kernel command line.
Restructure and link kernel command line docs to
instance-data docs.

Add some typing.

LP: #1937319
  • Loading branch information
aciba90 committed Aug 5, 2022
1 parent 95a3e79 commit 8e26b2f
Show file tree
Hide file tree
Showing 9 changed files with 226 additions and 84 deletions.
32 changes: 27 additions & 5 deletions cloudinit/cmd/main.py
Expand Up @@ -21,6 +21,7 @@
import sys
import time
import traceback
from typing import Tuple

from cloudinit import patcher
from cloudinit.config.modules import Modules
Expand Down Expand Up @@ -143,7 +144,7 @@ def parse_cmdline_url(cmdline, names=("cloud-config-url", "url")):
raise KeyError("No keys (%s) found in string '%s'" % (cmdline, names))


def attempt_cmdline_url(path, network=True, cmdline=None):
def attempt_cmdline_url(path, network=True, cmdline=None) -> Tuple[int, str]:
"""Write data from url referenced in command line to path.
path: a file to write content to if downloaded.
Expand Down Expand Up @@ -190,7 +191,7 @@ def attempt_cmdline_url(path, network=True, cmdline=None):

return (level, m)

kwargs = {"url": url, "timeout": 10, "retries": 2}
kwargs = {"url": url, "timeout": 10, "retries": 2, "stream": True}
if network or path_is_local:
level = logging.WARN
kwargs["sec_between"] = 1
Expand All @@ -202,22 +203,43 @@ def attempt_cmdline_url(path, network=True, cmdline=None):
header = b"#cloud-config"
try:
resp = url_helper.read_file_or_url(**kwargs)
sniffed_content = b""
if resp.ok():
data = resp.contents
if not resp.contents.startswith(header):
is_cloud_cfg = True
if isinstance(resp, url_helper.UrlResponse):
try:
sniffed_content += next(
resp.iter_content(chunk_size=len(header))
)
except StopIteration:
pass
if not sniffed_content.startswith(header):
is_cloud_cfg = False
elif not resp.contents.startswith(header):
is_cloud_cfg = False
if is_cloud_cfg:
if cmdline_name == "url":
LOG.warning(
"DEPRECATED: `url` kernel command line key is"
" deprecated for providing cloud-config via URL."
" Please use `cloud-config-url` kernel command line"
" parameter instead"
)
else:
if cmdline_name == "cloud-config-url":
level = logging.WARN
else:
level = logging.INFO
return (
level,
"contents of '%s' did not start with %s" % (url, header),
f"contents of '{url}' did not start with {str(header)}",
)
else:
return (
level,
"url '%s' returned code %s. Ignoring." % (url, resp.code),
)
data = sniffed_content + resp.contents

except url_helper.UrlError as e:
return (level, "retrieving url '%s' failed: %s" % (url, e))
Expand Down
24 changes: 21 additions & 3 deletions cloudinit/url_helper.py
Expand Up @@ -19,7 +19,7 @@
from functools import partial
from http.client import NOT_FOUND
from itertools import count
from typing import Any, Callable, List, Tuple
from typing import Any, Callable, Iterator, List, Optional, Tuple, Union
from urllib.parse import quote, urlparse, urlunparse

import requests
Expand Down Expand Up @@ -59,7 +59,7 @@ def combine_single(url, add_on):
return url


def read_file_or_url(url, **kwargs):
def read_file_or_url(url, **kwargs) -> Union["FileResponse", "UrlResponse"]:
"""Wrapper function around readurl to allow passing a file path as url.
When url is not a local file path, passthrough any kwargs to readurl.
Expand Down Expand Up @@ -113,7 +113,7 @@ def __init__(self, path, contents, code=200):


class UrlResponse(object):
def __init__(self, response):
def __init__(self, response: requests.Response):
self._response = response

@property
Expand Down Expand Up @@ -144,6 +144,20 @@ def code(self):
def __str__(self):
return self._response.text

def iter_content(
self, chunk_size: Optional[int] = 1, decode_unicode: bool = False
) -> Iterator[bytes]:
"""Iterates over the response data.
When stream=True is set on the request, this avoids reading the content
at once into memory for large responses.
:param chunk_size: Number of bytes it should read into memory.
:param decode_unicode: If True, content will be decoded using the best
available encoding based on the response.
"""
yield from self._response.iter_content(chunk_size, decode_unicode)


class UrlError(IOError):
def __init__(self, cause, code=None, headers=None, url=None):
Expand Down Expand Up @@ -191,6 +205,7 @@ def readurl(
infinite=False,
log_req_resp=True,
request_method="",
stream: bool = False,
) -> UrlResponse:
"""Wrapper around requests.Session to read the url and retry if necessary
Expand Down Expand Up @@ -222,10 +237,13 @@ def readurl(
:param request_method: String passed as 'method' to Session.request.
Typically GET, or POST. Default: POST if data is provided, GET
otherwise.
:param stream: if False, the response content will be immediately
downloaded.
"""
url = _cleanurl(url)
req_args = {
"url": url,
"stream": stream,
}
req_args.update(_get_ssl_args(url, ssl_details))
req_args["allow_redirects"] = allow_redirects
Expand Down
2 changes: 1 addition & 1 deletion doc/rtd/topics/format.rst
Expand Up @@ -66,7 +66,7 @@ Kernel Command Line

When using the :ref:`datasource_nocloud` datasource, users can pass user data
via the kernel command line parameters. See the :ref:`datasource_nocloud`
datasource documentation for more details.
datasource and :ref:`kernel_cmdline` documentations for more details.

Gzip Compressed Content
=======================
Expand Down
7 changes: 7 additions & 0 deletions doc/rtd/topics/instancedata.rst
Expand Up @@ -4,6 +4,12 @@
Instance Metadata
*****************

.. toctree::
:maxdepth: 1
:hidden:

kernel-cmdline.rst

What is instance data?
========================

Expand All @@ -16,6 +22,7 @@ comes from any number of sources:
* cloud-config seed files in the booted cloud image or distribution
* vendordata provided from files or cloud metadata services
* userdata provided at instance creation
* :ref:`kernel_cmdline`

Each cloud provider presents unique configuration metadata in different
formats to the instance. Cloud-init provides a cache of any crawled metadata
Expand Down
71 changes: 71 additions & 0 deletions doc/rtd/topics/kernel-cmdline.rst
@@ -0,0 +1,71 @@
.. _kernel_cmdline:

*******************
Kernel Command Line
*******************

In order to allow an ephemeral, or otherwise pristine image to
receive some configuration, cloud-init will read a url directed by
the kernel command line and proceed as if its data had previously existed.

This allows for configuring a meta-data service, or some other data.

.. note::

That usage of the kernel command line is somewhat of a last resort,
as it requires knowing in advance the correct command line or modifying
the boot loader to append data.

For example, when ``cloud-init init --local`` runs, it will check to
see if ``cloud-config-url`` appears in key/value fashion
in the kernel command line as in:

.. code-block:: text
root=/dev/sda ro cloud-config-url=http://foo.bar.zee/abcde
Cloud-init will then read the contents of the given url.
If the content starts with ``#cloud-config``, it will store
that data to the local filesystem in a static filename
``/etc/cloud/cloud.cfg.d/91_kernel_cmdline_url.cfg``, and consider it as
part of the config from that point forward.

If that file exists already, it will not be overwritten, and the
`cloud-config-url` parameter is completely ignored.

Then, when the DataSource runs, it will find that config already available.

So, in order to be able to configure the MAAS DataSource by controlling the
kernel command line from outside the image, you can append:

* ``cloud-config-url=http://your.url.here/abcdefg``

Then, have the following content at that url:

.. code-block:: yaml
#cloud-config
datasource:
MAAS:
metadata_url: http://mass-host.localdomain/source
consumer_key: Xh234sdkljf
token_key: kjfhgb3n
token_secret: 24uysdfx1w4
.. warning::

`url` kernel command line key is deprecated.
Please use `cloud-config-url` parameter instead"

.. note::

Because ``cloud-config-url=`` is so very generic, in order to avoid false
positives,
cloud-init requires the content to start with ``#cloud-config`` in order
for it to be considered.

.. note::

The ``cloud-config-url=`` is un-authed http GET, and contains credentials.
It could be set up to be randomly generated and also check source
address in order to be more secure.
48 changes: 0 additions & 48 deletions doc/sources/kernel-cmdline.txt

This file was deleted.

2 changes: 2 additions & 0 deletions tests/unittests/sources/test_azure.py
Expand Up @@ -3439,6 +3439,7 @@ def test_poll_imds_returns_ovf_env(
method="GET",
timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS,
url=full_url,
stream=False,
)
],
)
Expand Down Expand Up @@ -3490,6 +3491,7 @@ def test__reprovision_calls__poll_imds(
method="GET",
timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS,
url=full_url,
stream=False,
),
m_request.call_args_list,
)
Expand Down

0 comments on commit 8e26b2f

Please sign in to comment.