Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Report detected/configured_hostname and fix tests #1891

Merged
merged 7 commits into from
Aug 28, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ endif::[]
//===== Bug fixes
//

=== Unreleased

// Unreleased changes go here
// When the next release happens, nest these changes under the "Python Agent version 6.x" heading
[float]
===== Features

* Collect the `configured_hostname` and `detected_hostname` separately, and switch to FQDN for the `detected_hostname`. {pull}1891[#1891]

//[float]
//===== Bug fixes
//



[[release-notes-6.x]]
Expand Down
8 changes: 6 additions & 2 deletions elasticapm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,12 +381,16 @@ def get_process_info(self):

def get_system_info(self):
system_data = {
"hostname": keyword_field(self.config.hostname),
"detected_hostname": keyword_field(self.config.detected_hostname),
basepi marked this conversation as resolved.
Show resolved Hide resolved
"architecture": platform.machine(),
"platform": platform.system().lower(),
}
if self.config.hostname:
system_data["configured_hostname"] = keyword_field(self.config.hostname)
system_data.update(cgroup.get_cgroup_container_metadata())
pod_name = os.environ.get("KUBERNETES_POD_NAME") or system_data["hostname"]
pod_name = os.environ.get("KUBERNETES_POD_NAME") or keyword_field(
self.config.hostname or self.config.detected_hostname
basepi marked this conversation as resolved.
Show resolved Hide resolved
)
changed = False
if "kubernetes" in system_data:
k8s = system_data["kubernetes"]
Expand Down
6 changes: 3 additions & 3 deletions elasticapm/conf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,11 @@
import math
import os
import re
import socket
import threading
from datetime import timedelta

from elasticapm.conf.constants import BASE_SANITIZE_FIELD_NAMES, TRACE_CONTINUATION_STRATEGY
from elasticapm.utils import compat, starmatch_to_regex
from elasticapm.utils import compat, getfqdn, starmatch_to_regex
from elasticapm.utils.logging import get_logger
from elasticapm.utils.threading import IntervalTimer, ThreadManager

Expand Down Expand Up @@ -572,7 +571,8 @@ class Config(_ConfigBase):
],
default=5,
)
hostname = _ConfigValue("HOSTNAME", default=socket.gethostname())
hostname = _ConfigValue("HOSTNAME", default=None)
detected_hostname = _ConfigValue("DETECTED_HOSTNAME", default=getfqdn())
basepi marked this conversation as resolved.
Show resolved Hide resolved
auto_log_stacks = _BoolConfigValue("AUTO_LOG_STACKS", default=True)
transport_class = _ConfigValue("TRANSPORT_CLASS", default="elasticapm.transport.http.Transport", required=True)
processors = _ListConfigValue(
Expand Down
27 changes: 27 additions & 0 deletions elasticapm/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import base64
import os
import re
import socket
import urllib.parse
from functools import partial
from types import FunctionType
Expand All @@ -49,6 +50,7 @@


default_ports = {"https": 443, "http": 80, "postgresql": 5432, "mysql": 3306, "mssql": 1433}
fqdn = None


def varmap(func, var, context=None, name=None, **kwargs):
Expand Down Expand Up @@ -221,3 +223,28 @@ def nested_key(d: dict, *args):
d = None
break
return d


def getfqdn() -> str:
    """
    Return this host's fully-qualified domain name, lowercased and stripped.

    socket.getfqdn() has some issues. For one, it's slow (may do a DNS lookup).
    For another, it can return `localhost.localdomain`[1], which is less useful
    than socket.gethostname().

    This function handles the fallbacks and caches the result in the
    module-level `fqdn` global so a non-empty result is only looked up once.

    Fallback order when the cache is empty:
      1. socket.getfqdn()
      2. socket.gethostname(), if step 1 returned `localhost.localdomain`
      3. the HOSTNAME environment variable
      4. the HOST environment variable
      5. the empty string

    NOTE: on macOS, socket.getfqdn() has been observed to intermittently
    return a reverse-lookup name ending in `.ip6.arpa` instead of the
    hostname. That case is not special-cased here.

    [1]: https://stackoverflow.com/a/43330159
    """
    global fqdn
    if not fqdn:
        fqdn = socket.getfqdn()
        if fqdn == "localhost.localdomain":
            fqdn = socket.gethostname()
        if not fqdn:
            fqdn = os.environ.get("HOSTNAME")
        if not fqdn:
            fqdn = os.environ.get("HOST")
        if fqdn is None:
            # Neither the socket calls nor the environment produced a name.
            fqdn = ""
        fqdn = fqdn.lower().strip()
    # Without this return every caller (e.g. a config default) receives None.
    return fqdn
13 changes: 8 additions & 5 deletions tests/client/client_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,15 @@ def test_system_info(elasticapm_client):
with mock.patch("elasticapm.utils.cgroup.get_cgroup_container_metadata") as mocked:
mocked.return_value = {}
system_info = elasticapm_client.get_system_info()
assert {"hostname", "architecture", "platform"} == set(system_info.keys())
assert system_info["hostname"] == socket.gethostname()
assert {"detected_hostname", "architecture", "platform"} == set(system_info.keys())
assert system_info["detected_hostname"] == elasticapm_client.config.detected_hostname


@pytest.mark.parametrize("elasticapm_client", [{"hostname": "my_custom_hostname"}], indirect=True)
def test_system_info_hostname_configurable(elasticapm_client):
# mock docker/kubernetes data here to get consistent behavior if test is run in docker
system_info = elasticapm_client.get_system_info()
assert system_info["hostname"] == "my_custom_hostname"
assert system_info["configured_hostname"] == "my_custom_hostname"


@pytest.mark.parametrize("elasticapm_client", [{"global_labels": "az=us-east-1,az.rack=8"}], indirect=True)
Expand All @@ -117,7 +117,7 @@ def test_docker_kubernetes_system_info(elasticapm_client):
mock_metadata.return_value = {"container": {"id": "123"}, "kubernetes": {"pod": {"uid": "456"}}}
system_info = elasticapm_client.get_system_info()
assert system_info["container"] == {"id": "123"}
assert system_info["kubernetes"] == {"pod": {"uid": "456", "name": socket.gethostname()}}
assert system_info["kubernetes"] == {"pod": {"uid": "456", "name": elasticapm_client.config.detected_hostname}}


@mock.patch.dict(
Expand Down Expand Up @@ -185,7 +185,10 @@ def test_docker_kubernetes_system_info_except_hostname_from_environ():
mock_gethostname.return_value = "foo"
system_info = elasticapm_client.get_system_info()
assert "kubernetes" in system_info
assert system_info["kubernetes"] == {"pod": {"name": socket.gethostname()}, "namespace": "namespace"}
assert system_info["kubernetes"] == {
"pod": {"name": elasticapm_client.config.detected_hostname},
"namespace": "namespace",
}


def test_config_by_environment():
Expand Down
3 changes: 1 addition & 2 deletions tests/config/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ def test_config_inline_dict():
"secret_token": "bar",
"server_url": "http://example.com:1234",
"service_version": "1",
"hostname": "localhost",
"api_request_time": "5s",
}
)
Expand All @@ -137,7 +136,7 @@ def test_config_inline_dict():
assert config.secret_token == "bar"
assert config.server_url == "http://example.com:1234"
assert config.service_version == "1"
assert config.hostname == "localhost"
assert config.hostname is None
assert config.api_request_time.total_seconds() == 5


Expand Down