Skip to content

Commit

Permalink
fix: Ensure dump timestamps parsed as UTC (#5214)
Browse files Browse the repository at this point in the history
When parsing timestamps in `cloud-init analyze dump`, most log parsing
used UTC, but if shelling out to `date` was required, it used the local
timezone instead. This is wrong because cloud-init logs are in UTC.
This commit ensures that all timestamps are UTC-based.

Also update test code to explicitly convert generated times to UTC
before checking against what is being tested.

Fixes GH-5158
  • Loading branch information
TheRealFalcon committed Apr 25, 2024
1 parent 90c15a6 commit ddde8dd
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 27 deletions.
17 changes: 10 additions & 7 deletions cloudinit/analyze/dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import calendar
import sys
from datetime import datetime
from datetime import datetime, timezone

from cloudinit import atomic_helper, subp, util

Expand Down Expand Up @@ -36,13 +36,16 @@ def parse_timestamp(timestampstr):
if "." in timestampstr:
FMT = CLOUD_INIT_JOURNALCTL_FMT
dt = datetime.strptime(
timestampstr + " " + str(datetime.now().year), FMT
)
timestamp = dt.strftime("%s.%f")
timestampstr + " " + str(datetime.now().year),
FMT,
).replace(tzinfo=timezone.utc)
timestamp = dt.timestamp()
elif "," in timestampstr:
# 2016-09-12 14:39:20,839
dt = datetime.strptime(timestampstr, CLOUD_INIT_ASCTIME_FMT)
timestamp = dt.strftime("%s.%f")
dt = datetime.strptime(timestampstr, CLOUD_INIT_ASCTIME_FMT).replace(
tzinfo=timezone.utc
)
timestamp = dt.timestamp()
else:
# allow GNU date(1) to handle other formats we don't expect
# This may throw a ValueError if no GNU date can be found
Expand Down Expand Up @@ -70,7 +73,7 @@ def parse_timestamp_from_date(timestampstr):
f"Unable to parse timestamp without GNU date: [{timestampstr}]"
)
return float(
subp.subp([date, "+%s.%3N", "-d", timestampstr]).stdout.strip()
subp.subp([date, "-u", "+%s.%3N", "-d", timestampstr]).stdout.strip()
)


Expand Down
95 changes: 75 additions & 20 deletions tests/unittests/analyze/test_dump.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# This file is part of cloud-init. See LICENSE file for license information.

from datetime import datetime
from contextlib import suppress
from datetime import datetime, timezone
from textwrap import dedent

import pytest
Expand All @@ -20,8 +21,10 @@ def test_parse_timestamp_handles_cloud_init_default_format(self):
"""Logs with cloud-init detailed formats will be properly parsed."""
trusty_fmt = "%Y-%m-%d %H:%M:%S,%f"
trusty_stamp = "2016-09-12 14:39:20,839"
dt = datetime.strptime(trusty_stamp, trusty_fmt)
assert float(dt.strftime("%s.%f")) == parse_timestamp(trusty_stamp)
dt = datetime.strptime(trusty_stamp, trusty_fmt).replace(
tzinfo=timezone.utc
)
assert dt.timestamp() == parse_timestamp(trusty_stamp)

def test_parse_timestamp_handles_syslog_adding_year(self):
"""Syslog timestamps lack a year. Add year and properly parse."""
Expand All @@ -30,8 +33,10 @@ def test_parse_timestamp_handles_syslog_adding_year(self):

# convert stamp ourselves by adding the missing year value
year = datetime.now().year
dt = datetime.strptime(syslog_stamp + " " + str(year), syslog_fmt)
assert float(dt.strftime("%s.%f")) == parse_timestamp(syslog_stamp)
dt = datetime.strptime(
syslog_stamp + " " + str(year), syslog_fmt
).replace(tzinfo=timezone.utc)
assert dt.timestamp() == parse_timestamp(syslog_stamp)

def test_parse_timestamp_handles_journalctl_format_adding_year(self):
"""Journalctl precise timestamps lack a year. Add year and parse."""
Expand All @@ -40,8 +45,10 @@ def test_parse_timestamp_handles_journalctl_format_adding_year(self):

# convert stamp ourselves by adding the missing year value
year = datetime.now().year
dt = datetime.strptime(journal_stamp + " " + str(year), journal_fmt)
assert float(dt.strftime("%s.%f")) == parse_timestamp(journal_stamp)
dt = datetime.strptime(
journal_stamp + " " + str(year), journal_fmt
).replace(tzinfo=timezone.utc)
assert dt.timestamp() == parse_timestamp(journal_stamp)

@pytest.mark.allow_subp_for("date", "gdate")
def test_parse_unexpected_timestamp_format_with_date_command(self):
Expand All @@ -50,14 +57,60 @@ def test_parse_unexpected_timestamp_format_with_date_command(self):
new_stamp = "17:15 08/08"
# convert stamp ourselves by adding the missing year value
year = datetime.now().year
dt = datetime.strptime(new_stamp + " " + str(year), new_fmt)
dt = datetime.strptime(new_stamp + " " + str(year), new_fmt).replace(
tzinfo=timezone.utc
)

if has_gnu_date():
assert float(dt.strftime("%s.%f")) == parse_timestamp(new_stamp)
assert dt.timestamp() == parse_timestamp(new_stamp)
else:
with pytest.raises(ValueError):
parse_timestamp(new_stamp)

@pytest.mark.allow_subp_for("date", "gdate")
def test_parse_timestamp_round_trip(self):
"""Ensure that timezone doesn't affect the returned timestamp.
Depending on the format of the timestamp, we use different methods
to parse it. In all cases, the timestamp should be returned the
same, regardless of timezone.
"""
times = [
"Sep 12 14:39:00",
"Sep 12 14:39:00.839452",
"14:39 09/12",
"2020-09-12 14:39:00,839",
"2020-09-12 14:39:00.839452+00:00",
]

timestamps = []
for f in times:
with suppress(ValueError):
timestamps.append(parse_timestamp(f))

new_times = [
datetime.fromtimestamp(ts, tz=timezone.utc).strftime(
"%Y-%m-%d %H:%M:%S"
)
for ts in timestamps
]
assert all(t.endswith("-09-12 14:39:00") for t in new_times)

@pytest.mark.allow_subp_for("date", "gdate")
def test_parse_timestamp_handles_explicit_timezone(self):
"""Explicitly provided timezones are parsed and properly offset."""
if not has_gnu_date():
pytest.skip("GNU date is required for this test")

original_ts = "2020-09-12 14:39:20.839452+02:00"
parsed_ts = parse_timestamp(original_ts)
assert (
datetime.fromtimestamp(parsed_ts, tz=timezone.utc).strftime(
"%Y-%m-%d %H:%M:%S"
)
== "2020-09-12 12:39:20"
)


class TestParseCILogLine:
def test_parse_logline_returns_none_without_separators(self):
Expand All @@ -81,8 +134,8 @@ def test_parse_logline_returns_event_for_cloud_init_logs(self):
)
dt = datetime.strptime(
"2017-08-08 20:05:07,147", "%Y-%m-%d %H:%M:%S,%f"
)
timestamp = float(dt.strftime("%s.%f"))
).replace(tzinfo=timezone.utc)
timestamp = dt.timestamp()
expected = {
"description": "starting search for local datasources",
"event_type": "start",
Expand All @@ -102,8 +155,8 @@ def test_parse_logline_returns_event_for_journalctl_logs(self):
year = datetime.now().year
dt = datetime.strptime(
"Nov 03 06:51:06.074410 %d" % year, "%b %d %H:%M:%S.%f %Y"
)
timestamp = float(dt.strftime("%s.%f"))
).replace(tzinfo=timezone.utc)
timestamp = dt.timestamp()
expected = {
"description": "starting search for local datasources",
"event_type": "start",
Expand Down Expand Up @@ -144,9 +197,11 @@ def test_parse_logline_returns_event_for_amazon_linux_2_line(self):
)
# Generate the expected value using `datetime`, so that TZ
# determination is consistent with the code under test.
timestamp_dt = datetime.strptime(
"Apr 30 19:39:11", "%b %d %H:%M:%S"
).replace(year=datetime.now().year)
timestamp_dt = (
datetime.strptime("Apr 30 19:39:11", "%b %d %H:%M:%S")
.replace(year=datetime.now().year)
.replace(tzinfo=timezone.utc)
)
expected = {
"description": "attempting to read from cache [check]",
"event_type": "start",
Expand Down Expand Up @@ -184,8 +239,8 @@ def test_dump_events_with_rawdata(self, m_parse_from_date):
year = datetime.now().year
dt1 = datetime.strptime(
"Nov 03 06:51:06.074410 %d" % year, "%b %d %H:%M:%S.%f %Y"
)
timestamp1 = float(dt1.strftime("%s.%f"))
).replace(tzinfo=timezone.utc)
timestamp1 = dt1.timestamp()
expected_events = [
{
"description": "starting search for local datasources",
Expand Down Expand Up @@ -216,8 +271,8 @@ def test_dump_events_with_cisource(self, m_parse_from_date, tmpdir):
year = datetime.now().year
dt1 = datetime.strptime(
"Nov 03 06:51:06.074410 %d" % year, "%b %d %H:%M:%S.%f %Y"
)
timestamp1 = float(dt1.strftime("%s.%f"))
).replace(tzinfo=timezone.utc)
timestamp1 = dt1.timestamp()
expected_events = [
{
"description": "starting search for local datasources",
Expand Down

0 comments on commit ddde8dd

Please sign in to comment.