In [27]:
import datetime as dt
from dateutil.parser import parse
import pytz
import tzlocal

# Timezone objects shared by the cells below.
UTC = pytz.UTC
# Timezone of the machine running the notebook, as reported by tzlocal.
# NOTE(review): the recorded outputs below show EST/EDT, so they were presumably
# produced on a US Eastern host; other hosts will print different zones/jumps.
LOCAL = tzlocal.get_localzone()

def print_timestamps(timestamps):
    """Print each timestamp on its own line as ``YYYY/MM/DD HH:MM:SS TZ``.

    Args:
        timestamps: Iterable of objects exposing ``strftime`` (e.g. ``datetime``).
            The ``%Z`` field renders the zone abbreviation, which is empty for
            naive datetimes.
    """
    layout = '%Y/%m/%d %H:%M:%S %Z'
    # Lazy generator: each value is formatted right before it is printed.
    rendered = (stamp.strftime(layout) for stamp in timestamps)
    for line in rendered:
        print(line)

In [28]:
# Build ten hourly example timestamps whose local-time view contains a DST jump.
# The stepping is done in UTC; each step is then converted to LOCAL.
# NOTE(review): the 'EST' suffix is resolved by dateutil's parser -- presumably this
# relies on the host being in US Eastern time; confirm before running elsewhere.
utc_origin = parse('2020/03/07 20:00:00 EST').astimezone(UTC)
local_timestamps = [
    (utc_origin + dt.timedelta(hours=hour_offset)).astimezone(LOCAL)
    for hour_offset in range(10)
]

print_timestamps(local_timestamps)

2020/03/07 20:00:00 EST
2020/03/07 21:00:00 EST
2020/03/07 22:00:00 EST
2020/03/07 23:00:00 EST
2020/03/08 00:00:00 EST
2020/03/08 01:00:00 EST
2020/03/08 03:00:00 EDT
2020/03/08 04:00:00 EDT
2020/03/08 05:00:00 EDT
2020/03/08 06:00:00 EDT


In [29]:
# Map every local timestamp back onto UTC. The hours advance uniformly there
# (correctly): the DST jump only exists in the local representation.
utc_timestamps = [
    local_ts.astimezone(UTC)
    for local_ts in local_timestamps
]

print_timestamps(utc_timestamps)

2020/03/08 01:00:00 UTC
2020/03/08 02:00:00 UTC
2020/03/08 03:00:00 UTC
2020/03/08 04:00:00 UTC
2020/03/08 05:00:00 UTC
2020/03/08 06:00:00 UTC
2020/03/08 07:00:00 UTC
2020/03/08 08:00:00 UTC
2020/03/08 09:00:00 UTC
2020/03/08 10:00:00 UTC


In [34]:
# "Anonymize": add a fixed offset in UTC first, and only then convert back to local time.
# 2 * 52 + 4 = 108 weeks, i.e. roughly two years and four weeks.
nonrandom_shift = dt.timedelta(weeks=2 * 52 + 4)
shifted_local_timestamps = [
    (utc_ts + nonrandom_shift).astimezone(LOCAL)
    for utc_ts in utc_timestamps
]

# Notice that the shifted local timestamps contain no time jump.
print_timestamps(shifted_local_timestamps)

2022/04/02 21:00:00 EDT
2022/04/02 22:00:00 EDT
2022/04/02 23:00:00 EDT
2022/04/03 00:00:00 EDT
2022/04/03 01:00:00 EDT
2022/04/03 02:00:00 EDT
2022/04/03 03:00:00 EDT
2022/04/03 04:00:00 EDT
2022/04/03 05:00:00 EDT
2022/04/03 06:00:00 EDT


In [39]:
# Shift the time some amount in UTC, then convert back to local time to "anonymize".
# This time the shift (34 weeks) lands the window on a DST transition, so the local
# timestamps DO contain a time jump. Since we're using timezone-aware timestamps in
# the HDF file, we can handle this without issue.
nonrandom_shift = dt.timedelta(weeks=34)
shifted_local_timestamps = [(uts + nonrandom_shift).astimezone(LOCAL) for uts in utc_timestamps]

# Notice the time jump: 01:00 appears twice in local time, once as EDT and once as EST.
# The zone abbreviation is what tells the two readings apart.
print_timestamps(shifted_local_timestamps)

2020/10/31 21:00:00 EDT
2020/10/31 22:00:00 EDT
2020/10/31 23:00:00 EDT
2020/11/01 00:00:00 EDT
2020/11/01 01:00:00 EDT
2020/11/01 01:00:00 EST
2020/11/01 02:00:00 EST
2020/11/01 03:00:00 EST
2020/11/01 04:00:00 EST
2020/11/01 05:00:00 EST
