In [2]:
import numpy as np

from datetime import datetime, timezone
from tzwhere import tzwhere
import pytz
import timezonefinder

In [5]:
# array of UTC timestamps as naive datetime objects
def base_date_arr(year: int):
    start_time = np.datetime64(str(year))
    end_time = np.datetime64(str(year + 1))

    arr_dt_1d = np.arange(start_time, end_time, dtype='datetime64[m]').astype(datetime)  # 1D array

    return arr_dt_1d

In [6]:
# array of naive timestamps to array of time zone aware timestamps
def to_utc(arr, lon, lat, use_dst: bool):
    # tz_str = tzwhere.tzwhere().tzNameAt(lat, lon)
    
    tz_str = timezonefinder.TimezoneFinder().certain_timezone_at(lat=lat, lng=lon)
    tz = pytz.timezone(tz_str)

    def one_dt_utc(dt):
        dt_offset = (dt - offset)
        return dt_offset.replace(tzinfo=timezone.utc)

    def one_dt_localized(dt):
        return tz.localize(dt).astimezone(pytz.utc)

    if use_dst:
        return np.vectorize(one_dt_localized)(arr)
    else:
        # offset = tz.utcoffset(arr[0, 0], is_dst=False)
        offset = tz.utcoffset(arr[0], is_dst=False)
        return np.vectorize(one_dt_utc)(arr)

In [92]:
def time_arr(year, lon, lat, use_dst=True):
    # determine timezone based on lon, lat
    tz_str = timezonefinder.TimezoneFinder().certain_timezone_at(lat=lat, lng=lon)
    tz = pytz.timezone(tz_str)
    
    # generate local start and end times
    # localize with derived time zone
    start_time = pd.to_datetime(datetime(year, 1, 1)).tz_localize(tz)
    end_time = pd.to_datetime(datetime(year+1, 1, 1)).tz_localize(tz)
    
    if use_dst:
        # generate times
        times = pd.date_range(start_time, end_time, freq='min').tz_convert('UTC')
    else:
        # convert to UTC to capture offset
        start_time = start_time.tz_convert('UTC')
        end_time = end_time.tz_convert('UTC')
        
        # generate times
        times = pd.date_range(start_time, end_time, freq='min')
    
    # convert to python datetime objects in numpy array
    return times.to_pydatetime()

In [94]:
bef = datetime.now()
time_arr(2021, lon, lat, use_dst=False)
print(datetime.now() - bef)

0:00:00.228205


In [102]:
start_time = pd.to_datetime(datetime(year, 1, 1))
end_time = pd.to_datetime(datetime(year+1, 1, 1))
times = pd.date_range(start_time, end_time, freq='min').tz_localize(tz, ambiguous=True, nonexistent='shift_forward').tz_convert('UTC')
times

DatetimeIndex(['2021-01-01 05:00:00+00:00', '2021-01-01 05:01:00+00:00',
               '2021-01-01 05:02:00+00:00', '2021-01-01 05:03:00+00:00',
               '2021-01-01 05:04:00+00:00', '2021-01-01 05:05:00+00:00',
               '2021-01-01 05:06:00+00:00', '2021-01-01 05:07:00+00:00',
               '2021-01-01 05:08:00+00:00', '2021-01-01 05:09:00+00:00',
               ...
               '2022-01-01 04:51:00+00:00', '2022-01-01 04:52:00+00:00',
               '2022-01-01 04:53:00+00:00', '2022-01-01 04:54:00+00:00',
               '2022-01-01 04:55:00+00:00', '2022-01-01 04:56:00+00:00',
               '2022-01-01 04:57:00+00:00', '2022-01-01 04:58:00+00:00',
               '2022-01-01 04:59:00+00:00', '2022-01-01 05:00:00+00:00'],
              dtype='datetime64[ns, UTC]', length=525601, freq=None)

In [51]:
tz_str = timezonefinder.TimezoneFinder().certain_timezone_at(lat=lat, lng=lon)
tz = pytz.timezone(tz_str)
# start_time = datetime(2021, 1, 1, tzinfo=tz)
# start_time = datetime(2021, 1, 1)
# pd.to_datetime(start_time).tz_localize(tz)

start_time = pd.to_datetime(datetime(year, 1, 1)).tz_localize(tz)
end_time = pd.to_datetime(datetime(year+1, 1, 1)).tz_localize(tz)

pd.date_range(start_time, end_time, freq='min').tz_convert('UTC').to_pydatetime()

numpy.ndarray

In [67]:
lon = -71.057083
lat = 42.361145

tz_str = timezonefinder.TimezoneFinder().certain_timezone_at(lat=lat, lng=lon)
tz = pytz.timezone(tz_str)
# start_time = datetime(year, 1, 1, tzinfo='tz')
# offset = tz.utcoffset(datetime(year, 1, 1, tzinfo=tz), is_dst=False)
# start_time = datetime(year, 1, 1, tzinfo=pytz.UTC) - offset
# end_time = datetime(year+1, 1, 1, tzinfo=pytz.UTC) - offset
# np.arange(start_time, end_time, dtype='datetime64[m]').astype(datetime)
offset = tz.utcoffset(datetime(year, 1, 1, tzinfo=tz), is_dst=False)
start_time = pd.to_datetime(datetime(year, 1, 1, tzinfo=pytz.UTC)) - offset
end_time = pd.to_datetime(datetime(year+1, 1, 1, tzinfo=pytz.UTC)) - offset


pd.date_range(start_time, end_time, freq='min').to_pydatetime()

array([datetime.datetime(2021, 1, 1, 4, 56, tzinfo=<UTC>),
       datetime.datetime(2021, 1, 1, 4, 57, tzinfo=<UTC>),
       datetime.datetime(2021, 1, 1, 4, 58, tzinfo=<UTC>), ...,
       datetime.datetime(2022, 1, 1, 4, 54, tzinfo=<UTC>),
       datetime.datetime(2022, 1, 1, 4, 55, tzinfo=<UTC>),
       datetime.datetime(2022, 1, 1, 4, 56, tzinfo=<UTC>)], dtype=object)

In [84]:
dan = pytz.timezone(tz_str).normnalize(datetime(year, 1, 1, tzinfo=timezone('UTC')))

offset = tz.utcoffset(dan, is_dst=False)
print(offset)
offset = tz.utcoffset(dan, is_dst=True)
print(offset)

AttributeError: 'America/New_York' object has no attribute 'normnalize'

In [80]:
tz_str = timezonefinder.TimezoneFinder().certain_timezone_at(lat=lat, lng=lon)
pytz.timezone(tz_str)

<DstTzInfo 'America/New_York' LMT-1 day, 19:04:00 STD>

In [91]:
start_time = pd.to_datetime(datetime(year, 1, 1)).tz_localize(tz)
start_time.utcoffset()

start_time = pd.to_datetime(datetime(year, 1, 1)).tz_localize(tz).tz_convert('UTC')
end_time = pd.to_datetime(datetime(year+1, 1, 1)).tz_localize(tz).tz_convert('UTC')

pd.date_range(start_time, end_time, freq='min')

DatetimeIndex(['2021-01-01 05:00:00+00:00', '2021-01-01 05:01:00+00:00',
               '2021-01-01 05:02:00+00:00', '2021-01-01 05:03:00+00:00',
               '2021-01-01 05:04:00+00:00', '2021-01-01 05:05:00+00:00',
               '2021-01-01 05:06:00+00:00', '2021-01-01 05:07:00+00:00',
               '2021-01-01 05:08:00+00:00', '2021-01-01 05:09:00+00:00',
               ...
               '2022-01-01 04:51:00+00:00', '2022-01-01 04:52:00+00:00',
               '2022-01-01 04:53:00+00:00', '2022-01-01 04:54:00+00:00',
               '2022-01-01 04:55:00+00:00', '2022-01-01 04:56:00+00:00',
               '2022-01-01 04:57:00+00:00', '2022-01-01 04:58:00+00:00',
               '2022-01-01 04:59:00+00:00', '2022-01-01 05:00:00+00:00'],
              dtype='datetime64[ns, UTC]', length=525601, freq='T')

In [7]:
year = 2021

lon = 144.9631
lat = -37.8136


print('Building array of datetime objects...')
bef = datetime.now()
arr_dt = base_date_arr(year)  # naive timestamps as datetime.datetime
af = datetime.now() - bef
print(f'Took {af}')

print('Converting dates to UTC...')
bef = datetime.now()
arr_utc = to_utc(arr_dt, lon, lat, use_dst=True)
af = datetime.now() - bef
print(f'Took {af}')

Building array of datetime objects...
Took 0:00:00.038606
Converting dates to UTC...
Took 0:00:08.752671


In [30]:
import pandas as pd
dt_pd = pd.to_datetime(arr_dt)
utc_localized = dt_pd.tz_localize("UTC")

tz_str = timezonefinder.TimezoneFinder().certain_timezone_at(lat=lat, lng=lon)
tz = pytz.timezone(tz_str)
# offset = tz.utcoffset(dt_pd[0], is_dst=False)
# (dt_pd - offset).tz_localize("UTC")
utc_localized.tz_convert(tz)

DatetimeIndex(['2021-01-01 11:00:00+11:00', '2021-01-01 11:01:00+11:00',
               '2021-01-01 11:02:00+11:00', '2021-01-01 11:03:00+11:00',
               '2021-01-01 11:04:00+11:00', '2021-01-01 11:05:00+11:00',
               '2021-01-01 11:06:00+11:00', '2021-01-01 11:07:00+11:00',
               '2021-01-01 11:08:00+11:00', '2021-01-01 11:09:00+11:00',
               ...
               '2022-01-01 10:50:00+11:00', '2022-01-01 10:51:00+11:00',
               '2022-01-01 10:52:00+11:00', '2022-01-01 10:53:00+11:00',
               '2022-01-01 10:54:00+11:00', '2022-01-01 10:55:00+11:00',
               '2022-01-01 10:56:00+11:00', '2022-01-01 10:57:00+11:00',
               '2022-01-01 10:58:00+11:00', '2022-01-01 10:59:00+11:00'],
              dtype='datetime64[ns, Australia/Melbourne]', length=525600, freq=None)

In [None]:
# bef = datetime.now()

# tz_str = tzwhere.tzwhere().tzNameAt(lat, lon)
# tz = pytz.timezone(tz_str)
# # tz.localize(arr_dt).astimezone(pytz.utc)

# af = datetime.now() - bef
# print(f'Took {af}')

In [13]:
# from datetime import datetime
# lon = 144.9631
# lat = -37.8136


# bef = datetime.now()

# import timezonefinder

# tf = timezonefinder.TimezoneFinder()
# timezone_str = tf.certain_timezone_at(lat=lat, lng=lon)

# af = datetime.now() - bef
# print(f'Took {af}')

In [None]:
# offset = tz.utcoffset(arr_dt[0], is_dst=False)
# (arr_dt - offset).replace(tzinfo=timezone.utc)

In [14]:
# test = np.array([arr_dt[0]])
# test

In [15]:
# lon = 144.9631
# lat = -37.8136
# tz_str = timezonefinder.TimezoneFinder().certain_timezone_at(lat=lat, lng=lon)
# tz = pytz.timezone(tz_str)
# tz.localize(test).astimezone(pytz.utc)

    # tz_str = tzwhere.tzwhere().tzNameAt(lat, lon)
    
tz_str = timezonefinder.TimezoneFinder().certain_timezone_at(lat=lat, lng=lon)
tz = pytz.timezone(tz_str)

def one_dt_utc(dt):
    dt_offset = (dt - offset)
    return dt_offset.replace(tzinfo=timezone.utc)

def one_dt_localized(dt):
    return tz.localize(dt).astimezone(pytz.utc)

if use_dst:
    return np.vectorize(one_dt_localized)(arr)
else:
    # offset = tz.utcoffset(arr[0, 0], is_dst=False)
    offset = tz.utcoffset(arr[0], is_dst=False)
    return np.vectorize(one_dt_utc)(arr)

In [None]:
import pandas as pd

def to_utc_pd(arr, lon, lat, use_dst: bool):
    dt_pd = pd.to_datetime(arr)
    
    tz_str = timezonefinder.TimezoneFinder().certain_timezone_at(lat=lat, lng=lon)
    tz = pytz.timezone(tz_str)
    
    if use_dst:
        return dt_pd.tz_localize(tz).values
    else:
        offset = tz.utcoffset(dt_pd[0], is_dst=False)
        return (dt_pd - offset).tz_localize("UTC").values
        