In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# datetime

In [2]:
import datetime as dt
import time

Basic python module datetime provides date, time and datetime objects. They can be converted to/from strings, can contain timezone information. Can get date and time components from datetime object.

In [83]:
dt.datetime(year=2020, month=1, day=31, hour=13, minute=14, second=31)  # usually omit keywords
d
dt.date(2020, 1, 3)
dt.time(12, 32)
d.date(), d.time()

datetime.datetime(2020, 1, 31, 13, 14, 31)

datetime.datetime(2020, 10, 20, 0, 0)

datetime.date(2020, 1, 3)

datetime.time(12, 32)

(datetime.date(2020, 10, 20), datetime.time(0, 0))

In [81]:
d = dt.datetime.strptime('2020-10-20', r'%Y-%m-%d')
d

d.strftime('%m/%d/%Y')

datetime.datetime(2020, 10, 20, 0, 0)

'10/20/2020'

In [82]:
date = dt.date(2020, 1, 20)
time = dt.time(11, 23)
dt.datetime.combine(date, time)

datetime.datetime(2020, 1, 20, 11, 23)

In [5]:
# get today or now
dt.date.today()
dt.datetime.now()

datetime.date(2020, 9, 15)

datetime.datetime(2020, 9, 15, 20, 11, 33, 225456)

###  timedelta objects

In [7]:
# time delta

td = dt.datetime.now() - dt.datetime(2020, 2, 1)
td
type(td)
td.days

datetime.timedelta(days=227, seconds=72693, microseconds=350453)

datetime.timedelta

227

### computer system time representation

* all computers count time from Unit epoch instant : 1970-01-01:00-00-00 UTC

* UTC is never adjust to daylight saving, etc - the only stable timezone

* On 32-bit systems the time will overflow on 19 Jan 2038 - has to be solved in advance (similar to 2K problem)

* It's preferred to use datetime rather time module
* There may be some minor simplifications when dealing with time due to leap seconds, etc

In [3]:
time.time()  # number of seconds since Unit epoch

1600197093.088696

### naive and aware datetime objects
* naive objects don't contain any timezone information
* aware objects contain timezone

* naive instances do not represent well defined moment in time
* difference is determined by **tzinfo** attribute
* python 3.9 will have new implementation zoneinfo for tzinfo, dateutils also has tzinfo implementations

* working with datetimes in different timezones is complicated for multiple reasons
* one is irregularity of daylight saving time periods - that's why time itself cannot be converted between different timezones without a date (!!!)

In [86]:
import dateutil
import pytz

In [87]:
lon_tz = dateutil.tz.gettz('Europe/London')
lon_tz

# datetime with timezone
dtz = dt.datetime(2010, 12, 21, tzinfo=lon_tz)
dtz

dtz.tzname()

tzfile('/usr/share/zoneinfo/Europe/London')

datetime.datetime(2010, 12, 21, 0, 0, tzinfo=tzfile('/usr/share/zoneinfo/Europe/London'))

'GMT'

### timezone localization and convertion using pytz

In [92]:
pytz.utc  # UTC timezone
eastern = pytz.timezone('US/Eastern')  # timezone() returns tzinfo instance for tz aware datetime
eastern.zone  # return zone string
amsterdam = timezone('Europe/Amsterdam')

<UTC>

'US/Eastern'

In [94]:
dtm = dt.datetime(2010, 12, 21, 10)
dtz = amsterdam.localize(dtm)  #localize naive datetime

dtm
dtz
dtz.astimezone(eastern)

datetime.datetime(2010, 12, 21, 10, 0)

datetime.datetime(2010, 12, 21, 10, 0, tzinfo=<DstTzInfo 'Europe/Amsterdam' CET+1:00:00 STD>)

datetime.datetime(2010, 12, 21, 4, 0, tzinfo=<DstTzInfo 'US/Eastern' EST-1 day, 19:00:00 STD>)

In [96]:
pytz.all_timezones[:10]  # list of all timezones (15 shown here) - quite useful to have

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara',
 'Africa/Asmera',
 'Africa/Bamako',
 'Africa/Bangui',
 'Africa/Banjul',
 'Africa/Bissau']

### relative delta functionality (equivalents in pandas ??!)

In [12]:
# relativedelta to use months, years etc
from dateutil.relativedelta import relativedelta
next_month = relativedelta(months=+1)
dt.date.today() + next_month  

relativedelta(dt.date.today(), dt.date.today() + dt.timedelta(days = 1000))  # days to (years, months, days)

dt.date.today() + relativedelta(years=+1, months=-1)  # next year, one month before

datetime.date(2020, 10, 15)

relativedelta(years=-2, months=-8, days=-27)

datetime.date(2021, 8, 15)

In [13]:
# relativedelta has many other uses
import calendar
from dateutil import rrule

today = dt.date.today()

today + relativedelta(weekday=rrule.FR)  # nearest friday
today + relativedelta(day=31, weekday=rrule.FR(-1))  # nearest friday

datetime.date(2020, 9, 18)

datetime.date(2020, 9, 25)

In [14]:
# dateutil.rrule
list(rrule.rrule(freq=rrule.MONTHLY, count=4, dtstart=today))  # returns an iterator

[datetime.datetime(2020, 9, 15, 0, 0),
 datetime.datetime(2020, 10, 15, 0, 0),
 datetime.datetime(2020, 11, 15, 0, 0),
 datetime.datetime(2020, 12, 15, 0, 0)]

In [15]:
# every January select days by a list of weekdays
list(rrule.rrule(rrule.YEARLY, bymonth=1, byweekday=[1, 3, 5], dtstart=today,
             until=today+relativedelta(years=2)))

[datetime.datetime(2021, 1, 2, 0, 0),
 datetime.datetime(2021, 1, 5, 0, 0),
 datetime.datetime(2021, 1, 7, 0, 0),
 datetime.datetime(2021, 1, 9, 0, 0),
 datetime.datetime(2021, 1, 12, 0, 0),
 datetime.datetime(2021, 1, 14, 0, 0),
 datetime.datetime(2021, 1, 16, 0, 0),
 datetime.datetime(2021, 1, 19, 0, 0),
 datetime.datetime(2021, 1, 21, 0, 0),
 datetime.datetime(2021, 1, 23, 0, 0),
 datetime.datetime(2021, 1, 26, 0, 0),
 datetime.datetime(2021, 1, 28, 0, 0),
 datetime.datetime(2021, 1, 30, 0, 0),
 datetime.datetime(2022, 1, 1, 0, 0),
 datetime.datetime(2022, 1, 4, 0, 0),
 datetime.datetime(2022, 1, 6, 0, 0),
 datetime.datetime(2022, 1, 8, 0, 0),
 datetime.datetime(2022, 1, 11, 0, 0),
 datetime.datetime(2022, 1, 13, 0, 0),
 datetime.datetime(2022, 1, 15, 0, 0),
 datetime.datetime(2022, 1, 18, 0, 0),
 datetime.datetime(2022, 1, 20, 0, 0),
 datetime.datetime(2022, 1, 22, 0, 0),
 datetime.datetime(2022, 1, 25, 0, 0),
 datetime.datetime(2022, 1, 27, 0, 0),
 datetime.datetime(2022, 1, 29, 0

In [16]:
# monthly on the second Friday, 10 occurencies
list(rrule.rrule(rrule.MONTHLY, count=10, byweekday=rrule.FR(2), dtstart=today))

[datetime.datetime(2020, 10, 9, 0, 0),
 datetime.datetime(2020, 11, 13, 0, 0),
 datetime.datetime(2020, 12, 11, 0, 0),
 datetime.datetime(2021, 1, 8, 0, 0),
 datetime.datetime(2021, 2, 12, 0, 0),
 datetime.datetime(2021, 3, 12, 0, 0),
 datetime.datetime(2021, 4, 9, 0, 0),
 datetime.datetime(2021, 5, 14, 0, 0),
 datetime.datetime(2021, 6, 11, 0, 0),
 datetime.datetime(2021, 7, 9, 0, 0)]

lots of tricks for individual dates can be done with dateutils, see https://dateutil.readthedocs.io/en/stable/index.html

# numpy datetime64

* python native datetime is a bit too simplistic and also does provide methods to work with sequences of dates (can only have an ndarray of datetimes or dates)

* numpy implements np.datetime64 type which also serves as dtype for corresponding np.ndarray

* np.datetime64[ns] is a general type (machine independent), on a particular machine it may be represented by '<M8[ns]' '>M8[ns]' (little/big endian etc)
(more generally, see [here](https://stackoverflow.com/questions/29206612/difference-between-data-type-datetime64ns-and-m8ns#:~:text=2%20Answers&text=datetime64%5Bns%5D%20is%20a%20general,of%20NumPy%20to%20the%20next.&text=However%2C%20on%20a%20big%20endian%20machine%2C%20np.), same as int64 and '<i8', '>i8')

In [104]:
# create single np.datetime64 object
np.datetime64(date_iso)  # from ISO string
np.datetime64(dt.date(2020, 1, 1))  # from datetime objects
np.datetime64(dt.datetime(2020, 1, 1, 10, 30))

numpy.datetime64('2020-08-14')

numpy.datetime64('2020-01-01')

numpy.datetime64('2020-01-01T10:30:00.000000')

In [107]:
dtz
np.datetime64(dtz)  # timezones are deprecated - NOT TO USE

datetime.datetime(2010, 12, 21, 10, 0, tzinfo=<DstTzInfo 'Europe/Amsterdam' CET+1:00:00 STD>)

  np.datetime64(dtz)  # timezones are deprecated - NOT TO USE


numpy.datetime64('2010-12-21T09:00:00.000000')

In [118]:
# datetime64 has unit parameter
np.datetime64('2020', 'W')
np.datetime64('2020', 'M')
np.datetime64('2020', 'Y')

numpy.datetime64('2019-12-26')

numpy.datetime64('2020-01')

numpy.datetime64('2020')

In [120]:
# for numpy version unit parameter may be useful in many ways, e.g.
np.arange('2005-02', '2005-03', dtype='datetime64[D]')
np.arange('2005-02', '2005-03', dtype='datetime64[W]')  # weekly frequency
# see more in the numpy guide (or put datetime related part here)

array(['2005-02-01', '2005-02-02', '2005-02-03', '2005-02-04',
       '2005-02-05', '2005-02-06', '2005-02-07', '2005-02-08',
       '2005-02-09', '2005-02-10', '2005-02-11', '2005-02-12',
       '2005-02-13', '2005-02-14', '2005-02-15', '2005-02-16',
       '2005-02-17', '2005-02-18', '2005-02-19', '2005-02-20',
       '2005-02-21', '2005-02-22', '2005-02-23', '2005-02-24',
       '2005-02-25', '2005-02-26', '2005-02-27', '2005-02-28'],
      dtype='datetime64[D]')

array(['2005-01-27', '2005-02-03', '2005-02-10', '2005-02-17'],
      dtype='datetime64[W]')

In [121]:
# create array
dtarr = np.array([dt.date(2020, 1, 1), dt.date(2020, 1, 3), dt.date(2020, 2, 1)]).astype(np.datetime64)
dtarr  # unit is D (days)

array(['2020-01-01', '2020-01-03', '2020-02-01'], dtype='datetime64[D]')

In [122]:
dtarr.astype('datetime64[ns]')  # convert unit

array(['2020-01-01T00:00:00.000000000', '2020-01-03T00:00:00.000000000',
       '2020-02-01T00:00:00.000000000'], dtype='datetime64[ns]')

* **pandas only implements an equivalent of datetime64[ns]  (see on pandas objects later)**
* it has other ways to deal with ranges and frequencies

# pandas datetime functionality
* interaction with xarray

In [123]:
import pandas as pd
import xarray as xr

In [124]:
# pandas basic individual datetime type - Timestamp
# use to_datetime to convert to Timestamp
pd.to_datetime(dt.date(2020, 1, 2))

Timestamp('2020-01-02 00:00:00')

In [144]:
# to_datetime can take lots of different things as argument
pd.to_datetime('08/14/2020')
pd.to_datetime(np.datetime64(date_iso))
# always better to have ISO string date as argument and specifically parse it

date_iso = '2020-08-14'
tst = pd.to_datetime(date_iso, format='%Y-%m-%d')
tst
# pd.to_datetime('09/14/2020', format='%Y-%m-%d')  # other formats won't work in this case

Timestamp('2020-08-14 00:00:00')

Timestamp('2020-08-14 00:00:00')

Timestamp('2020-08-14 00:00:00')

In [152]:
# convert to other formats
tst.to_datetime64()
tst.strftime('%m/%d/%Y')

# to native datetime objects
tst.to_pydatetime()
tst.date()
tst.time()

numpy.datetime64('2020-08-14T00:00:00.000000000')

'08/14/2020'

datetime.datetime(2020, 8, 14, 0, 0)

datetime.date(2020, 8, 14)

datetime.time(0, 0)

### pandas datetime arrays

In [131]:
dates = pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-03'])
dates

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03'], dtype='datetime64[ns]', freq=None)

In [134]:
pd.Index(['2020-01-01', '2020-01-02', '2020-01-03'])  #could create simple index from it
# to_datetime applied to array-like object create DatetimeIndex

# it has dtype and freq parameters
# dtype is always datetime64[ns] 
# (only ns is implemented, it's pandas 'equivalent' to numpy.datetime64[ns])
# unlike numpy, it can be tz aware
dates.tz_localize('Europe/London')  # getting datetime64[ns, Europe/London]

Index(['2020-01-01', '2020-01-02', '2020-01-03'], dtype='object')

DatetimeIndex(['2020-01-01 00:00:00+00:00', '2020-01-02 00:00:00+00:00',
               '2020-01-03 00:00:00+00:00'],
              dtype='datetime64[ns, Europe/London]', freq=None)

In [137]:
ser = pd.Series(['2020-01-01', '2020-01-02', '2020-01-03'])
ser
pd.to_datetime(ser)  # returns series when applied to series, DatetimeIndex otherwise

0    2020-01-01
1    2020-01-02
2    2020-01-03
dtype: object

0   2020-01-01
1   2020-01-02
2   2020-01-03
dtype: datetime64[ns]

In [153]:
# convert DatetimeIndex to other formats
dates.date
dates.time
dates.strftime('%Y-%m-%d')
dates.values
dates.to_pydatetime()

array([datetime.date(2020, 1, 1), datetime.date(2020, 1, 2),
       datetime.date(2020, 1, 3)], dtype=object)

array([datetime.time(0, 0), datetime.time(0, 0), datetime.time(0, 0)],
      dtype=object)

Index(['2020-01-01', '2020-01-02', '2020-01-03'], dtype='object')

array(['2020-01-01T00:00:00.000000000', '2020-01-02T00:00:00.000000000',
       '2020-01-03T00:00:00.000000000'], dtype='datetime64[ns]')

array([datetime.datetime(2020, 1, 1, 0, 0),
       datetime.datetime(2020, 1, 2, 0, 0),
       datetime.datetime(2020, 1, 3, 0, 0)], dtype=object)

In [166]:
dates2 = pd.to_datetime(['2022-01-01', '2022-01-02'])

# set-like DatetimeIndex operations
dates.union(dates2)
dates.intersection(dates2)
dates.difference(dates2)

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2022-01-01',
               '2022-01-02'],
              dtype='datetime64[ns]', freq=None)

DatetimeIndex([], dtype='datetime64[ns]', freq=None)

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03'], dtype='datetime64[ns]', freq=None)

### creating ranges

In [157]:
pd.date_range('2020-01-01', '2020-01-05')
pd.date_range('2020-01-01', periods=5, freq='W')

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05'],
              dtype='datetime64[ns]', freq='D')

DatetimeIndex(['2020-01-05', '2020-01-12', '2020-01-19', '2020-01-26',
               '2020-02-02'],
              dtype='datetime64[ns]', freq='W-SUN')

In [163]:
dates = pd.bdate_range('2020-01-01', '2020-01-05') # has frequency
dates.union(pd.to_datetime(['2020-02-02']))  # freq is None

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-02-02'], dtype='datetime64[ns]', freq=None)

### more ranges
### slicing
### timezones, convertions with xarray
### timedeltas and offsets
### offsets and freqs correspondance
### periods
### relativedelta tricks equivalences

read full pandas.pydata documentation on datetimes 

In [31]:
pd.Series(index=dates)  # has frequency if index has
x = pd.DataFrame(index=dates, columns=list('abc'))

  pd.Series(index=dates)  # has frequency if index has


2020-01-01   NaN
2020-01-02   NaN
2020-01-03   NaN
2020-01-06   NaN
2020-01-07   NaN
              ..
2020-05-26   NaN
2020-05-27   NaN
2020-05-28   NaN
2020-05-29   NaN
2020-06-01   NaN
Freq: B, Length: 109, dtype: float64

In [32]:
x.index.freq
# df does not have frequency but .index has

<BusinessDay>

In [33]:
pd.Series(index=dates2)

  pd.Series(index=dates2)


2020-01-01   NaN
2020-01-02   NaN
2020-01-03   NaN
2020-01-06   NaN
2020-01-07   NaN
              ..
2022-01-27   NaN
2022-01-28   NaN
2022-01-29   NaN
2022-01-30   NaN
2022-01-31   NaN
Length: 140, dtype: float64

In [39]:
dates[dates < d]

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-06',
               '2020-01-07', '2020-01-08', '2020-01-09', '2020-01-10',
               '2020-01-13', '2020-01-14',
               ...
               '2020-05-19', '2020-05-20', '2020-05-21', '2020-05-22',
               '2020-05-25', '2020-05-26', '2020-05-27', '2020-05-28',
               '2020-05-29', '2020-06-01'],
              dtype='datetime64[ns]', length=109, freq='B')

In [41]:
 #dates[dates < dt.date(2020,3,3)]

In [42]:
dt.date(2020,3,3) < dates.values[0]

TypeError: '>' not supported between instances of 'int' and 'datetime.date'

In [None]:
dates[0]

In [43]:
dates.values[0]

numpy.datetime64('2020-01-01T00:00:00.000000000')

In [49]:
dates[0].to_datetime64()

numpy.datetime64('2020-01-01T00:00:00.000000000')

In [50]:
dates.values.dtype

dtype('<M8[ns]')

In [51]:
dates.values.astype(np.datetime64).dtype

dtype('<M8[ns]')

In [52]:
# https://stackoverflow.com/questions/29206612/difference-between-data-type-datetime64ns-and-m8ns
# difference between np.datetime64 and '<M8[ns]'

In [53]:
tzdates = pd.date_range('20130101',periods=3,tz='US/Eastern')

In [54]:
tzdates.dtype

datetime64[ns, US/Eastern]

In [55]:
tzdates.values.dtype

dtype('<M8[ns]')

In [56]:
# timezone aware index and xarray

df = pd.DataFrame(0, index=tzdates, columns=list('ABC'))
df.index.name = 'date'
df.columns.name = 'field'
df.index = df.index.tz_convert('UTC').tz_localize(None)
df

field,A,B,C
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-01 05:00:00,0,0,0
2013-01-02 05:00:00,0,0,0
2013-01-03 05:00:00,0,0,0


In [57]:
xarr = xr.DataArray(df)

In [58]:
xarr

In [59]:
df_new = xarr.to_pandas()
df_new.index = df_new.index.tz_localize('UTC').tz_convert('US/Eastern')
df_new
df

field,A,B,C
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-01 00:00:00-05:00,0,0,0
2013-01-02 00:00:00-05:00,0,0,0
2013-01-03 00:00:00-05:00,0,0,0


field,A,B,C
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-01 05:00:00,0,0,0
2013-01-02 05:00:00,0,0,0
2013-01-03 05:00:00,0,0,0


In [60]:
d

numpy.datetime64('2020-08-14')

In [61]:
df.index

DatetimeIndex(['2013-01-01 05:00:00', '2013-01-02 05:00:00',
               '2013-01-03 05:00:00'],
              dtype='datetime64[ns]', name='date', freq=None)

In [62]:
dates.tz_localize('Europe/London')

DatetimeIndex(['2020-01-01 00:00:00+00:00', '2020-01-02 00:00:00+00:00',
               '2020-01-03 00:00:00+00:00', '2020-01-06 00:00:00+00:00',
               '2020-01-07 00:00:00+00:00', '2020-01-08 00:00:00+00:00',
               '2020-01-09 00:00:00+00:00', '2020-01-10 00:00:00+00:00',
               '2020-01-13 00:00:00+00:00', '2020-01-14 00:00:00+00:00',
               ...
               '2020-05-19 00:00:00+01:00', '2020-05-20 00:00:00+01:00',
               '2020-05-21 00:00:00+01:00', '2020-05-22 00:00:00+01:00',
               '2020-05-25 00:00:00+01:00', '2020-05-26 00:00:00+01:00',
               '2020-05-27 00:00:00+01:00', '2020-05-28 00:00:00+01:00',
               '2020-05-29 00:00:00+01:00', '2020-06-01 00:00:00+01:00'],
              dtype='datetime64[ns, Europe/London]', length=109, freq=None)

In [63]:
# xarray stores numpy datetime64 dates, and so does not support tz aware dates; to_pandas will convert timezone aware 
# dates to epochs ns; then to_pandas will make correct UTC datetimes from them;
# better to convert datetimes to UTC to store in xarray; then when converting to pandas - get UTC
# and convert back to the original timezone

In [64]:
ofst = pd.offsets.BDay(1)
type(ofst)

pandas._libs.tslibs.offsets.BusinessDay

As mentioned in the Overview in the Time Series docs, https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#overview, DateOffset respect calendar arithmetic while Timedelta respects absolute time arithmetic.

### periods

In [68]:
pd.Period('2020-01', freq='Q').end()

AttributeError: 'Period' object has no attribute 'end'