In [1]:
import pandas as pd 
import numpy as np 

dt = '20110107'
dt = pd.to_datetime(dt)

In [2]:
dt

Timestamp('2011-01-07 00:00:00')

In [3]:
pd.to_datetime('2011-01-07')

Timestamp('2011-01-07 00:00:00')

In [4]:
# get time fields
print (dt.year)

print (dt.month)

print (dt.day)

# maybe you care about price behavior on a particular hour, minute or second of the day?
print (dt.hour)

print (dt.minute)

print (dt.second)

2011
1
7
0
0
0


In [5]:
# useful for checking if weekend
dt.weekday()

4

In [6]:
# handles hours/minute/seconds as well
pd.to_datetime('20110107 12:10:30')

Timestamp('2011-01-07 12:10:30')

In [7]:
# convert back to a string
dt.strftime('%Y%m%d')

'20110107'

Check documentation for other string formatting codes: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

In [8]:
# can shift dates
dt + pd.tseries.offsets.Day()

Timestamp('2011-01-08 00:00:00')

In [9]:
# can shift by multiple time units
dt + pd.tseries.offsets.Day(2)

Timestamp('2011-01-09 00:00:00')

In [10]:
# shift only by business day (skip weekends)
dt + pd.tseries.offsets.BDay()

Timestamp('2011-01-10 00:00:00')

In [11]:
# roll forward / backward
print (pd.tseries.offsets.MonthEnd().rollforward(dt))
print (pd.tseries.offsets.MonthEnd().rollback(dt))

2011-01-31 00:00:00
2010-12-31 00:00:00


Check documentation for other offsets: https://pandas.pydata.org/docs/reference/offset_frequency.html

In [34]:
# subtract two times 
# maybe for checking days till next earnings?

dt1 = pd.to_datetime('20110101')
dt2 = pd.to_datetime('20120630')
print(dt1)
print(dt2)

diff = dt2 - dt1
diff 

2011-01-01 00:00:00
2012-06-30 00:00:00


Timedelta('546 days 00:00:00')

In [35]:
diff.days

546

In [36]:
diff

Timedelta('546 days 00:00:00')

In [37]:
# faster way to create multiple timestamp objects. useful for converting excel dates from strings to timestamps
date = ['20110102','20110103','20110105']
dt_index = pd.DatetimeIndex(date)
dt_index

DatetimeIndex(['2011-01-02', '2011-01-03', '2011-01-05'], dtype='datetime64[ns]', freq=None)

In [38]:
dt_index[0]

Timestamp('2011-01-02 00:00:00')

In [39]:
## get time range 
days = pd.date_range(dt1,dt2,freq='D')
days

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',
               '2011-01-09', '2011-01-10',
               ...
               '2012-06-21', '2012-06-22', '2012-06-23', '2012-06-24',
               '2012-06-25', '2012-06-26', '2012-06-27', '2012-06-28',
               '2012-06-29', '2012-06-30'],
              dtype='datetime64[ns]', length=547, freq='D')

In [49]:
# can use strings as inputs and also use monthly frequency
months = pd.date_range('20110101','20121231',freq='M')
months

  months = pd.date_range('20110101','20121231',freq='M')


DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-31',
               '2012-01-31', '2012-02-29', '2012-03-31', '2012-04-30',
               '2012-05-31', '2012-06-30', '2012-07-31', '2012-08-31',
               '2012-09-30', '2012-10-31', '2012-11-30', '2012-12-31'],
              dtype='datetime64[ns]', freq='ME')

See documentation for other frequencies: https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases

In [44]:
## DataFrames with pandas datetime index 
univ = ['SPY','TLT','VXX','QQQ']
days = pd.date_range('20190101','20210630',freq='min')
df = pd.DataFrame(np.random.randn(len(days),len(univ)),index=days,columns=univ)
df

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,1.618166,-2.738568,-1.630197,2.408701
2019-01-01 00:01:00,1.021112,-0.427073,0.529318,-0.771058
2019-01-01 00:02:00,-1.315814,-1.359258,-0.686280,-0.526976
2019-01-01 00:03:00,0.051565,0.190940,-1.661638,-0.879876
2019-01-01 00:04:00,-1.179026,-1.749931,0.752927,1.902366
...,...,...,...,...
2021-06-29 23:56:00,-0.437594,0.617012,0.159898,0.545339
2021-06-29 23:57:00,2.025051,-1.083892,-0.159526,0.884442
2021-06-29 23:58:00,-2.176453,-0.512629,-1.487678,0.734907
2021-06-29 23:59:00,-0.528031,-1.018049,0.124548,1.993652


In [50]:
df.loc['2019-01-01 00:00:00']

SPY    1.618166
TLT   -2.738568
VXX   -1.630197
QQQ    2.408701
Name: 2019-01-01 00:00:00, dtype: float64

In [51]:
df.loc['2019-01-01']

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,1.618166,-2.738568,-1.630197,2.408701
2019-01-01 00:01:00,1.021112,-0.427073,0.529318,-0.771058
2019-01-01 00:02:00,-1.315814,-1.359258,-0.686280,-0.526976
2019-01-01 00:03:00,0.051565,0.190940,-1.661638,-0.879876
2019-01-01 00:04:00,-1.179026,-1.749931,0.752927,1.902366
...,...,...,...,...
2019-01-01 23:55:00,-1.238921,-0.537953,1.112248,0.564175
2019-01-01 23:56:00,-0.067810,0.274642,-1.073771,0.651298
2019-01-01 23:57:00,0.885956,-0.293423,0.697448,0.535240
2019-01-01 23:58:00,0.284411,1.021089,-1.007168,-1.551924


In [52]:
df.resample('M').sum()

  df.resample('M').sum()


Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-31,-63.241247,57.831485,-111.641882,-180.020662
2019-02-28,140.365287,278.066317,36.835114,-235.893329
2019-03-31,-149.170851,-108.832634,-433.966931,-199.669144
2019-04-30,-411.397929,175.323039,191.660174,19.167235
2019-05-31,96.227681,111.844991,223.332092,353.886383
2019-06-30,262.604177,-34.240613,173.967995,-249.082876
2019-07-31,-273.993472,131.772968,64.050259,67.028133
2019-08-31,485.367442,-501.820647,-34.762681,244.21273
2019-09-30,443.465297,6.844091,231.772454,-114.715207
2019-10-31,-211.69513,74.229901,-281.354789,-112.294739


In [53]:
# 5 minute high 
df.resample('5min').max()

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,1.618166,0.190940,0.752927,2.408701
2019-01-01 00:05:00,1.950695,1.932127,1.592176,1.876271
2019-01-01 00:10:00,2.460919,1.937586,0.786979,0.963111
2019-01-01 00:15:00,1.463353,0.870690,1.202587,0.850782
2019-01-01 00:20:00,0.428156,1.376779,0.637806,2.702120
...,...,...,...,...
2021-06-29 23:40:00,1.981675,1.329879,1.304437,0.095754
2021-06-29 23:45:00,2.322226,1.554655,1.652049,1.936296
2021-06-29 23:50:00,3.189520,1.085600,2.466998,0.715054
2021-06-29 23:55:00,2.188835,0.651735,1.163902,1.993652


In [54]:
# close
df.resample('5min').last()

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,-1.179026,-1.749931,0.752927,1.902366
2019-01-01 00:05:00,1.488540,0.881002,-0.857759,-0.733317
2019-01-01 00:10:00,1.046961,-1.236226,0.458063,-1.572681
2019-01-01 00:15:00,-1.133360,-0.490527,0.034679,-0.150706
2019-01-01 00:20:00,-0.441571,-0.250846,0.637806,-0.829735
...,...,...,...,...
2021-06-29 23:40:00,0.092104,-0.259391,-1.132514,0.095754
2021-06-29 23:45:00,0.421597,1.554655,-1.201277,1.936296
2021-06-29 23:50:00,0.969531,-1.688555,-0.380536,0.222917
2021-06-29 23:55:00,-0.528031,-1.018049,0.124548,1.993652


Again, refer to documentation for how to specify the frequencies: https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases


Many other topics on time series in pandas not covered here!