In [1]:
import pandas as pd 
import numpy as np 

# parse a compact YYYYMMDD string straight into a pandas Timestamp
dt = pd.to_datetime('20110107')

In [2]:
# echo the parsed value: it is now a pandas Timestamp
dt

Timestamp('2011-01-07 00:00:00')

In [3]:
# a dash-separated date string parses to the same Timestamp
pd.to_datetime('2011-01-07')

Timestamp('2011-01-07 00:00:00')

In [4]:
# calendar components of the timestamp, one per line
print(dt.year, dt.month, dt.day, sep='\n')

# intraday components -- handy when studying price behavior at a particular
# hour, minute or second of the day
print(dt.hour, dt.minute, dt.second, sep='\n')

2011
1
7
0
0
0


In [5]:
# weekday() returns the day of the week as an int (Monday=0 ... Sunday=6),
# so a value >= 5 means the date falls on a weekend
dt.weekday()

4

In [6]:
# the input string may also carry a time of day (hours:minutes:seconds)
pd.to_datetime('20110107 12:10:30')

Timestamp('2011-01-07 12:10:30')

In [8]:
# convert back to a string; '%Y%m%d' renders 4-digit year, 2-digit month, 2-digit day
dt.strftime('%Y%m%d')

'20110107'

Check documentation for other string formatting codes: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

In [9]:
# adding an offset shifts the date -- here forward by one day
dt + pd.offsets.Day(1)

Timestamp('2011-01-08 00:00:00')

In [10]:
# offsets take a count, so one addition can move several units at once
dt + pd.offsets.Day(2)

Timestamp('2011-01-09 00:00:00')

In [11]:
# a business-day offset steps over the weekend to the next weekday
dt + pd.offsets.BDay()

Timestamp('2011-01-10 00:00:00')

In [13]:
# snap the date to the following month-end, then to the preceding one
print(pd.offsets.MonthEnd().rollforward(dt))
print(pd.offsets.MonthEnd().rollback(dt))

2011-01-31 00:00:00
2010-12-31 00:00:00


Check documentation for other offsets: https://pandas.pydata.org/docs/reference/offset_frequency.html

In [14]:
# Subtracting two timestamps gives a Timedelta -- e.g. to count the days
# remaining until the next earnings date.
dt1, dt2 = pd.to_datetime('20110101'), pd.to_datetime('20120630')

diff = dt2 - dt1
diff

Timedelta('546 days 00:00:00')

In [15]:
# whole number of days in the Timedelta, as a plain int
diff.days

546

In [16]:
# Build many timestamps in a single call rather than converting strings one
# at a time; handy for turning date strings exported from Excel into timestamps.
date = [
    '20110102',
    '20110103',
    '20110105',
]
dt_index = pd.DatetimeIndex(data=date)
dt_index

DatetimeIndex(['2011-01-02', '2011-01-03', '2011-01-05'], dtype='datetime64[us]', freq=None)

In [17]:
# positional indexing into the DatetimeIndex returns a single Timestamp
dt_index[0]

Timestamp('2011-01-02 00:00:00')

In [18]:
# daily calendar spanning the two timestamps (both endpoints are included)
days = pd.date_range(start=dt1, end=dt2, freq='D')
days

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',
               '2011-01-09', '2011-01-10',
               ...
               '2012-06-21', '2012-06-22', '2012-06-23', '2012-06-24',
               '2012-06-25', '2012-06-26', '2012-06-27', '2012-06-28',
               '2012-06-29', '2012-06-30'],
              dtype='datetime64[us]', length=547, freq='D')

In [23]:
# can use strings as inputs and also use monthly frequency
months = pd.date_range('20110101','20121231',freq='ME')
months

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-31',
               '2012-01-31', '2012-02-29', '2012-03-31', '2012-04-30',
               '2012-05-31', '2012-06-30', '2012-07-31', '2012-08-31',
               '2012-09-30', '2012-10-31', '2012-11-30', '2012-12-31'],
              dtype='datetime64[us]', freq='ME')

See documentation for other frequencies: https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases

In [24]:
## DataFrames with pandas datetime index
# Fixed seed so the random demo data -- and every output derived from it
# below -- is identical on each Restart & Run All.
SEED = 0
rng = np.random.default_rng(SEED)

univ = ['SPY','TLT','VXX','QQQ']
# NOTE: despite the name, `days` holds one stamp per *minute* (freq='min')
days = pd.date_range('20190101','20210630',freq='min')
# one standard-normal draw per (minute, ticker)
df = pd.DataFrame(rng.standard_normal((len(days), len(univ))),index=days,columns=univ)
df

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,0.073602,0.418672,-0.001450,2.170962
2019-01-01 00:01:00,-0.122390,-1.377134,0.082134,0.043347
2019-01-01 00:02:00,-1.628785,0.069387,-1.794052,0.498884
2019-01-01 00:03:00,0.293349,-0.351019,0.437874,0.501329
2019-01-01 00:04:00,-1.066152,-1.128060,-1.638994,0.124774
...,...,...,...,...
2021-06-29 23:56:00,-0.108013,-3.204518,-0.810625,-0.230524
2021-06-29 23:57:00,1.545043,1.215019,-0.313478,0.550376
2021-06-29 23:58:00,-0.826231,-0.032187,-0.109125,-1.477603
2021-06-29 23:59:00,-0.409581,0.256893,-0.663690,1.411136


In [25]:
# a full timestamp string selects that single row, returned as a Series
df.loc['2019-01-01 00:00:00']

SPY    0.073602
TLT    0.418672
VXX   -0.001450
QQQ    2.170962
Name: 2019-01-01 00:00:00, dtype: float64

In [26]:
# a date-only string selects every row that falls on that day
df.loc['2019-01-01']

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,0.073602,0.418672,-0.001450,2.170962
2019-01-01 00:01:00,-0.122390,-1.377134,0.082134,0.043347
2019-01-01 00:02:00,-1.628785,0.069387,-1.794052,0.498884
2019-01-01 00:03:00,0.293349,-0.351019,0.437874,0.501329
2019-01-01 00:04:00,-1.066152,-1.128060,-1.638994,0.124774
...,...,...,...,...
2019-01-01 23:55:00,0.422890,0.849893,0.099330,-0.549377
2019-01-01 23:56:00,-0.725307,1.299389,-0.198472,0.141894
2019-01-01 23:57:00,1.219611,2.257551,0.974259,-0.785142
2019-01-01 23:58:00,-0.742919,-2.616081,-1.986043,-1.184595


In [28]:
# bucket the minute rows by calendar month (each bucket is labelled with its
# month-end date) and sum every column within the bucket
df.resample('ME').sum()

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-31,337.606731,54.60683,190.894353,-179.246616
2019-02-28,50.451215,24.649023,185.182391,127.756432
2019-03-31,211.210943,28.593614,240.783583,-297.814647
2019-04-30,208.729677,-255.392048,-438.459761,-44.849467
2019-05-31,-27.622305,54.577861,145.164634,133.244819
2019-06-30,156.158964,-471.752312,-350.257686,-1.678659
2019-07-31,-121.165238,-1.378506,181.757098,-197.580082
2019-08-31,-54.157465,293.725345,168.273099,340.128173
2019-09-30,-180.099114,-265.776522,-3.182284,-98.855471
2019-10-31,-88.431914,233.46446,-28.454785,-179.782261


In [29]:
# 5-minute high: the largest value of each column within every 5-minute bucket
df.resample('5min').max()

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,0.293349,0.418672,0.437874,2.170962
2019-01-01 00:05:00,1.397086,0.500386,1.881369,2.085558
2019-01-01 00:10:00,1.630374,1.101348,2.197601,0.068951
2019-01-01 00:15:00,1.240618,1.908265,3.101760,0.011811
2019-01-01 00:20:00,0.679472,1.433124,-0.299282,0.916847
...,...,...,...,...
2021-06-29 23:40:00,0.227706,1.578435,0.092833,2.048649
2021-06-29 23:45:00,0.219328,1.292807,0.923667,1.650296
2021-06-29 23:50:00,1.610528,0.250995,0.119897,-0.238515
2021-06-29 23:55:00,1.545043,1.215019,-0.109125,1.411136


In [30]:
# 5-minute close: the last value of each column within every 5-minute bucket
df.resample('5min').last()

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,-1.066152,-1.128060,-1.638994,0.124774
2019-01-01 00:05:00,-0.087037,-1.074730,-0.184633,2.085558
2019-01-01 00:10:00,1.324826,0.442064,0.961096,-0.295070
2019-01-01 00:15:00,-1.343421,-1.070104,-0.401994,0.000523
2019-01-01 00:20:00,0.679472,1.066464,-0.299282,0.720235
...,...,...,...,...
2021-06-29 23:40:00,-0.854965,-0.306924,-1.032581,-0.564564
2021-06-29 23:45:00,-0.556610,-0.324340,-0.461973,1.650296
2021-06-29 23:50:00,0.326311,-1.539354,0.095164,-0.447626
2021-06-29 23:55:00,-0.409581,0.256893,-0.663690,1.411136


Again, refer to the documentation for how to specify frequencies: https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases


Many other pandas time-series topics are not covered here!