# DatetimeIndex - Indexing

One of the main uses for DatetimeIndex is as an index for pandas objects. The DatetimeIndex class contains many time series related optimizations:

- A large range of dates for various offsets are pre-computed and cached under the hood in order to make generating subsequent date ranges very fast (just have to grab a slice).

- Fast shifting using the shift method on pandas objects.

- Unioning of overlapping DatetimeIndex objects with the same frequency is very fast (important for fast data alignment).

- Quick access to date fields via properties such as year, month, etc.

- Regularization functions like snap and very fast asof logic.

In [5]:
import numpy as np
import pandas as pd
import datetime

In [162]:
# Zero-dimensional NumPy array holding a single calendar day; the unit is
# inferred from the string, giving dtype datetime64[D].
date = np.array('2012-06-04', dtype='datetime64')
date

array('2012-06-04', dtype='datetime64[D]')

In [163]:
# Row labels as plain strings. The first date appears twice, so the index
# built from this list is non-unique.
# (Unused alternative: date + np.arange(4) would give four consecutive days.)
index = ['2012-06-04'] * 2 + ['2012-06-05', '2012-06-06']
# 4x4 table of random integers drawn from 0..9 (unseeded, so values differ per run).
dataset = np.random.randint(0, 10, size=(4, 4))

In [164]:
# Wrap the random integers in a DataFrame labelled by the date strings.
data = pd.DataFrame(
    data=dataset,
    index=index,
    columns=['one', 'two', 'three', 'four'],
)

In [165]:
# Show the frame; at this point the row labels are still plain strings.
data

Unnamed: 0,one,two,three,four
2012-06-04,7,6,6,7
2012-06-04,4,5,4,6
2012-06-05,2,8,3,8
2012-06-06,2,6,3,6


In [166]:
# Parse the string row labels into timestamps, replacing the index in place.
# Constructor-style alternative: data.index = pd.DatetimeIndex(data.index)
data.index = pd.to_datetime(data.index)

In [167]:
# Inspect the converted index (dtype and freq are shown in the repr).
data.index

DatetimeIndex(['2012-06-04', '2012-06-04', '2012-06-05', '2012-06-06'], dtype='datetime64[ns]', freq=None)

In [168]:
# Returns a new frame with the dates moved into an 'index' column and a
# 0..n-1 row index. The result is not assigned, so `data` keeps its dates.
data.reset_index()

Unnamed: 0,index,one,two,three,four
0,2012-06-04,7,6,6,7
1,2012-06-04,4,5,4,6
2,2012-06-05,2,8,3,8
3,2012-06-06,2,6,3,6


In [169]:
# Still the DatetimeIndex: the reset_index() result above was never assigned.
data.index

DatetimeIndex(['2012-06-04', '2012-06-04', '2012-06-05', '2012-06-06'], dtype='datetime64[ns]', freq=None)

In [170]:
# Positional selection of row 3; passing a one-element list keeps the result
# a DataFrame rather than collapsing it to a Series.
data.iloc[[3]]

Unnamed: 0,one,two,three,four
2012-06-06,2,6,3,6


In [171]:
# Label slice over a single day; both endpoints are inclusive, so every row
# stamped 2012-06-04 (including the duplicate) is returned.
data.loc['2012-06-04':'2012-06-04']

Unnamed: 0,one,two,three,four
2012-06-04,7,6,6,7
2012-06-04,4,5,4,6


In [172]:
# Partial-string selection of every row in 2012.
# Uses .loc because `data['2012']` on a DataFrame is a column lookup in
# pandas >= 2.0 (row selection by a single date string through [] was
# deprecated in 1.2 and then removed) and would raise KeyError there.
data.loc['2012']

Unnamed: 0,one,two,three,four
2012-06-04,7,6,6,7
2012-06-04,4,5,4,6
2012-06-05,2,8,3,8
2012-06-06,2,6,3,6


In [173]:
# Positional selection of row 1 (the second of the two 2012-06-04 rows),
# returned as a one-row DataFrame.
data.iloc[[1]]

Unnamed: 0,one,two,three,four
2012-06-04,4,5,4,6


In [174]:
# Keep all rows, restrict to the 'two' and 'three' columns.
data.loc[:, ['two', 'three']]

Unnamed: 0,two,three
2012-06-04,6,6
2012-06-04,5,4
2012-06-05,8,3
2012-06-06,6,3


In [33]:
# Two-level index: country code (outer) paired element-wise with a date string (inner).
index3 = pd.MultiIndex.from_arrays(
    [
        ['FR', 'SWE', 'FR', 'SWE'],
        ['2012-06-04', '2012-06-04', '2012-06-05', '2012-06-06'],
    ]
)
# 4x4 table of random integers drawn from 0..9 (unseeded).
dataset3 = np.random.randint(0, 10, size=(4, 4))

In [34]:
# DataFrame whose rows are labelled by the (country, date) MultiIndex.
data3 = pd.DataFrame(
    data=dataset3,
    index=index3,
    columns=['one', 'two', 'three', 'four'],
)

In [35]:
# Show the frame; the two index levels are still unnamed here.
data3

Unnamed: 0,Unnamed: 1,one,two,three,four
FR,2012-06-04,0,1,3,8
SWE,2012-06-04,1,5,2,7
FR,2012-06-05,7,0,8,8
SWE,2012-06-06,2,6,3,3


In [36]:
# Select on the outer level only; the matched level is dropped from the
# result, leaving the dates as the row labels.
data3.loc['FR']

Unnamed: 0,one,two,three,four
2012-06-04,0,1,3,8
2012-06-05,7,0,8,8


In [37]:
# A complete (country, date) key identifies one row, returned as a Series
# whose name is the key tuple.
data3.loc[('FR','2012-06-04')]

one      0
two      1
three    3
four     8
Name: (FR, 2012-06-04), dtype: int64

In [38]:
# Repr of the MultiIndex: one tuple per row.
index3

MultiIndex([( 'FR', '2012-06-04'),
            ('SWE', '2012-06-04'),
            ( 'FR', '2012-06-05'),
            ('SWE', '2012-06-06')],
           )

In [40]:
# The same labels materialised as an object array of tuples.
index3.values

array([('FR', '2012-06-04'), ('SWE', '2012-06-04'), ('FR', '2012-06-05'),
       ('SWE', '2012-06-06')], dtype=object)

In [41]:
# Integer codes per level: each row's label is stored as a position into that
# level's array of distinct values (see index3.levels).
index3.codes

FrozenList([[0, 1, 0, 1], [0, 0, 1, 2]])

In [42]:
# The frame's index shows the same tuples as index3.
data3.index

MultiIndex([( 'FR', '2012-06-04'),
            ('SWE', '2012-06-04'),
            ( 'FR', '2012-06-05'),
            ('SWE', '2012-06-06')],
           )

In [44]:
# Name the two index levels; the names show up as an extra header row when
# the frame is displayed.
data3.index.names = ['Country', 'Date']

In [47]:
# NOTE(review): this does not label anything on the DataFrame. It attaches an
# ad-hoc Python attribute called `columns` to the index object, which is why
# the display of `data3` that follows shows no 'Season' label anywhere.
# If the goal was to name the column axis, the usual spelling is
#     data3.columns.name = 'Season'
# (changing it here also requires updating the later read of this attribute).
data3.index.columns = 'Season'

In [48]:
# Display again: the index levels now carry the names Country and Date.
data3

Unnamed: 0_level_0,Unnamed: 1_level_0,one,two,three,four
Country,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
FR,2012-06-04,0,1,3,8
SWE,2012-06-04,1,5,2,7
FR,2012-06-05,7,0,8,8
SWE,2012-06-06,2,6,3,3


In [50]:
# Reads back the ad-hoc `columns` attribute that was stored on the index
# object earlier; it is plain attribute storage and does not affect the frame.
data3.index.columns

'Season'

In [56]:
# Pivot the four columns into a new innermost index level, producing a
# Series indexed by (Country, Date, column name).
data3.stack()

Country  Date             
FR       2012-06-04  one      0
                     two      1
                     three    3
                     four     8
SWE      2012-06-04  one      1
                     two      5
                     three    2
                     four     7
FR       2012-06-05  one      7
                     two      0
                     three    8
                     four     8
SWE      2012-06-06  one      2
                     two      6
                     three    3
                     four     3
dtype: int64

In [6]:
# Bounds of the date range used below: 1 January of 2011 and of 2012.
start, end = (datetime.datetime(year, 1, 1) for year in (2011, 2012))

In [8]:
# Last business day of every month between `start` and `end`.
# Note: pandas >= 2.2 spells this alias 'BME'; 'BM' is kept because the
# outputs in this notebook come from an older pandas.
rng = pd.date_range(start=start, end=end, freq='BM')
# One standard-normal draw per date (unseeded).
ts = pd.Series(data=np.random.randn(len(rng)), index=rng)

In [11]:
# Label slice between two date strings; both endpoints are inclusive.
ts.loc['10/31/2011':'12/31/2011']

2011-10-31   -0.135225
2011-11-30   -0.045533
2011-12-30   -0.049118
Freq: BM, dtype: float64

In [12]:
# Partial-string selection: every observation dated in 2011.
ts.loc['2011']

2011-01-31   -0.409752
2011-02-28    0.721109
2011-03-31   -1.100811
2011-04-29   -0.179795
2011-05-31    0.997164
2011-06-30   -0.438432
2011-07-29   -0.102879
2011-08-31   -0.919533
2011-09-30    1.856630
2011-10-31   -0.135225
2011-11-30   -0.045533
2011-12-30   -0.049118
Freq: BM, dtype: float64

## Multi-index

In [23]:
# 20 random values indexed by the cross product of ten 12-hourly timestamps
# (outer level) and the letters 'a'/'b' (inner level).
dft2 = pd.DataFrame(
    np.random.randn(20, 1),
    columns=['A'],
    index=pd.MultiIndex.from_product(
        [
            pd.date_range('20130101', periods=10, freq='12H'),
            ['a', 'b'],
        ]
    ),
)

In [24]:
# Preview the first rows: timestamp is the outer level, letter the inner one.
dft2.head()

Unnamed: 0,Unnamed: 1,A
2013-01-01 00:00:00,a,0.809121
2013-01-01 00:00:00,b,-0.513095
2013-01-01 12:00:00,a,-0.182565
2013-01-01 12:00:00,b,0.684358
2013-01-02 00:00:00,a,0.400683


In [17]:
# Partial-string selection against the outer (datetime) level of the
# MultiIndex: returns every row stamped on that day.
dft2.loc['2013-01-05']

Unnamed: 0,Unnamed: 1,A
2013-01-05 00:00:00,a,-0.030792
2013-01-05 00:00:00,b,0.918783
2013-01-05 12:00:00,a,-0.472045
2013-01-05 12:00:00,b,-0.59651


In [25]:
# Short alias for building multi-level slicers.
idx = pd.IndexSlice
# Put the letter level first and the timestamps second, then sort so the
# index is ordered for slicing. Note this rebinds `dft2`, so running the
# cell a second time swaps the levels back.
dft2 = (
    dft2
    .swaplevel(i=0, j=1)
    .sort_index()
)
dft2.head()

Unnamed: 0,Unnamed: 1,A
a,2013-01-01 00:00:00,0.809121
a,2013-01-01 12:00:00,-0.182565
a,2013-01-02 00:00:00,0.400683
a,2013-01-02 12:00:00,-0.929708
a,2013-01-03 00:00:00,0.002233


In [26]:
# idx[:, '2013-01-05'] reads as: any value in the first level, and that day
# in the second (datetime) level. The trailing `:` keeps every column.
dft2.loc[idx[:, '2013-01-05'], :]

Unnamed: 0,Unnamed: 1,A
a,2013-01-05 00:00:00,0.806427
a,2013-01-05 12:00:00,0.064371
b,2013-01-05 00:00:00,0.71566
b,2013-01-05 12:00:00,0.086666


## Slice vs. exact match

The same string used as an indexing parameter can be treated either as a slice or as an exact match, depending on the resolution of the index. If the string is less precise than the index (for example, a day-level string used against a minute-resolution index), it is treated as a slice; otherwise it is treated as an exact match.

In [27]:
# Three observations whose timestamps are all whole minutes, so the index
# resolution is 'minute'.
series_minute = pd.Series(
    data=[1, 2, 3],
    index=pd.DatetimeIndex(
        [
            '2011-12-31 23:59:00',
            '2012-01-01 00:00:00',
            '2012-01-01 00:02:00',
        ]
    ),
)

In [28]:
# Resolution of the index; string keys are compared against this to decide
# between slice and exact-match behaviour.
series_minute.index.resolution

'minute'

In [35]:
# Look the same instant up twice, once written to the minute and once to the
# second; each lookup yields a scalar and the two results are collected in a tuple.
s = tuple(
    series_minute[key]
    for key in ('2011-12-31 23:59', '2011-12-31 23:59:00')
)
s

(1, 1)

In [36]:
# `s` is the tuple built above, holding the two scalar lookups.
type(s)

tuple

In [38]:
# Two-column frame that reuses the minute-resolution index of series_minute.
dft_minute = pd.DataFrame(
    data={'a': [1, 2, 3], 'b': [4, 5, 6]},
    index=series_minute.index,
)
dft_minute

Unnamed: 0,a,b
2011-12-31 23:59:00,1,4
2012-01-01 00:00:00,2,5
2012-01-01 00:02:00,3,6


In [39]:
# Hour-level string against a minute-resolution index: treated as a slice
# covering the whole hour 23:00-23:59.
# Uses .loc because `frame['<date string>']` is a column lookup in
# pandas >= 2.0 (row selection through [] with a single date string was
# deprecated in 1.2 and then removed) and would raise KeyError there.
dft_minute.loc['2011-12-31 23']

Unnamed: 0,a,b
2011-12-31 23:59:00,1,4


In [40]:
# Day-level string against a minute-resolution index: treated as a slice
# covering the whole day.
# Uses .loc because `frame['<date string>']` is a column lookup in
# pandas >= 2.0 (row selection through [] with a single date string was
# deprecated in 1.2 and then removed) and would raise KeyError there.
dft_minute.loc['2011-12-31']

Unnamed: 0,a,b
2011-12-31 23:59:00,1,4


In [44]:
# Date-field accessors such as .quarter apply to the whole index at once;
# 31 December falls in the fourth quarter.
pd.DatetimeIndex(['2011-12-31 23:59:00']).quarter

Int64Index([4], dtype='int64')

In [48]:
# Four consecutive days straddling the 2019/2020 year boundary.
# Note: this rebinds `idx`, which earlier in the notebook was the
# pd.IndexSlice alias; cells that rely on that alias must run before this one.
idx = pd.date_range('2019-12-29', periods=4, freq='D')
idx

DatetimeIndex(['2019-12-29', '2019-12-30', '2019-12-31', '2020-01-01'], dtype='datetime64[ns]', freq='D')

## Shifting / lagging

In [52]:
# Integer series 0..n-1 over the dates in `rng`, cut down to its first five
# entries so the effect of each shift is easy to read off.
ts = pd.Series(range(len(rng)), index=rng).iloc[:5]
ts

2011-01-31    0
2011-02-28    1
2011-03-31    2
2011-04-29    3
2011-05-31    4
Freq: BM, dtype: int64

In [53]:
# Without `freq` the values move down one position while the index stays put:
# the first slot becomes NaN, which also upcasts the result to float.
ts.shift(1)

2011-01-31    NaN
2011-02-28    0.0
2011-03-31    1.0
2011-04-29    2.0
2011-05-31    3.0
Freq: BM, dtype: float64

In [54]:
# With `freq` the index labels move instead (here by 5 calendar days); the
# values and integer dtype are untouched, and the result no longer reports
# the BM frequency.
ts.shift(5, freq='D')

2011-02-05    0
2011-03-05    1
2011-04-05    2
2011-05-04    3
2011-06-05    4
dtype: int64

In [55]:
# Same idea with an offset object: each label moves forward 5 business days,
# so weekends are skipped.
ts.shift(5, freq=pd.offsets.BDay())

2011-02-07    0
2011-03-07    1
2011-04-07    2
2011-05-06    3
2011-06-07    4
dtype: int64

In [56]:
# Move each label forward by five business-month-ends; because the shift
# matches the index's own frequency, the result keeps Freq: BM.
ts.shift(5, freq='BM')

2011-06-30    0
2011-07-29    1
2011-08-31    2
2011-09-30    3
2011-10-31    4
Freq: BM, dtype: int64

## Resampling

In [57]:
# 100 one-second timestamps starting at midnight on 1 January 2012.
# Rebinds `rng`, which previously held the business-month-end range.
rng = pd.date_range(start='2012-01-01', periods=100, freq='S')

In [60]:
# One random integer in 0..499 per second of `rng` (unseeded).
ts = pd.Series(
    data=np.random.randint(0, 500, size=len(rng)),
    index=rng,
)
ts.head()

2012-01-01 00:00:00    447
2012-01-01 00:00:01    335
2012-01-01 00:00:02    113
2012-01-01 00:00:03    181
2012-01-01 00:00:04    303
Freq: S, dtype: int64

In [65]:
# Group the per-second samples into 5-second bins and total each bin
# (100 samples -> 20 bins, each labelled by its start time).
ts.resample('5S').sum()

2012-01-01 00:00:00    1379
2012-01-01 00:00:05    1290
2012-01-01 00:00:10    1619
2012-01-01 00:00:15    1196
2012-01-01 00:00:20    1295
2012-01-01 00:00:25    1297
2012-01-01 00:00:30    1055
2012-01-01 00:00:35    1465
2012-01-01 00:00:40    1032
2012-01-01 00:00:45    1496
2012-01-01 00:00:50     905
2012-01-01 00:00:55    1478
2012-01-01 00:01:00    1242
2012-01-01 00:01:05    1238
2012-01-01 00:01:10    1295
2012-01-01 00:01:15    1052
2012-01-01 00:01:20    1600
2012-01-01 00:01:25    1075
2012-01-01 00:01:30    1410
2012-01-01 00:01:35    1370
Freq: 5S, dtype: int64