# 11.3 Date Ranges, Frequencies, and Shifting

In [4]:
import numpy as np
import pandas as pd
from datetime import datetime

In [5]:
# Six irregularly spaced days in January 2011 used as the Series index
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]
# One standard-normal random value per date
ts = pd.Series(np.random.randn(len(dates)), index=dates)

In [6]:
ts

2011-01-02   -0.173418
2011-01-05   -0.322852
2011-01-07    0.239531
2011-01-08   -0.470536
2011-01-10    0.557996
2011-01-12   -0.734844
dtype: float64

In [7]:
# Build a daily-frequency ('D') resampler over ts; its repr is shown in the next cell
resampler = ts.resample('D')

In [8]:
resampler

DatetimeIndexResampler [freq=<Day>, axis=0, closed=left, label=left, convention=start, base=0]

---

## Generating Date Ranges

In [19]:
# Both endpoints are included; with no freq given the range is daily
index = pd.date_range(start='2011-04-01', end='2012-06-01')

In [20]:
# With no freq argument, date_range generates daily timestamps (freq='D' in the repr)
index

DatetimeIndex(['2011-04-01', '2011-04-02', '2011-04-03', '2011-04-04',
               '2011-04-05', '2011-04-06', '2011-04-07', '2011-04-08',
               '2011-04-09', '2011-04-10',
               ...
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
               '2012-05-27', '2012-05-28', '2012-05-29', '2012-05-30',
               '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', length=428, freq='D')

In [21]:
# Start date plus a number of periods (20 daily timestamps).
# The date is written in ISO 8601 (YYYY-MM-DD) so the day/month order is
# unambiguous and pandas does not have to guess a day-first format.
pd.date_range('1997-09-21', periods=20)

DatetimeIndex(['1997-09-21', '1997-09-22', '1997-09-23', '1997-09-24',
               '1997-09-25', '1997-09-26', '1997-09-27', '1997-09-28',
               '1997-09-29', '1997-09-30', '1997-10-01', '1997-10-02',
               '1997-10-03', '1997-10-04', '1997-10-05', '1997-10-06',
               '1997-10-07', '1997-10-08', '1997-10-09', '1997-10-10'],
              dtype='datetime64[ns]', freq='D')

In [22]:
# End date plus a number of periods: the 20 daily timestamps ending on this date.
# The date is written in ISO 8601 (YYYY-MM-DD) so the day/month order is
# unambiguous and pandas does not have to guess a day-first format.
pd.date_range(end='2019-09-21', periods=20)

DatetimeIndex(['2019-09-02', '2019-09-03', '2019-09-04', '2019-09-05',
               '2019-09-06', '2019-09-07', '2019-09-08', '2019-09-09',
               '2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',
               '2019-09-14', '2019-09-15', '2019-09-16', '2019-09-17',
               '2019-09-18', '2019-09-19', '2019-09-20', '2019-09-21'],
              dtype='datetime64[ns]', freq='D')

In [24]:
# 'BM' = business month end: the last business day of each month
# (e.g. 2000-04-28 rather than 2000-04-30 in the output).
# NOTE(review): pandas 2.2+ deprecates the 'BM' alias in favor of 'BME' -
# update if the environment is upgraded.
pd.date_range('2000-01-01', '2001-01-01', freq='BM')

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30', '2000-12-29'],
              dtype='datetime64[ns]', freq='BM')

In [25]:
# The time of day in the start timestamp (12:56:31) is preserved in every generated timestamp
pd.date_range('2012-05-02 12:56:31', periods=5)

DatetimeIndex(['2012-05-02 12:56:31', '2012-05-03 12:56:31',
               '2012-05-04 12:56:31', '2012-05-05 12:56:31',
               '2012-05-06 12:56:31'],
              dtype='datetime64[ns]', freq='D')

Sometimes you will have start or end dates with time information but want to generate
a set of timestamps normalized to midnight as a convention. To do this, there is a
normalize option:

In [26]:
# normalize=True snaps the generated timestamps to midnight, dropping the time of day
pd.date_range('2012-05-02 12:56:31', periods=5, normalize=True)

DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')

---

## Frequencies and Date Offsets

Frequencies in pandas are composed of a base frequency and a multiplier. Base frequencies
are typically referred to by a string alias, like 'M' for monthly or 'H' for
hourly. For each base frequency, there is an object defined, generally referred to as a
date offset. For example, hourly frequency can be represented with the Hour class:

In [27]:
from pandas.tseries.offsets import Hour, Minute

In [28]:
# Hour() with no argument is a one-hour offset (displayed as <Hour>)
hour = Hour()

In [29]:
hour

<Hour>

In [30]:
# Passing an integer defines a multiple of the base offset: four hours
four_hours = Hour(4)

In [32]:
# In most cases you do not need to build an offset object yourself: a string
# alias with an integer multiplier ('4h' = every four hours) does the same thing
pd.date_range('2000-01-01', '2000-01-03 23:59', freq='4h')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 04:00:00',
               '2000-01-01 08:00:00', '2000-01-01 12:00:00',
               '2000-01-01 16:00:00', '2000-01-01 20:00:00',
               '2000-01-02 00:00:00', '2000-01-02 04:00:00',
               '2000-01-02 08:00:00', '2000-01-02 12:00:00',
               '2000-01-02 16:00:00', '2000-01-02 20:00:00',
               '2000-01-03 00:00:00', '2000-01-03 04:00:00',
               '2000-01-03 08:00:00', '2000-01-03 12:00:00',
               '2000-01-03 16:00:00', '2000-01-03 20:00:00'],
              dtype='datetime64[ns]', freq='4H')

In [33]:
# freq also accepts an offset object directly; Hour(6) steps every six hours
pd.date_range('2000-01-01', '2000-01-03', freq=Hour(6))

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 06:00:00',
               '2000-01-01 12:00:00', '2000-01-01 18:00:00',
               '2000-01-02 00:00:00', '2000-01-02 06:00:00',
               '2000-01-02 12:00:00', '2000-01-02 18:00:00',
               '2000-01-03 00:00:00'],
              dtype='datetime64[ns]', freq='6H')

In [38]:
# Several offsets can be combined in one alias string:
# 1 hour + 30 minutes + 35 seconds = 5435 seconds per step
pd.date_range('2000-01-01', periods=10, freq='1h30min35s')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:35',
               '2000-01-01 03:01:10', '2000-01-01 04:31:45',
               '2000-01-01 06:02:20', '2000-01-01 07:32:55',
               '2000-01-01 09:03:30', '2000-01-01 10:34:05',
               '2000-01-01 12:04:40', '2000-01-01 13:35:15'],
              dtype='datetime64[ns]', freq='5435S')

### Week of month dates

In [39]:
# 'WOM-3FRI' = week of month: the third Friday of each month
rng = pd.date_range('2012-01-01', '2012-09-01', freq='WOM-3FRI')

In [40]:
rng

DatetimeIndex(['2012-01-20', '2012-02-17', '2012-03-16', '2012-04-20',
               '2012-05-18', '2012-06-15', '2012-07-20', '2012-08-17'],
              dtype='datetime64[ns]', freq='WOM-3FRI')

In [41]:
# Iterating over the index yields the individual Timestamp objects
list(rng)

[Timestamp('2012-01-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-02-17 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-03-16 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-04-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-05-18 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-06-15 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-07-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-08-17 00:00:00', freq='WOM-3FRI')]

---

## Shifting (Leading and Lagging) Data

“Shifting” refers to moving data backward and forward through time. Both Series and
DataFrame have a shift method for doing naive shifts forward or backward, leaving
the index unmodified:

In [46]:
# Four random values indexed by the first four month ends of 2000.
# NOTE(review): pandas 2.2+ deprecates the 'M' alias in favor of 'ME' -
# update if the environment is upgraded.
ts = pd.Series(np.random.randn(4),
                index=pd.date_range('1/1/2000', periods=4, freq='M'))

In [47]:
ts

2000-01-31   -0.254981
2000-02-29   -0.366661
2000-03-31    1.527052
2000-04-30   -0.091256
Freq: M, dtype: float64

In [48]:
# Shift the data forward by two positions: the index is unchanged, the first
# two slots become NaN and the last two values are dropped
ts.shift(2)

2000-01-31         NaN
2000-02-29         NaN
2000-03-31   -0.254981
2000-04-30   -0.366661
Freq: M, dtype: float64

In [49]:
# A negative count shifts the data backward, leaving NaN in the last two slots
ts.shift(-2)

2000-01-31    1.527052
2000-02-29   -0.091256
2000-03-31         NaN
2000-04-30         NaN
Freq: M, dtype: float64

Because naive shifts leave the index unmodified, some data is discarded. Thus if the
frequency is known, it can be passed to shift to advance the timestamps instead of
simply the data:

In [50]:
# With freq given, the timestamps are advanced by two month ends and the
# values stay attached to them, so no data is lost.
# NOTE(review): pandas 2.2+ deprecates the 'M' alias in favor of 'ME' -
# update if the environment is upgraded.
ts.shift(2, freq='M')

2000-03-31   -0.254981
2000-04-30   -0.366661
2000-05-31    1.527052
2000-06-30   -0.091256
Freq: M, dtype: float64

In [51]:
# Any frequency can be used for the shift: move every timestamp forward three calendar days
ts.shift(3, freq='D')

2000-02-03   -0.254981
2000-03-03   -0.366661
2000-04-03    1.527052
2000-05-03   -0.091256
dtype: float64

In [52]:
# 'T' is the alias for minutes, so '90T' moves every timestamp forward by 90 minutes.
# NOTE(review): pandas 2.2+ deprecates the 'T' alias in favor of 'min' -
# update if the environment is upgraded.
ts.shift(1, freq='90T')

2000-01-31 01:30:00   -0.254981
2000-02-29 01:30:00   -0.366661
2000-03-31 01:30:00    1.527052
2000-04-30 01:30:00   -0.091256
Freq: M, dtype: float64

### Shifting dates with offsets

In [53]:
from pandas.tseries.offsets import Day, MonthEnd

In [54]:
# Fixed reference date (midnight, 17 Nov 2011) for the offset arithmetic examples
now = datetime(year=2011, month=11, day=17)

In [55]:
# pandas offsets can be added to a datetime; the result is a pandas Timestamp
now + 3 * Day()

Timestamp('2011-11-20 00:00:00')

In [56]:
# Adding an anchored offset such as MonthEnd rolls the date forward to the
# next month end (2011-11-17 -> 2011-11-30)
now + MonthEnd()

Timestamp('2011-11-30 00:00:00')

In [57]:
# Two month-end steps: the first lands on 2011-11-30, the second on 2011-12-31
now + MonthEnd(2)

Timestamp('2011-12-31 00:00:00')

In [58]:
# Month-end offset reused by the rollforward/rollback and groupby cells
offset = MonthEnd()

In [59]:
# Roll the date forward to the next month end (2011-11-30)
offset.rollforward(now)

Timestamp('2011-11-30 00:00:00')

In [61]:
# Roll the date backward to the previous month end (2011-10-31)
offset.rollback(now)

Timestamp('2011-10-31 00:00:00')

In [62]:
# 20 random observations spaced four days apart, starting 2000-01-15
ts = pd.Series(
    data=np.random.randn(20),
    index=pd.date_range(start='1/15/2000', periods=20, freq='4d'),
)

In [63]:
ts

2000-01-15    1.066456
2000-01-19    1.294573
2000-01-23   -0.195433
2000-01-27    0.957247
2000-01-31   -0.906520
2000-02-04    0.016260
2000-02-08   -2.286543
2000-02-12   -0.916955
2000-02-16    2.424735
2000-02-20   -0.780204
2000-02-24   -1.868674
2000-02-28    0.425979
2000-03-03   -1.233182
2000-03-07    2.674603
2000-03-11   -0.399477
2000-03-15   -0.495425
2000-03-19   -0.393690
2000-03-23    0.020562
2000-03-27    1.357122
2000-03-31    1.774529
Freq: 4D, dtype: float64

In [66]:
# Group the observations by the month end their timestamp rolls forward to,
# then average each group
ts.groupby(offset.rollforward).mean()

2000-01-31    0.443264
2000-02-29   -0.426486
2000-03-31    0.413130
dtype: float64

In [67]:
# resample gives the same monthly means as the groupby above, more directly.
# NOTE(review): pandas 2.2+ deprecates the 'M' alias in favor of 'ME' -
# update if the environment is upgraded.
ts.resample('M').mean()

2000-01-31    0.443264
2000-02-29   -0.426486
2000-03-31    0.413130
Freq: M, dtype: float64

___