# Chapter 11 - Time Series

## 11.3 - Date Ranges, Frequencies and Shifting

In [1]:
import datetime
from datetime import datetime as dt
import pandas as pd
from pandas.tseries.offsets import (Hour, Minute, Day, MonthEnd)
import numpy as np

from dateutil.parser import parse

A time series with irregular periods can be converted to one with regular periods with `resample`.

In [2]:
# Three observations on irregularly spaced dates (gaps of 2 and 7 days).
irregular_dates = [parse(text) for text in ('2019-01-01', '2019-01-03', '2019-01-10')]
s1 = pd.Series(range(3), index=irregular_dates)
display(s1)

2019-01-01    0
2019-01-03    1
2019-01-10    2
dtype: int64

In [3]:
# Bucket the observations into calendar days, then total each bucket;
# days without an observation come out as 0.
s1.resample(rule='D').sum()

2019-01-01    0
2019-01-02    0
2019-01-03    1
2019-01-04    0
2019-01-05    0
2019-01-06    0
2019-01-07    0
2019-01-08    0
2019-01-09    0
2019-01-10    2
Freq: D, dtype: int64

### Generating Date Ranges
Use `pd.date_range` to generate a `DatetimeIndex` of a given length at a given frequency. Pass either a start and an end date, or a start date together with the number of `periods` wanted.

In [4]:
# Daily range: with no `freq` given, every calendar day between the two
# endpoints (both included) is generated.
r1 = pd.date_range(start='2019-06-01', end='2019-06-15')

# Monthly range: 'MS' anchors on month start ('M' would anchor on month end).
r2 = pd.date_range(start='2018-01-01', end='2020-01-01', freq='MS')

display(r1)
display(r2)

DatetimeIndex(['2019-06-01', '2019-06-02', '2019-06-03', '2019-06-04',
               '2019-06-05', '2019-06-06', '2019-06-07', '2019-06-08',
               '2019-06-09', '2019-06-10', '2019-06-11', '2019-06-12',
               '2019-06-13', '2019-06-14', '2019-06-15'],
              dtype='datetime64[ns]', freq='D')

DatetimeIndex(['2018-01-01', '2018-02-01', '2018-03-01', '2018-04-01',
               '2018-05-01', '2018-06-01', '2018-07-01', '2018-08-01',
               '2018-09-01', '2018-10-01', '2018-11-01', '2018-12-01',
               '2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
               '2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01',
               '2019-09-01', '2019-10-01', '2019-11-01', '2019-12-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='MS')

In [5]:
# Daily range described by a start date and a number of periods instead of
# an end date.
r3 = pd.date_range('2019-12-01', freq='D', periods=10)
display(r3)

# Month-end range. The MonthEnd offset object is used instead of the string
# alias 'M', which pandas 2.2 deprecated in favour of 'ME'; the offset object
# yields the same index on both old and new pandas versions.
# ('MS' / MonthBegin would anchor on month start instead.)
r4 = pd.date_range('2018-01-01', freq=MonthEnd(), periods=3)
display(r4)

DatetimeIndex(['2019-12-01', '2019-12-02', '2019-12-03', '2019-12-04',
               '2019-12-05', '2019-12-06', '2019-12-07', '2019-12-08',
               '2019-12-09', '2019-12-10'],
              dtype='datetime64[ns]', freq='D')

DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], dtype='datetime64[ns]', freq='M')

### Frequencies and Date Offsets
Frequencies are composed of a base frequency (time unit) and a multiplier.

In [6]:
# The Hour offset class models an hourly frequency; `n` is the multiple.
h = Hour()
h4 = Hour(n=4)
display(h)
display(h4)

# The Minute offset class does the same for minute frequencies.
m = Minute()
m30 = Minute(n=30)
display(m)
display(m30)

<Hour>

<4 * Hours>

<Minute>

<30 * Minutes>

In [7]:
# Offsets can be summed into a longer one: a 90-minute movie length is
# one hour plus thirty minutes.
mlength = Hour(n=1) + Minute(n=30)
display(mlength)

<90 * Minutes>

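Most of the time, there is no need to build offset objects explicitly: the frequency can be given as a string alias made of a multiplier and a base frequency, such as `'12h'`.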

In [8]:
# Five timestamps spaced 12 hours apart, starting at midnight on 2019-03-01.
dr1 = pd.date_range(start='2019-03-01 00:00:00', periods=5, freq='12h')
display(dr1)

DatetimeIndex(['2019-03-01 00:00:00', '2019-03-01 12:00:00',
               '2019-03-02 00:00:00', '2019-03-02 12:00:00',
               '2019-03-03 00:00:00'],
              dtype='datetime64[ns]', freq='12H')

### Shifting (Leading and Lagging) Data
"Shifting" is moving data backwards or forwards through time.

In [9]:
# Five consecutive days starting on 2019-02-03.
dr2 = pd.date_range(start='2019-02-03', periods=5, freq='D')
display(dr2)

# Shifting an index by 3 moves every timestamp 3 steps of its frequency
# (here: 3 days) forward.
display(dr2.shift(3))

DatetimeIndex(['2019-02-03', '2019-02-04', '2019-02-05', '2019-02-06',
               '2019-02-07'],
              dtype='datetime64[ns]', freq='D')

DatetimeIndex(['2019-02-06', '2019-02-07', '2019-02-08', '2019-02-09',
               '2019-02-10'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Load the price data; the first CSV column becomes the index and is then
# converted to datetimes.
df1 = pd.read_csv('dataset-I2-ES3.csv', index_col=0)
df1.index = pd.to_datetime(df1.index)

# Closing prices as a Series; show both ends of it.
p1 = df1['Close']
display(p1.head(), p1.tail())

# On a Series, however, shift moves the values down by one row (the default
# number of periods) while the index stays where it is.
display(p1.shift().head())

Date
2017-01-03    2.96
2017-01-04    2.98
2017-01-05    3.01
2017-01-06    3.03
2017-01-09    3.03
Name: Close, dtype: float64

Date
2019-04-24    3.365
2019-04-25    3.360
2019-04-26    3.362
2019-04-29    3.410
2019-04-30    3.415
Name: Close, dtype: float64

Date
2017-01-03     NaN
2017-01-04    2.96
2017-01-05    2.98
2017-01-06    3.01
2017-01-09    3.03
Name: Close, dtype: float64

In [11]:
# Another way to shift data is to pass `freq`: the index is shifted instead
# of the values, so every timestamp moves 10 calendar days forward and each
# value stays with its row.
p1.shift(periods=10, freq='D')

Date
2017-01-13    2.960
2017-01-14    2.980
2017-01-15    3.010
2017-01-16    3.030
2017-01-19    3.030
2017-01-20    3.060
2017-01-21    3.060
2017-01-22    3.060
2017-01-23    3.090
2017-01-26    3.070
2017-01-27    3.060
2017-01-28    3.060
2017-01-29    3.070
2017-01-30    3.080
2017-02-02    3.080
2017-02-03    3.090
2017-02-04    3.100
2017-02-05    3.120
2017-02-06    3.120
2017-02-10    3.110
2017-02-11    3.130
2017-02-12    3.050
2017-02-13    3.050
2017-02-16    3.060
2017-02-17    3.080
2017-02-18    3.080
2017-02-19    3.080
2017-02-20    3.120
2017-02-23    3.120
2017-02-24    3.080
              ...  
2019-03-29    3.226
2019-03-30    3.218
2019-03-31    3.222
2019-04-01    3.216
2019-04-04    3.189
2019-04-05    3.207
2019-04-06    3.200
2019-04-07    3.208
2019-04-08    3.215
2019-04-11    3.248
2019-04-12    3.287
2019-04-13    3.315
2019-04-14    3.323
2019-04-15    3.329
2019-04-18    3.317
2019-04-19    3.327
2019-04-20    3.331
2019-04-21    3.330
2019-04-22    3

`pandas` offsets can also be added to native `datetime` objects (imported here as `dt`).

In [12]:
# Build a native datetime from its text form.
datetime1 = dt.strptime('2019-01-01', '%Y-%m-%d')
print(datetime1)

# Offsets chain onto a native datetime as well: 2 hours, then 45 minutes.
two_h_45_min_later = datetime1 + Hour(2) + Minute(45)
print(two_h_45_min_later)

2019-01-01 00:00:00
2019-01-01 02:45:00


Think of `MonthEnd` as an anchored offset. It will "roll forward" to the next available date based on the frequency rule. (Rolls to next end of month day)

In [13]:
# Two month-end steps forward from datetime1.
print(datetime1 + MonthEnd(n=2))

# Starting 3 days later still lands on the same date, because the offset
# rolls forward to the next available end-of-month day.
print(datetime1 + Day(n=3) + MonthEnd(n=2))

2019-02-28 00:00:00
2019-02-28 00:00:00


**References:**

Python for Data Analysis, 2nd Edition, McKinney (2017)