# 11.5 Periods and Period Arithmetic

In [1]:
import numpy as np
import pandas as pd

Periods represent timespans, like days, months, quarters, or years. 

In [2]:
p = pd.Period(2007, freq='A-DEC')

In [3]:
p

Period('2007', 'A-DEC')

In this case, the Period object represents the full timespan from January 1, 2007, to
December 31, 2007, inclusive.

In [4]:
p + 5

Period('2012', 'A-DEC')

In [5]:
p - 2

Period('2005', 'A-DEC')

In [6]:
pd.Period(2014, freq='A-DEC') - p

<7 * YearEnds: month=12>

In [7]:
rng = pd.period_range('2000-01-01', '2000-06-30', freq='M')

In [8]:
rng

PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')

In [10]:
pd.Series(np.random.randn(len(rng)), index=rng)

2000-01   -0.184677
2000-02    0.140242
2000-03    1.697006
2000-04   -0.400353
2000-05    0.611870
2000-06   -0.717200
Freq: M, dtype: float64

In [11]:
values = ['2001Q3', '2002Q2', '2003Q1']

In [12]:
index = pd.PeriodIndex(values, freq='Q-DEC')

In [13]:
index

PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

---

## Period Frequency Conversion

Periods and PeriodIndex objects can be converted to another frequency with their
asfreq method.

In [14]:
# turn annual period into monthly period
p = pd.Period('2007', freq='A-DEC')

In [15]:
p

Period('2007', 'A-DEC')

In [16]:
p.asfreq('M', how='start')

Period('2007-01', 'M')

In [17]:
p.asfreq('M', how='end')

Period('2007-12', 'M')

In [18]:
p = pd.Period('2007', freq='A-JUN')

In [19]:
p

Period('2007', 'A-JUN')

In [20]:
p.asfreq('M', how='start')

Period('2006-07', 'M')

In [21]:
p.asfreq('M', how='end')

Period('2007-06', 'M')

In [22]:
p.asfreq('A-JUN')

Period('2007', 'A-JUN')

In [23]:
p = pd.Period('Aug-2007', 'M')

In [25]:
p.asfreq('A-JUN')

Period('2008', 'A-JUN')

In [26]:
rng = pd.period_range('2006', '2009', freq='A-DEC')

In [27]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)

In [28]:
ts

2006    1.615610
2007    1.393102
2008   -2.306660
2009   -0.193500
Freq: A-DEC, dtype: float64

In [31]:
ts.asfreq('M', how='start')

2006-01    1.615610
2007-01    1.393102
2008-01   -2.306660
2009-01   -0.193500
Freq: M, dtype: float64

In [32]:
ts.asfreq('B', how='end')

2006-12-29    1.615610
2007-12-31    1.393102
2008-12-31   -2.306660
2009-12-31   -0.193500
Freq: B, dtype: float64

---

## Quarterly Period Frequencies

In [33]:
p = pd.Period('2012Q4', freq='Q-JAN')

In [34]:
p

Period('2012Q4', 'Q-JAN')

In [35]:
p.asfreq('D', 'start')

Period('2011-11-01', 'D')

In [36]:
p.asfreq('D', 'end')

Period('2012-01-31', 'D')

In [37]:
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60

In [38]:
p4pm

Period('2012-01-30 16:00', 'T')

In [39]:
p4pm.to_timestamp()

Timestamp('2012-01-30 16:00:00')

In [40]:
rng = pd.period_range('2011Q3', '2014Q4', freq='Q-JAN')

In [41]:
ts = pd.Series(np.arange(len(rng)), index=rng)

In [47]:
ts.index[0].to_timestamp()

Timestamp('2010-08-01 00:00:00')

In [48]:
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 'e') + 16 * 60

In [49]:
ts.index = new_rng.to_timestamp()

In [50]:
ts

2010-10-29 15:59:00     0
2011-01-29 15:59:00     1
2011-04-29 15:59:00     2
2011-07-29 15:59:00     3
2011-10-29 15:59:00     4
2012-01-31 15:59:00     5
2012-04-28 15:59:00     6
2012-07-31 15:59:00     7
2012-10-31 15:59:00     8
2013-01-31 15:59:00     9
2013-04-30 15:59:00    10
2013-07-31 15:59:00    11
2013-10-31 15:59:00    12
2014-01-31 15:59:00    13
dtype: int32

---

## Converting Timestamps to Periods (and Back)

In [55]:
rng = pd.date_range('2000-01-01', periods=3, freq='M')

In [56]:
rng

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31'], dtype='datetime64[ns]', freq='M')

In [57]:
ts = pd.Series(np.random.randn(3), index=rng)

In [58]:
ts

2000-01-31   -0.380168
2000-02-29   -1.141196
2000-03-31   -0.957515
Freq: M, dtype: float64

In [59]:
pts = ts.to_period()

In [60]:
pts

2000-01   -0.380168
2000-02   -1.141196
2000-03   -0.957515
Freq: M, dtype: float64

In [62]:
pd.Series(np.random.randn(3), pd.date_range('2019-01-01', periods=3, freq='MS')).to_period()

2019-01   -0.320428
2019-02   -1.632788
2019-03    0.310374
Freq: M, dtype: float64

Since periods refer to non-overlapping timespans, a timestamp can only belong to a
single period for a given frequency. While the frequency of the new PeriodIndex is
inferred from the timestamps by default, you can specify any frequency you want.
There is also no problem with having duplicate periods in the result:

In [63]:
rng = pd.date_range('1/29/2000', periods=6, freq='D')

In [64]:
rng

DatetimeIndex(['2000-01-29', '2000-01-30', '2000-01-31', '2000-02-01',
               '2000-02-02', '2000-02-03'],
              dtype='datetime64[ns]', freq='D')

In [66]:
ts2 = pd.Series(np.random.randn(len(rng)), index=rng)

In [72]:
ts2.index

DatetimeIndex(['2000-01-29', '2000-01-30', '2000-01-31', '2000-02-01',
               '2000-02-02', '2000-02-03'],
              dtype='datetime64[ns]', freq='D')

In [73]:
ts2.to_period('M').index

PeriodIndex(['2000-01', '2000-01', '2000-01', '2000-02', '2000-02', '2000-02'], dtype='period[M]', freq='M')

In [78]:
pts = ts2.to_period()

In [79]:
pts

2000-01-29    0.029713
2000-01-30   -1.244085
2000-01-31    1.015749
2000-02-01    0.405724
2000-02-02    0.290206
2000-02-03   -1.133790
Freq: D, dtype: float64

In [85]:
ts2.to_period('W-WED', 'e')

2000-01-27/2000-02-02    0.029713
2000-01-27/2000-02-02   -1.244085
2000-01-27/2000-02-02    1.015749
2000-01-27/2000-02-02    0.405724
2000-01-27/2000-02-02    0.290206
2000-02-03/2000-02-09   -1.133790
Freq: W-WED, dtype: float64

---

## Creating a PeriodIndex from Arrays

Fixed frequency datasets are sometimes stored with timespan information spread
across multiple columns

In [86]:
data = pd.read_csv('macrodata.csv')

In [87]:
data.head()

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [90]:
data.year[:5]

0    1959.0
1    1959.0
2    1959.0
3    1959.0
4    1960.0
Name: year, dtype: float64

In [92]:
data.quarter[:5]

0    1.0
1    2.0
2    3.0
3    4.0
4    1.0
Name: quarter, dtype: float64

In [93]:
# converting year and quanter columns to PeriodIndex
index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')

In [94]:
index

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203, freq='Q-DEC')

In [95]:
data.index = index

In [97]:
data.head()

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
1959Q1,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1959Q2,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
1959Q3,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
1959Q4,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
1960Q1,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [100]:
data.index[0]

Period('1959Q1', 'Q-DEC')

In [101]:
data.index[0].to_timestamp()

Timestamp('1959-01-01 00:00:00')

In [102]:
data.infl[:5]

1959Q1    0.00
1959Q2    2.34
1959Q3    2.74
1959Q4    0.27
1960Q1    2.31
Freq: Q-DEC, Name: infl, dtype: float64

---

In [112]:
index = pd.date_range('2015-01-01', periods=36, freq='MS')

In [113]:
index

DatetimeIndex(['2015-01-01', '2015-02-01', '2015-03-01', '2015-04-01',
               '2015-05-01', '2015-06-01', '2015-07-01', '2015-08-01',
               '2015-09-01', '2015-10-01', '2015-11-01', '2015-12-01',
               '2016-01-01', '2016-02-01', '2016-03-01', '2016-04-01',
               '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01',
               '2016-09-01', '2016-10-01', '2016-11-01', '2016-12-01',
               '2017-01-01', '2017-02-01', '2017-03-01', '2017-04-01',
               '2017-05-01', '2017-06-01', '2017-07-01', '2017-08-01',
               '2017-09-01', '2017-10-01', '2017-11-01', '2017-12-01'],
              dtype='datetime64[ns]', freq='MS')

In [123]:
index.to_period('Q-JAN')[0]

Period('2015Q4', 'Q-JAN')