# 11.5 기간과 기간 연산

In [11]:
import pandas as pd
import numpy as np

In [4]:
# Annual period for 2007, using an annual frequency anchored on December.
p = pd.Period(value=2007, freq='A-DEC')
p

Period('2007', 'A-DEC')

In [5]:
# Adding an integer shifts the period forward by that many units of its frequency.
p + 5

Period('2012', 'A-DEC')

In [6]:
# Subtracting an integer shifts the period backward by that many units of its frequency.
p - 2

Period('2005', 'A-DEC')

In [7]:
# When two periods share the same frequency, their difference is the number
# of frequency units between them.
pd.Period('2014', freq='A-DEC') - p

<7 * YearEnds: month=12>

In [8]:
# period_range builds a regular sequence of periods between two endpoints.
rng = pd.period_range(start='2000-01-01', end='2000-06-30', freq='M')
rng

PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')

In [12]:
# Six random draws indexed by the monthly PeriodIndex built in the previous cell.
pd.Series(data=np.random.randn(6), index=rng)

2000-01    1.283404
2000-02    2.028078
2000-03   -1.358092
2000-04   -0.322531
2000-05    0.771708
2000-06   -1.358345
Freq: M, dtype: float64

In [14]:
# A PeriodIndex can also be built directly from an array of period strings.
values = ['2001Q3', '2002Q2', '2003Q1']
index = pd.PeriodIndex(data=values, freq='Q-DEC')
index

PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

# 11.5.1 Period 빈도변환

In [15]:
# Start again from the annual period 2007 (December-anchored) for the conversions below.
p = pd.Period(value='2007', freq='A-DEC')
p

Period('2007', 'A-DEC')

In [16]:
# Convert the annual period to monthly frequency, taking the first month it spans.
p.asfreq('M',how='start')

Period('2007-01', 'M')

In [17]:
# Convert the annual period to monthly frequency, taking the last month it spans.
p.asfreq('M',how='end')

Period('2007-12', 'M')

In [18]:
# Annual period anchored on June instead of December; the cells below show
# which months it spans.
p = pd.Period(value='2007', freq='A-JUN')
p

Period('2007', 'A-JUN')

In [19]:
# First month of the June-anchored year: it begins in July of the previous calendar year.
p.asfreq('M','start')

Period('2006-07', 'M')

In [20]:
# Last month of the June-anchored year.
p.asfreq('M','end')

Period('2007-06', 'M')

In [21]:
# Going from a finer to a coarser frequency: August 2007 belongs to the
# June-anchored year labelled 2008.
p = pd.Period(value='Aug-2007', freq='M')
p.asfreq('A-JUN')

Period('2008', 'A-JUN')

In [24]:
# Series of random values indexed by the annual periods 2006 through 2009.
rng = pd.period_range(start='2006', end='2009', freq='A-DEC')
ts = pd.Series(data=np.random.randn(len(rng)), index=rng)
ts

2006    0.602364
2007    1.846548
2008    0.570818
2009   -1.582436
Freq: A-DEC, dtype: float64

In [25]:
# Relabel each annual period with the first month of its year; values are unchanged.
ts.asfreq('M',how='start')

2006-01    0.602364
2007-01    1.846548
2008-01    0.570818
2009-01   -1.582436
Freq: M, dtype: float64

In [26]:
# Relabel each annual period with the last business day of its year; values are unchanged.
ts.asfreq('B',how='end')

2006-12-29    0.602364
2007-12-31    1.846548
2008-12-31    0.570818
2009-12-31   -1.582436
Freq: B, dtype: float64

# 11.5.2 분기 빈도

In [27]:
# Fourth quarter of 2012 under a quarterly frequency anchored on January.
p = pd.Period(value='2012Q4', freq='Q-JAN')
p

Period('2012Q4', 'Q-JAN')

In [28]:
# First calendar day covered by the quarter.
p.asfreq('D','start')

Period('2011-11-01', 'D')

In [29]:
# Last calendar day covered by the quarter.
p.asfreq('D','end')

Period('2012-01-31', 'D')

In [30]:
# 4 PM on the second-to-last business day of the quarter:
#   1. take the quarter's last business day,
#   2. step back one business day (the "- 1"),
#   3. move to the first minute of that day,
#   4. add 16 * 60 minutes to reach 16:00.
# The minute frequency is spelled 'min' (as in the '5min' resample later in this
# notebook) because the 'T' alias is deprecated in recent pandas.
p4pm = (p.asfreq('B', 'end') - 1).asfreq('min', 'start') + 16 * 60
p4pm

Period('2012-01-30 16:00', 'T')

In [31]:
# Convert the minute-frequency period to a Timestamp.
p4pm.to_timestamp()

Timestamp('2012-01-30 16:00:00')

In [33]:
# Build a range of quarterly periods with period_range and index a counter by it.
rng = pd.period_range(start='2011Q3', end='2012Q4', freq='Q-JAN')
ts = pd.Series(data=np.arange(len(rng)), index=rng)
ts

2011Q3    0
2011Q4    1
2012Q1    2
2012Q2    3
2012Q3    4
2012Q4    5
Freq: Q-JAN, dtype: int32

In [35]:
# For every quarter in rng: 4 PM on the second-to-last business day
# (last business day, minus one, first minute of that day, plus 16 * 60 minutes).
# 'min' replaces the deprecated 'T' alias and matches the '5min' spelling used
# with resample later in this notebook.
new_rng = (rng.asfreq('B', 'end') - 1).asfreq('min', 'start') + 16 * 60
# Re-index ts by the resulting timestamps; this mutates ts in place.
ts.index = new_rng.to_timestamp()
ts

2010-10-28 16:00:00    0
2011-01-28 16:00:00    1
2011-04-28 16:00:00    2
2011-07-28 16:00:00    3
2011-10-28 16:00:00    4
2012-01-30 16:00:00    5
dtype: int32

# 11.5.3 타임스탬프와 기간 서로 변환하기

In [37]:
# Three month-end timestamps starting in January 2000, with random values.
rng = pd.date_range(start='2000-01-01', periods=3, freq='M')
ts = pd.Series(data=np.random.randn(3), index=rng)
ts

2000-01-31    0.407044
2000-02-29   -2.612839
2000-03-31   -1.660754
Freq: M, dtype: float64

In [39]:
# Convert the timestamp-indexed series to a period-indexed one; no frequency is
# passed, so it is taken from the index.
pts=ts.to_period()
pts

2000-01    0.407044
2000-02   -2.612839
2000-03   -1.660754
Freq: M, dtype: float64

In [40]:
# Six consecutive days that straddle the January/February boundary.
rng = pd.date_range(start='1/29/2000', periods=6, freq='D')
ts2 = pd.Series(data=np.random.randn(6), index=rng)
ts2

2000-01-29    0.534879
2000-01-30   -0.074543
2000-01-31    0.745005
2000-02-01   -1.334208
2000-02-02   -1.239975
2000-02-03   -0.084250
Freq: D, dtype: float64

In [41]:
# Converting daily timestamps to monthly periods leaves repeated periods in the index.
ts2.to_period('M')

2000-01    0.534879
2000-01   -0.074543
2000-01    0.745005
2000-02   -1.334208
2000-02   -1.239975
2000-02   -0.084250
Freq: M, dtype: float64

In [42]:
# Set-up for the to_timestamp demonstration in the next cell: first convert the
# daily series to periods.
pts=ts2.to_period()
pts

2000-01-29    0.534879
2000-01-30   -0.074543
2000-01-31    0.745005
2000-02-01   -1.334208
2000-02-02   -1.239975
2000-02-03   -0.084250
Freq: D, dtype: float64

In [43]:
# Convert periods back to timestamps, using the end of each period.
pts.to_timestamp(how='end')

2000-01-29 23:59:59.999999999    0.534879
2000-01-30 23:59:59.999999999   -0.074543
2000-01-31 23:59:59.999999999    0.745005
2000-02-01 23:59:59.999999999   -1.334208
2000-02-02 23:59:59.999999999   -1.239975
2000-02-03 23:59:59.999999999   -0.084250
Freq: D, dtype: float64

# 11.5.4 배열로 PeriodIndex 생성하기

In [49]:
# Load the macroeconomic data set (path is relative to the notebook's working
# directory) and preview the first five rows.
data = pd.read_csv('examples/macrodata.csv')
data.head()

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [50]:
# Year column; stored as floats.
data.year

0      1959.0
1      1959.0
2      1959.0
3      1959.0
4      1960.0
        ...  
198    2008.0
199    2008.0
200    2009.0
201    2009.0
202    2009.0
Name: year, Length: 203, dtype: float64

In [51]:
# Quarter column (1.0 to 4.0); stored as floats.
data.quarter

0      1.0
1      2.0
2      3.0
3      4.0
4      1.0
      ... 
198    3.0
199    4.0
200    1.0
201    2.0
202    3.0
Name: quarter, Length: 203, dtype: float64

In [57]:
# Passing the year and quarter arrays to PeriodIndex together with a frequency
# produces an index that can be used on the DataFrame.
# NOTE(review): newer pandas releases reportedly deprecate field-based
# construction in favour of PeriodIndex.from_fields — confirm against the
# installed pandas version before upgrading.
index=pd.PeriodIndex(year=data.year,quarter=data.quarter,freq='Q-DEC')
index

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203, freq='Q-DEC')

In [58]:
# Replace the DataFrame's index with the quarterly PeriodIndex (mutates `data` in place).
data.index=index

In [60]:
# The infl column, now labelled by quarter.
data.infl

1959Q1    0.00
1959Q2    2.34
1959Q3    2.74
1959Q4    0.27
1960Q1    2.31
          ... 
2008Q3   -3.16
2008Q4   -8.79
2009Q1    0.94
2009Q2    3.37
2009Q3    3.56
Freq: Q-DEC, Name: infl, Length: 203, dtype: float64

# 11.6 리샘플링과 빈도 변환

In [63]:
# One hundred daily observations of random values starting on 2000-01-01.
rng = pd.date_range(start='2000-01-01', periods=100, freq='D')
ts = pd.Series(data=np.random.randn(len(rng)), index=rng)
ts

2000-01-01   -0.231603
2000-01-02   -0.728976
2000-01-03    0.403783
2000-01-04    1.117783
2000-01-05    0.932374
                ...   
2000-04-05    0.591832
2000-04-06   -0.443906
2000-04-07   -1.160852
2000-04-08    0.132805
2000-04-09   -0.646181
Freq: D, Length: 100, dtype: float64

In [64]:
# Downsample the daily series to monthly means, labelled by month-end timestamp.
ts.resample('M').mean()

2000-01-31   -0.047370
2000-02-29   -0.122356
2000-03-31    0.130058
2000-04-30   -0.019117
Freq: M, dtype: float64

In [65]:
# Monthly means labelled by period instead of by month-end timestamp.
# The `kind` keyword of resample is deprecated in recent pandas, so the
# aggregated result is converted with to_period instead; the values are the
# same and the index becomes a monthly PeriodIndex.
ts.resample('M').mean().to_period('M')

2000-01   -0.047370
2000-02   -0.122356
2000-03    0.130058
2000-04   -0.019117
Freq: M, dtype: float64

# 11.6.1 다운샘플링

In [66]:
# Twelve one-minute observations starting at midnight, holding the values 0..11.
# The frequency is spelled 'min' (consistent with the '5min' resample below)
# because the 'T' alias is deprecated in recent pandas.
rng = pd.date_range('2000-01-01', periods=12, freq='min')
ts = pd.Series(np.arange(12), index=rng)
ts

2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int32

In [67]:
# Group into five-minute bins and sum each bin.
# closed='right' makes each bin include its right edge, so the 00:00 value
# falls into the bin labelled 23:55 of the previous day.
ts.resample('5min',closed='right').sum()

1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32

In [None]:
# TODO: resume from page 466 (the notes stop here).
# NOTE(review): no aggregation is applied, so this cell only builds the
# resampler object — confirm which aggregation was intended.
ts.resample('5min')