# 时间序列

## 1.日期和时间数据的类型及工具

In [2]:
from datetime import datetime

In [3]:
now = datetime.now()

In [4]:
now

datetime.datetime(2019, 7, 17, 10, 44, 14, 486764)

In [5]:
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)

In [6]:
delta

datetime.timedelta(days=926, seconds=56700)

In [7]:
delta.days

926

In [8]:
delta.seconds

56700

In [9]:
from datetime import timedelta

In [10]:
start = datetime(2011, 1, 7)

In [11]:
start + timedelta(12)

datetime.datetime(2011, 1, 19, 0, 0)

In [12]:
start - 2 * timedelta(2010, 12, 14, 0, 0)

datetime.datetime(2000, 1, 4, 23, 59, 35, 999972)

### 1.1字符串与datetime互相转换

In [13]:
stamp = datetime(2011, 1, 3)

In [14]:
str(stamp)

'2011-01-03 00:00:00'

In [15]:
stamp.strftime('%Y-%m-%d')

'2011-01-03'

## 2.时间序列基础

In [21]:
import pandas as pd
import numpy as np
from datetime import datetime
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
           datetime(2011, 1, 7), datetime(2011, 1, 8),
            datetime(2011, 1, 10), datetime(2011, 1, 12)]

In [22]:
ts = pd.Series(np.random.randn(6), index = dates)

In [23]:
ts

2011-01-02   -0.392374
2011-01-05   -1.083016
2011-01-07    1.716908
2011-01-08    0.693235
2011-01-10   -0.757538
2011-01-12   -1.829684
dtype: float64

In [25]:
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

### 2.1索引、选择、子集

In [27]:
stamp = ts.index[2]

In [28]:
ts[stamp]

1.716907874163722

In [29]:
ts['1/10/2011']

-0.757538437804975

In [30]:
ts['20110110']

-0.757538437804975

In [31]:
longer_ts = pd.Series(np.random.randn(1000),
                     index=pd.date_range('1/1/2000',periods=1000))

In [32]:
longer_ts

2000-01-01    0.126487
2000-01-02   -0.417579
2000-01-03   -0.325051
2000-01-04   -1.702581
2000-01-05    0.080871
2000-01-06    0.390307
2000-01-07   -0.511274
2000-01-08   -0.124722
2000-01-09    1.184997
2000-01-10    0.367508
2000-01-11    0.279641
2000-01-12   -1.453132
2000-01-13   -2.365855
2000-01-14    0.685325
2000-01-15    1.784815
2000-01-16    1.352767
2000-01-17    0.352040
2000-01-18   -0.919583
2000-01-19    1.106988
2000-01-20    2.003484
2000-01-21    1.557487
2000-01-22    1.522660
2000-01-23   -0.740284
2000-01-24   -0.476469
2000-01-25    0.354254
2000-01-26   -1.338284
2000-01-27    0.246052
2000-01-28    0.438786
2000-01-29    0.151175
2000-01-30   -0.975845
                ...   
2002-08-28   -1.027306
2002-08-29    0.647662
2002-08-30    0.938883
2002-08-31   -0.568889
2002-09-01    0.771965
2002-09-02    1.113373
2002-09-03   -0.175449
2002-09-04   -0.878745
2002-09-05   -0.072702
2002-09-06   -0.072121
2002-09-07   -0.335597
2002-09-08   -0.320109
2002-09-09 

In [33]:
longer_ts['2001']

2001-01-01   -0.409416
2001-01-02    1.047045
2001-01-03   -0.717226
2001-01-04    0.358451
2001-01-05    0.213129
2001-01-06   -0.012609
2001-01-07   -0.700740
2001-01-08   -0.558854
2001-01-09    0.503307
2001-01-10   -0.391864
2001-01-11   -0.988198
2001-01-12    0.384387
2001-01-13    1.011376
2001-01-14   -0.517550
2001-01-15    1.096463
2001-01-16   -0.805721
2001-01-17   -0.334484
2001-01-18   -0.155500
2001-01-19    0.927840
2001-01-20   -0.800277
2001-01-21   -2.272114
2001-01-22   -1.312795
2001-01-23   -0.464215
2001-01-24   -0.842336
2001-01-25   -0.921327
2001-01-26   -0.983329
2001-01-27   -0.382154
2001-01-28   -0.648517
2001-01-29   -1.602005
2001-01-30    2.078163
                ...   
2001-12-02   -0.146827
2001-12-03    0.075930
2001-12-04    0.856824
2001-12-05   -2.227999
2001-12-06    0.087318
2001-12-07    0.611067
2001-12-08    0.943484
2001-12-09   -0.042029
2001-12-10    0.297663
2001-12-11   -1.392426
2001-12-12    1.752763
2001-12-13   -0.136317
2001-12-14 

In [34]:
longer_ts['2001-05']

2001-05-01   -0.437292
2001-05-02   -0.529006
2001-05-03    0.467316
2001-05-04   -0.042167
2001-05-05   -1.569622
2001-05-06   -1.118406
2001-05-07    0.989573
2001-05-08    1.092106
2001-05-09   -0.635354
2001-05-10    0.124278
2001-05-11    1.147507
2001-05-12    0.692716
2001-05-13    0.207057
2001-05-14   -1.950881
2001-05-15   -0.644028
2001-05-16   -1.311651
2001-05-17    0.606178
2001-05-18    0.305768
2001-05-19   -2.883771
2001-05-20    0.240651
2001-05-21   -1.486320
2001-05-22    0.140124
2001-05-23   -1.136429
2001-05-24    1.983669
2001-05-25   -0.114681
2001-05-26   -0.035814
2001-05-27    1.007492
2001-05-28   -0.263703
2001-05-29   -0.722107
2001-05-30    1.886241
2001-05-31    0.166348
Freq: D, dtype: float64

In [36]:
ts[datetime(2011, 1, 7):]

2011-01-07    1.716908
2011-01-08    0.693235
2011-01-10   -0.757538
2011-01-12   -1.829684
dtype: float64

### 2.2含有重复时间索引

In [37]:
dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000',
                     '1/2/2000', '1/3/2000'])

In [38]:
dup_ts = pd.Series(np.arange(5), index = dates)

In [39]:
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [40]:
dup_ts.index.is_unique

False

In [41]:
dup_ts['1/3/2000']

4

In [42]:
dup_ts['1/2/2000']

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

In [43]:
grouped = dup_ts.groupby(level = 0)

In [44]:
grouped.mean()

2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int32

In [45]:
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

## 3.日期范围、频率和移位

In [46]:
ts 

2011-01-02   -0.392374
2011-01-05   -1.083016
2011-01-07    1.716908
2011-01-08    0.693235
2011-01-10   -0.757538
2011-01-12   -1.829684
dtype: float64

In [47]:
resampler = ts.resample('D')

### 3.1生成日期范围

In [48]:
index = pd.date_range('2012-04-01', '2012-06-01')

In [49]:
index 

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
      

In [50]:
pd.date_range(start = '2012-04-01', periods = 20)

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
              dtype='datetime64[ns]', freq='D')

In [51]:
pd.date_range(end = '2012-06-01', periods = 20)

DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',
               '2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',
               '2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',
               '2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',
               '2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')

In [52]:
pd.date_range('2000-01-01', '2000-12-01', freq = 'BM')

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30'],
              dtype='datetime64[ns]', freq='BM')

In [53]:
pd.date_range('2012-05-02 12:56:31', periods = 5)

DatetimeIndex(['2012-05-02 12:56:31', '2012-05-03 12:56:31',
               '2012-05-04 12:56:31', '2012-05-05 12:56:31',
               '2012-05-06 12:56:31'],
              dtype='datetime64[ns]', freq='D')

In [54]:
pd.date_range('2012-05-02 12:56:31', periods = 5, normalize = True)

DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')

### 3,2频率和日期偏置

In [55]:
from pandas.tseries.offsets import Hour, Minute

In [56]:
hour = Hour()
hour

<Hour>

In [57]:
four_hours = Hour(4)
four_hours

<4 * Hours>

In [58]:
pd.date_range('2000-01-01', '2000-01-03 23:59', freq = '4H')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 04:00:00',
               '2000-01-01 08:00:00', '2000-01-01 12:00:00',
               '2000-01-01 16:00:00', '2000-01-01 20:00:00',
               '2000-01-02 00:00:00', '2000-01-02 04:00:00',
               '2000-01-02 08:00:00', '2000-01-02 12:00:00',
               '2000-01-02 16:00:00', '2000-01-02 20:00:00',
               '2000-01-03 00:00:00', '2000-01-03 04:00:00',
               '2000-01-03 08:00:00', '2000-01-03 12:00:00',
               '2000-01-03 16:00:00', '2000-01-03 20:00:00'],
              dtype='datetime64[ns]', freq='4H')

In [59]:
Hour(2) + Minute(30)

<150 * Minutes>

In [60]:
pd.date_range('2000-01-01', periods = 10, freq = '1h30min')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:00',
               '2000-01-01 03:00:00', '2000-01-01 04:30:00',
               '2000-01-01 06:00:00', '2000-01-01 07:30:00',
               '2000-01-01 09:00:00', '2000-01-01 10:30:00',
               '2000-01-01 12:00:00', '2000-01-01 13:30:00'],
              dtype='datetime64[ns]', freq='90T')

In [61]:
rng = pd.date_range('2012-01-01', '2012-09-01', freq = 'WOM-3FRI')

In [62]:
list(rng)

[Timestamp('2012-01-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-02-17 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-03-16 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-04-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-05-18 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-06-15 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-07-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-08-17 00:00:00', freq='WOM-3FRI')]

### 3.3移动(超前和滞后)数据

In [63]:
ts = pd.Series(np.random.randn(4),
              index = pd.date_range('1/1/2000', periods = 4, freq = 'M'))
ts

2000-01-31   -0.540900
2000-02-29    1.112211
2000-03-31    0.177500
2000-04-30    1.302798
Freq: M, dtype: float64

In [64]:
ts.shift(2) 

2000-01-31         NaN
2000-02-29         NaN
2000-03-31   -0.540900
2000-04-30    1.112211
Freq: M, dtype: float64

In [65]:
ts/ts.shift(1) - 1

2000-01-31         NaN
2000-02-29   -3.056224
2000-03-31   -0.840408
2000-04-30    6.339715
Freq: M, dtype: float64

In [66]:
ts.shift(2, freq = 'M')

2000-03-31   -0.540900
2000-04-30    1.112211
2000-05-31    0.177500
2000-06-30    1.302798
Freq: M, dtype: float64

In [67]:
ts.shift(3, freq = 'D')

2000-02-03   -0.540900
2000-03-03    1.112211
2000-04-03    0.177500
2000-05-03    1.302798
dtype: float64

In [68]:
from pandas.tseries.offsets import Day, MonthEnd

In [69]:
now = datetime(2011, 11, 17)

In [70]:
now + 3 * Day()

Timestamp('2011-11-20 00:00:00')

In [71]:
now + MonthEnd()

Timestamp('2011-11-30 00:00:00')

In [72]:
now + MonthEnd(2)

Timestamp('2011-12-31 00:00:00')

In [73]:
offset = MonthEnd()

In [74]:
offset

<MonthEnd>

## 4.时区处理

In [76]:
import pytz

In [77]:
pytz.common_timezones[-5:]

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

In [78]:
tz = pytz.timezone('America/New_York')
tz

<DstTzInfo 'America/New_York' LMT-1 day, 19:04:00 STD>

### 4.1时区的本地化和转换

In [80]:
rng = pd.date_range('3/9/2012 9:30', periods = 6, freq = 'D')

In [81]:
ts = pd.Series(np.random.randn(len(rng)), index = rng)

In [82]:
ts

2012-03-09 09:30:00    1.225194
2012-03-10 09:30:00    0.120310
2012-03-11 09:30:00   -1.152259
2012-03-12 09:30:00    1.002586
2012-03-13 09:30:00   -0.391241
2012-03-14 09:30:00    0.299162
Freq: D, dtype: float64

In [83]:
pd.date_range('3/9/2012 9:30', periods = 10, freq = 'D', tz = 'UTC')

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-17 09:30:00+00:00', '2012-03-18 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [84]:
ts_utc = ts.tz_localize('UTC')
ts_utc

2012-03-09 09:30:00+00:00    1.225194
2012-03-10 09:30:00+00:00    0.120310
2012-03-11 09:30:00+00:00   -1.152259
2012-03-12 09:30:00+00:00    1.002586
2012-03-13 09:30:00+00:00   -0.391241
2012-03-14 09:30:00+00:00    0.299162
Freq: D, dtype: float64

In [85]:
ts_utc.index

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [86]:
ts_utc.tz_convert('America/New_York')

2012-03-09 04:30:00-05:00    1.225194
2012-03-10 04:30:00-05:00    0.120310
2012-03-11 05:30:00-04:00   -1.152259
2012-03-12 05:30:00-04:00    1.002586
2012-03-13 05:30:00-04:00   -0.391241
2012-03-14 05:30:00-04:00    0.299162
Freq: D, dtype: float64

In [87]:
ts_eastren = ts.tz_localize('America/New_York')

In [88]:
ts_eastren.tz_convert('UTC')

2012-03-09 14:30:00+00:00    1.225194
2012-03-10 14:30:00+00:00    0.120310
2012-03-11 13:30:00+00:00   -1.152259
2012-03-12 13:30:00+00:00    1.002586
2012-03-13 13:30:00+00:00   -0.391241
2012-03-14 13:30:00+00:00    0.299162
Freq: D, dtype: float64

In [89]:
ts_eastren.tz_convert('Europe/Berlin')

2012-03-09 15:30:00+01:00    1.225194
2012-03-10 15:30:00+01:00    0.120310
2012-03-11 14:30:00+01:00   -1.152259
2012-03-12 14:30:00+01:00    1.002586
2012-03-13 14:30:00+01:00   -0.391241
2012-03-14 14:30:00+01:00    0.299162
Freq: D, dtype: float64

### 4.2时区感知时间戳对象的操作

In [90]:
stamp = pd.Timestamp('2011-03-12 04:00') 

In [91]:
stamp_utc = stamp.tz_localize('UTC')

In [92]:
stamp_utc.tz_convert('America/New_York')

Timestamp('2011-03-11 23:00:00-0500', tz='America/New_York')

In [93]:
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz = 'Europe/Moscow')
stamp_moscow

Timestamp('2011-03-12 04:00:00+0300', tz='Europe/Moscow')

In [94]:
stamp_utc.value

1299902400000000000

In [95]:
stamp_utc.tz_convert('America/New_York').value

1299902400000000000

In [96]:
from pandas.tseries.offsets import Hour

In [97]:
stamp = pd.Timestamp('2012-03-12 01:30', tz = 'US/Eastern')
stamp

Timestamp('2012-03-12 01:30:00-0400', tz='US/Eastern')

In [98]:
stamp + Hour()

Timestamp('2012-03-12 02:30:00-0400', tz='US/Eastern')

In [99]:
stamp2 = pd.Timestamp('2012-11-04 00:30', tz = 'US/Eastern')
stamp2

Timestamp('2012-11-04 00:30:00-0400', tz='US/Eastern')

In [100]:
stamp2 + 2 * Hour()

Timestamp('2012-11-04 01:30:00-0500', tz='US/Eastern')

## 5.时间区间和区间算术

In [101]:
p = pd.Period('2007', freq = 'A-DEC')
p

Period('2007', 'A-DEC')

In [102]:
p + 5

Period('2012', 'A-DEC')

In [103]:
pd.Period('2014', freq = 'A-DEC') - p

7

In [104]:
rng = pd.period_range('2000-01-01', '2000-06-30', freq = 'M')
rng

PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')

In [105]:
pd.Series(np.random.randn(6), index = rng)

2000-01   -1.172130
2000-02   -0.732318
2000-03   -0.672429
2000-04    0.726903
2000-05    2.093603
2000-06    1.032219
Freq: M, dtype: float64

In [106]:
p = pd.Period('2007', freq = 'A-DEC')
p

Period('2007', 'A-DEC')

In [107]:
p.asfreq('M', how = 'start')

Period('2007-01', 'M')

### 5.4从数组生成PeriodIndex

In [108]:
import os
os.chdir(r'C:\Users\Van\Desktop\desktop file\pydata-book-2nd-edition')
data = pd.read_csv('examples/macrodata.csv')

In [109]:
data.head()

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [110]:
data.year

0      1959.0
1      1959.0
2      1959.0
3      1959.0
4      1960.0
5      1960.0
6      1960.0
7      1960.0
8      1961.0
9      1961.0
10     1961.0
11     1961.0
12     1962.0
13     1962.0
14     1962.0
15     1962.0
16     1963.0
17     1963.0
18     1963.0
19     1963.0
20     1964.0
21     1964.0
22     1964.0
23     1964.0
24     1965.0
25     1965.0
26     1965.0
27     1965.0
28     1966.0
29     1966.0
        ...  
173    2002.0
174    2002.0
175    2002.0
176    2003.0
177    2003.0
178    2003.0
179    2003.0
180    2004.0
181    2004.0
182    2004.0
183    2004.0
184    2005.0
185    2005.0
186    2005.0
187    2005.0
188    2006.0
189    2006.0
190    2006.0
191    2006.0
192    2007.0
193    2007.0
194    2007.0
195    2007.0
196    2008.0
197    2008.0
198    2008.0
199    2008.0
200    2009.0
201    2009.0
202    2009.0
Name: year, Length: 203, dtype: float64

In [111]:
data.quarter

0      1.0
1      2.0
2      3.0
3      4.0
4      1.0
5      2.0
6      3.0
7      4.0
8      1.0
9      2.0
10     3.0
11     4.0
12     1.0
13     2.0
14     3.0
15     4.0
16     1.0
17     2.0
18     3.0
19     4.0
20     1.0
21     2.0
22     3.0
23     4.0
24     1.0
25     2.0
26     3.0
27     4.0
28     1.0
29     2.0
      ... 
173    2.0
174    3.0
175    4.0
176    1.0
177    2.0
178    3.0
179    4.0
180    1.0
181    2.0
182    3.0
183    4.0
184    1.0
185    2.0
186    3.0
187    4.0
188    1.0
189    2.0
190    3.0
191    4.0
192    1.0
193    2.0
194    3.0
195    4.0
196    1.0
197    2.0
198    3.0
199    4.0
200    1.0
201    2.0
202    3.0
Name: quarter, Length: 203, dtype: float64

In [112]:
index = pd.PeriodIndex(year = data.year, quarter = data.quarter, freq = 'Q-DEC')

In [113]:
index

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203, freq='Q-DEC')

In [114]:
data.index = index

In [115]:
data.infl

1959Q1    0.00
1959Q2    2.34
1959Q3    2.74
1959Q4    0.27
1960Q1    2.31
1960Q2    0.14
1960Q3    2.70
1960Q4    1.21
1961Q1   -0.40
1961Q2    1.47
1961Q3    0.80
1961Q4    0.80
1962Q1    2.26
1962Q2    0.13
1962Q3    2.11
1962Q4    0.79
1963Q1    0.53
1963Q2    2.75
1963Q3    0.78
1963Q4    2.46
1964Q1    0.13
1964Q2    0.90
1964Q3    1.29
1964Q4    2.05
1965Q1    1.28
1965Q2    2.54
1965Q3    0.89
1965Q4    2.90
1966Q1    4.99
1966Q2    2.10
          ... 
2002Q2    1.56
2002Q3    2.66
2002Q4    3.08
2003Q1    1.31
2003Q2    1.09
2003Q3    2.60
2003Q4    3.02
2004Q1    2.35
2004Q2    3.61
2004Q3    3.58
2004Q4    2.09
2005Q1    4.15
2005Q2    1.85
2005Q3    9.14
2005Q4    0.40
2006Q1    2.60
2006Q2    3.97
2006Q3   -1.58
2006Q4    3.30
2007Q1    4.58
2007Q2    2.75
2007Q3    3.45
2007Q4    6.38
2008Q1    2.82
2008Q2    8.53
2008Q3   -3.16
2008Q4   -8.79
2009Q1    0.94
2009Q2    3.37
2009Q3    3.56
Freq: Q-DEC, Name: infl, Length: 203, dtype: float64

## 6.重新采样与频率转换

In [116]:
rng = pd.date_range('2000-01-01', periods = 100, freq = 'D')

In [117]:
ts = pd.Series(np.random.randn(len(rng)), index = rng)

In [118]:
ts

2000-01-01   -0.522576
2000-01-02   -0.443457
2000-01-03    0.335032
2000-01-04    0.512513
2000-01-05   -1.445326
2000-01-06   -0.645442
2000-01-07    0.429208
2000-01-08   -0.504588
2000-01-09    1.683914
2000-01-10    0.234568
2000-01-11    1.824012
2000-01-12   -0.874949
2000-01-13    0.669982
2000-01-14   -0.170416
2000-01-15    0.372222
2000-01-16    0.550609
2000-01-17    1.510948
2000-01-18    0.984471
2000-01-19   -0.641956
2000-01-20    0.273531
2000-01-21   -1.612091
2000-01-22   -0.780929
2000-01-23    0.297311
2000-01-24   -1.735439
2000-01-25    1.214612
2000-01-26    0.458352
2000-01-27    1.468400
2000-01-28   -1.293400
2000-01-29    0.246161
2000-01-30   -2.004580
                ...   
2000-03-11   -0.831856
2000-03-12   -0.291741
2000-03-13    1.262087
2000-03-14    0.213273
2000-03-15    0.401052
2000-03-16    0.896311
2000-03-17    0.840325
2000-03-18   -0.514193
2000-03-19   -2.022310
2000-03-20    0.699006
2000-03-21   -0.251188
2000-03-22   -1.769028
2000-03-23 

In [119]:
ts.resample('M').mean()

2000-01-31    0.036079
2000-02-29   -0.127757
2000-03-31   -0.062372
2000-04-30    0.262158
Freq: M, dtype: float64

In [120]:
ts.resample('M', kind = 'period').mean()

2000-01    0.036079
2000-02   -0.127757
2000-03   -0.062372
2000-04    0.262158
Freq: M, dtype: float64

In [121]:
rng = pd.date_range('2000-01-01', periods = 12, freq = 'T')

In [122]:
ts = pd.Series(np.arange(12), index = rng)

In [123]:
ts

2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int32

In [124]:
ts.resample('5min', closed = 'right').sum()

1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32

In [125]:
ts.resample('5min', closed = 'right', label = 'right', loffset = '-1s').sum()

1999-12-31 23:59:59     0
2000-01-01 00:04:59    15
2000-01-01 00:09:59    40
2000-01-01 00:14:59    11
Freq: 5T, dtype: int32