# Pandas的时间序列处理 

## 创建

In [2]:
from datetime import datetime
import pandas as pd
import numpy as np

# Build a Series whose index is a plain list of datetime objects; the second
# print shows the resulting index type.
# Seed the global NumPy RNG first so the random values are reproducible on
# every Restart & Run All.
np.random.seed(42)
date_list = [datetime(2017, 2, 18), datetime(2017, 2, 19),
             datetime(2017, 2, 25), datetime(2017, 2, 26),
             datetime(2017, 3, 4), datetime(2017, 3, 5)]
# One random value per date, so the data always matches the index length.
time_s = pd.Series(np.random.randn(len(date_list)), index=date_list)
print(time_s)
print(type(time_s.index))

2017-02-18    0.096263
2017-02-19    0.427484
2017-02-25    1.285475
2017-02-26    0.995017
2017-03-04    0.227895
2017-03-05   -0.085762
dtype: float64
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


In [3]:
# Build a DatetimeIndex with pd.date_range()
dates = pd.date_range(
    start='2017-02-18',  # start date
    periods=5,           # number of timestamps to generate
    freq='W-SAT',        # frequency: weekly, anchored on Saturday
)
print(dates)
random_values = np.random.randn(5)
print(pd.Series(random_values, index=dates))

DatetimeIndex(['2017-02-18', '2017-02-25', '2017-03-04', '2017-03-11',
               '2017-03-18'],
              dtype='datetime64[ns]', freq='W-SAT')
2017-02-18   -0.630554
2017-02-25    1.022186
2017-03-04   -0.772116
2017-03-11   -2.520321
2017-03-18   -0.862911
Freq: W-SAT, dtype: float64


## 索引

In [4]:
# Access by integer position. Use .iloc explicitly: plain time_s[0] relies on
# the positional fallback of Series.__getitem__, which is deprecated for
# non-integer indexes such as this DatetimeIndex.
print(time_s.iloc[0])

0.0962631924357


In [5]:
# Access by index label, using a datetime object as the key
first_day = datetime(2017, 2, 18)
print(time_s[first_day])

0.0962631924357


In [6]:
# Access by any string that can be parsed as a date
date_str = '2017/02/18'
print(time_s[date_str])

0.0962631924357


In [7]:
# Select by year or by year-month: a partial date string returns every
# entry that falls inside that period
feb_2017 = time_s['2017-2']
print(feb_2017)

2017-02-18    0.096263
2017-02-19    0.427484
2017-02-25    1.285475
2017-02-26    0.995017
dtype: float64


In [8]:
# Slicing: everything from the given date (inclusive) to the end
from_feb_26 = time_s['2017-2-26':]
print(from_feb_26)

2017-02-26    0.995017
2017-03-04    0.227895
2017-03-05   -0.085762
dtype: float64


## 过滤


In [9]:
# Drop everything before the cutoff date; the cutoff itself is kept
cutoff = '2017-2-25'
time_s.truncate(before=cutoff)

2017-02-25    1.285475
2017-02-26    0.995017
2017-03-04    0.227895
2017-03-05   -0.085762
dtype: float64

In [10]:
# Drop everything after the cutoff date; the cutoff itself is kept
cutoff = '2017-2-25'
time_s.truncate(after=cutoff)

2017-02-18    0.096263
2017-02-19    0.427484
2017-02-25    1.285475
dtype: float64

## 生成日期范围

In [11]:
# Given a start and an end date, the generated time points default to a
# daily frequency
date_index = pd.date_range(start='2017/02/18', end='2017/03/18')
print(date_index)

DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',
               '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',
               '2017-02-26', '2017-02-27', '2017-02-28', '2017-03-01',
               '2017-03-02', '2017-03-03', '2017-03-04', '2017-03-05',
               '2017-03-06', '2017-03-07', '2017-03-08', '2017-03-09',
               '2017-03-10', '2017-03-11', '2017-03-12', '2017-03-13',
               '2017-03-14', '2017-03-15', '2017-03-16', '2017-03-17',
               '2017-03-18'],
              dtype='datetime64[ns]', freq='D')


In [12]:
# When only a start (or only an end) date is given, the number of periods
# must be supplied as well
ten_days_forward = pd.date_range(start='2017/02/18', periods=10)
print(ten_days_forward)

DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',
               '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',
               '2017-02-26', '2017-02-27'],
              dtype='datetime64[ns]', freq='D')


In [13]:
# Ten daily timestamps ending on the given date
ten_days_back = pd.date_range(end='2017/03/18', periods=10)
print(ten_days_back)

DatetimeIndex(['2017-03-09', '2017-03-10', '2017-03-11', '2017-03-12',
               '2017-03-13', '2017-03-14', '2017-03-15', '2017-03-16',
               '2017-03-17', '2017-03-18'],
              dtype='datetime64[ns]', freq='D')


In [14]:
# Normalizing timestamps: with normalize=True the time-of-day part of the
# start is reset to midnight before the range is generated
start_with_time = '2017/02/18 12:13:14'
print(pd.date_range(start=start_with_time, periods=10))
print(pd.date_range(start=start_with_time, periods=10, normalize=True))

DatetimeIndex(['2017-02-18 12:13:14', '2017-02-19 12:13:14',
               '2017-02-20 12:13:14', '2017-02-21 12:13:14',
               '2017-02-22 12:13:14', '2017-02-23 12:13:14',
               '2017-02-24 12:13:14', '2017-02-25 12:13:14',
               '2017-02-26 12:13:14', '2017-02-27 12:13:14'],
              dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',
               '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',
               '2017-02-26', '2017-02-27'],
              dtype='datetime64[ns]', freq='D')


## 频率与偏移量

In [15]:
# A multiplier in the frequency string: one timestamp every two days
every_other_day = pd.date_range('2017/02/18', '2017/03/18', freq='2D')
print(every_other_day)

DatetimeIndex(['2017-02-18', '2017-02-20', '2017-02-22', '2017-02-24',
               '2017-02-26', '2017-02-28', '2017-03-02', '2017-03-04',
               '2017-03-06', '2017-03-08', '2017-03-10', '2017-03-12',
               '2017-03-14', '2017-03-16', '2017-03-18'],
              dtype='datetime64[ns]', freq='2D')


In [16]:
# Offsets can be combined with "+": here two weeks plus twelve hours
two_weeks = pd.tseries.offsets.Week(2)
half_day = pd.tseries.offsets.Hour(12)
sum_offset = two_weeks + half_day
print(sum_offset)

# Use the combined offset as the step of the generated range
print(pd.date_range('2017/02/18', '2017/03/18', freq=sum_offset))

14 days 12:00:00
DatetimeIndex(['2017-02-18 00:00:00', '2017-03-04 12:00:00'], dtype='datetime64[ns]', freq='348H')


## 移动数据

In [17]:
# Five random values on a weekly (Saturday-anchored) index, used by the
# shift examples
weekly_index = pd.date_range('20170218', periods=5, freq='W-SAT')
ts = pd.Series(np.random.randn(5), index=weekly_index)
print(ts)

2017-02-18   -0.734666
2017-02-25    1.399863
2017-03-04   -0.586229
2017-03-11   -0.175909
2017-03-18   -0.329034
Freq: W-SAT, dtype: float64


In [21]:
# With freq given, the index labels move (here by one step of two days)
# while the values stay attached to their rows
shifted_labels = ts.shift(1, freq='2D')
print(shifted_labels)
# Without freq, the values move one position earlier on the same index and
# the vacated last slot becomes NaN
shifted_values = ts.shift(-1)
print(shifted_values)

2017-02-20   -0.734666
2017-02-27    1.399863
2017-03-06   -0.586229
2017-03-13   -0.175909
2017-03-20   -0.329034
Freq: W-MON, dtype: float64
2017-02-18    1.399863
2017-02-25   -0.586229
2017-03-04   -0.175909
2017-03-11   -0.329034
2017-03-18         NaN
Freq: W-SAT, dtype: float64


In [19]:
# Show the documentation for Series.shift. The IPython-only `?ts.shift`
# syntax is not valid Python; help() works in any interpreter.
help(ts.shift)