In [5]:
from datetime import datetime

import numpy as np
import pandas as pd
from pandas.tseries.offsets import *

In [1]:

# 72 hourly timestamps starting at 2011-01-01 00:00; show the first five.
rng = pd.date_range('1/1/2011',periods=72,freq='H')
rng[:5]

DatetimeIndex(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
               '2011-01-01 02:00:00', '2011-01-01 03:00:00',
               '2011-01-01 04:00:00'],
              dtype='datetime64[ns]', freq='H')

In [3]:
# One standard-normal random value per timestamp in rng.
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts.head()

2011-01-01 00:00:00   -0.802980
2011-01-01 01:00:00   -0.202517
2011-01-01 02:00:00    0.403520
2011-01-01 03:00:00   -0.913172
2011-01-01 04:00:00    0.468549
Freq: H, dtype: float64

In [5]:
# Re-index onto a 45-minute grid; method='pad' forward-fills each new
# timestamp with the most recent earlier observation.
converted = ts.asfreq('45Min', method='pad')
converted.head()

2011-01-01 00:00:00   -0.802980
2011-01-01 00:45:00   -0.802980
2011-01-01 01:30:00   -0.202517
2011-01-01 02:15:00    0.403520
2011-01-01 03:00:00   -0.913172
Freq: 45T, dtype: float64

In [7]:
# Mean of the values within each calendar day.
ts.resample('D').mean()

2011-01-01    0.031038
2011-01-02    0.014778
2011-01-03   -0.035678
Freq: D, dtype: float64

In [9]:
# Build a Timestamp from a standard-library datetime. The `pd.datetime` alias
# is not available in current pandas, so the stdlib class is used directly.
pd.Timestamp(datetime(2012, 5, 1))

Timestamp('2012-05-01 00:00:00')

In [12]:
# A date string is parsed into a Timestamp at midnight.
pd.Timestamp('2012-05-01')

Timestamp('2012-05-01 00:00:00')

In [11]:
# Period (a time span): a 'YYYY-MM' string yields a monthly ('M') period.
pd.Period('2012-01')

Period('2012-01', 'M')

In [13]:
# With freq='D' the month string resolves to the first day of that month.
pd.Period('2012-05', freq='D')

Period('2012-05-01', 'D')

In [14]:
# A Series indexed by a list of Timestamps gets a DatetimeIndex.
dates = [pd.Timestamp(day) for day in ('2012-01-01', '2012-01-02', '2012-01-03')]
ts = pd.Series(data=np.random.randn(3), index=dates)
type(ts.index)

pandas.tseries.index.DatetimeIndex

In [15]:
# Inspect the index built from the Timestamp list (no frequency is set).
ts.index

DatetimeIndex(['2012-01-01', '2012-01-02', '2012-01-03'], dtype='datetime64[ns]', freq=None)

In [16]:
# Display the timestamp-indexed Series.
ts

2012-01-01   -1.078890
2012-01-02   -0.219157
2012-01-03    1.106422
dtype: float64

In [17]:
# A Series indexed by a list of Periods gets a PeriodIndex.
periods = [pd.Period(month) for month in ('2012-01', '2012-02', '2012-03')]
ts = pd.Series(data=np.random.randn(3), index=periods)
type(ts.index)

pandas.tseries.period.PeriodIndex

In [18]:
# Inspect the PeriodIndex (monthly frequency).
ts.index

PeriodIndex(['2012-01', '2012-02', '2012-03'], dtype='int64', freq='M')

In [19]:
# Display the period-indexed Series.
ts

2012-01    0.602544
2012-02   -1.358596
2012-03    0.863196
Freq: M, dtype: float64

## 转为时间戳

In [20]:
# A Series in gives a datetime64 Series out; None becomes NaT.
pd.to_datetime(pd.Series(['Jul 31, 2009','2010-01-10',None]))

0   2009-07-31
1   2010-01-10
2          NaT
dtype: datetime64[ns]

In [21]:
# A list in gives a DatetimeIndex out.
pd.to_datetime(['2005/11/23','2010.12.31'])

DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None)

In [22]:
# dayfirst=True reads '04-01-2012' as 4 January 2012.
pd.to_datetime(['04-01-2012 10:00'], dayfirst=True)

DatetimeIndex(['2012-01-04 10:00:00'], dtype='datetime64[ns]', freq=None)

In [26]:
# dayfirst is a preference, not a strict rule: in the recorded output
# '01-14-2014' is still parsed as 14 January because 14 cannot be a month.
# NOTE(review): the two strings use different layouts; confirm that the pandas
# version in use still accepts mixed formats in one call.
pd.to_datetime(['04-01-2010 10:00', '01-14-2014'],dayfirst=True)

DatetimeIndex(['2010-01-04 10:00:00', '2014-01-14 00:00:00'], dtype='datetime64[ns]', freq=None)

pd.to_datetime 和 pd.Timestamp 都可以把时间转化为时间戳，但前者可接受参数dayfirst 或 format

In [24]:
pd.to_datetime('2010/11/12')# a scalar string returns a single Timestamp

Timestamp('2010-11-12 00:00:00')

In [25]:
# pd.Timestamp parses the same string to the same value.
pd.Timestamp('2010/11/12')

Timestamp('2010-11-12 00:00:00')

In [27]:
# Assemble datetimes from a DataFrame whose columns are date components.
df = pd.DataFrame(dict(
    year=[2015, 2016],
    month=[2, 3],
    day=[4, 5],
    hour=[2, 3],
))
pd.to_datetime(df)

0   2015-02-04 02:00:00
1   2016-03-05 03:00:00
dtype: datetime64[ns]

In [28]:
# Only the required columns need to be passed.
pd.to_datetime(df[['year','month','day']])

0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]

## 无效日期

In [30]:
# errors='raise' (the default) fails on unparseable input. The exception is the
# point of this cell, so it is caught and printed rather than left to abort a
# "Restart & Run All".
try:
    pd.to_datetime(['2009','asd'],errors='raise')
except ValueError as exc:
    print('ValueError: {}'.format(exc))

ValueError: Unknown string format

In [31]:
# errors='ignore' returns the input unchanged when parsing fails.
# NOTE(review): errors='ignore' is reported as deprecated in recent pandas
# releases; confirm against the pandas version in use.
pd.to_datetime(['2009','asd'],errors='ignore')

array(['2009', 'asd'], dtype=object)

In [32]:
pd.to_datetime(['2009','asd'],errors='coerce')# unparseable values are replaced with NaT

DatetimeIndex(['2009-01-01', 'NaT'], dtype='datetime64[ns]', freq=None)

## 纪元时间戳

In [33]:
# Integers interpreted as seconds since the Unix epoch.
pd.to_datetime([1349720105, 1349806505, 1349892905,1349979305, 1350065705], unit='s')

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)

In [34]:
# The same integers read as milliseconds land in January 1970.
pd.to_datetime([1349720105, 1349806505, 1349892905,1349979305, 1350065705], unit='ms')

DatetimeIndex(['1970-01-16 14:55:20.105000', '1970-01-16 14:56:46.505000',
               '1970-01-16 14:58:12.905000', '1970-01-16 14:59:39.305000',
               '1970-01-16 15:01:05.705000'],
              dtype='datetime64[ns]', freq=None)

In [35]:
# Without `unit`, an integer is taken as nanoseconds since the epoch.
pd.to_datetime([1])

DatetimeIndex(['1970-01-01 00:00:00.000000001'], dtype='datetime64[ns]', freq=None)

In [36]:
# Mixed int/float epoch seconds.
# NOTE(review): the recorded output shows 3.14 s as 00:00:03 (fraction
# dropped); confirm whether the pandas version in use keeps the fraction.
pd.to_datetime([1,3.14],unit='s')

DatetimeIndex(['1970-01-01 00:00:01', '1970-01-01 00:00:03'], dtype='datetime64[ns]', freq=None)

## 生成时间戳范围

In [39]:
# Build a DatetimeIndex from standard-library datetime objects. The
# `pd.datetime` alias is not available in current pandas.
dates = [datetime(2012, 5, 1), datetime(2012, 5, 2), datetime(2012, 5, 3)]
index = pd.DatetimeIndex(dates)
index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [40]:
# pd.Index infers a DatetimeIndex when given datetime objects.
index = pd.Index(dates)
index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [41]:
# 1000 month-end dates ('M') starting with January 2000.
index = pd.date_range('2000-1-1',periods=1000, freq='M')
index

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-30',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-30', '2000-10-31',
               ...
               '2082-07-31', '2082-08-31', '2082-09-30', '2082-10-31',
               '2082-11-30', '2082-12-31', '2083-01-31', '2083-02-28',
               '2083-03-31', '2083-04-30'],
              dtype='datetime64[ns]', length=1000, freq='M')

`pd.bdate_range` generates a range of business days (frequency `'B'`), skipping weekends.

In [43]:
# 250 consecutive business days starting 2016-01-01.
index = pd.bdate_range('2016-01-01',periods=250) #business day
index

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08', '2016-01-11', '2016-01-12',
               '2016-01-13', '2016-01-14',
               ...
               '2016-12-02', '2016-12-05', '2016-12-06', '2016-12-07',
               '2016-12-08', '2016-12-09', '2016-12-12', '2016-12-13',
               '2016-12-14', '2016-12-15'],
              dtype='datetime64[ns]', length=250, freq='B')

In [42]:
# Daily range between two standard-library datetimes (both endpoints
# included). The `pd.datetime` alias is not available in current pandas.
start = datetime(2011, 1, 1)
end = datetime(2012, 1, 1)
rng = pd.date_range(start, end)
rng

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',
               '2011-01-09', '2011-01-10',
               ...
               '2011-12-23', '2011-12-24', '2011-12-25', '2011-12-26',
               '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30',
               '2011-12-31', '2012-01-01'],
              dtype='datetime64[ns]', length=366, freq='D')

In [46]:
# Business days only between start and end.
rng = pd.bdate_range(start, end)
rng

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14',
               ...
               '2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',
               '2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',
               '2011-12-29', '2011-12-30'],
              dtype='datetime64[ns]', length=260, freq='B')

In [47]:
# 'BM': business month end, one date per month.
pd.date_range(start, end, freq='BM')

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30'],
              dtype='datetime64[ns]', freq='BM')

In [48]:
# 'W': weekly frequency, one date every seven days.
pd.date_range(start,end,freq='W')

DatetimeIndex(['2011-01-02', '2011-01-09', '2011-01-16', '2011-01-23',
               '2011-01-30', '2011-02-06', '2011-02-13', '2011-02-20',
               '2011-02-27', '2011-03-06', '2011-03-13', '2011-03-20',
               '2011-03-27', '2011-04-03', '2011-04-10', '2011-04-17',
               '2011-04-24', '2011-05-01', '2011-05-08', '2011-05-15',
               '2011-05-22', '2011-05-29', '2011-06-05', '2011-06-12',
               '2011-06-19', '2011-06-26', '2011-07-03', '2011-07-10',
               '2011-07-17', '2011-07-24', '2011-07-31', '2011-08-07',
               '2011-08-14', '2011-08-21', '2011-08-28', '2011-09-04',
               '2011-09-11', '2011-09-18', '2011-09-25', '2011-10-02',
               '2011-10-09', '2011-10-16', '2011-10-23', '2011-10-30',
               '2011-11-06', '2011-11-13', '2011-11-20', '2011-11-27',
               '2011-12-04', '2011-12-11', '2011-12-18', '2011-12-25',
               '2012-01-01'],
              dtype='datetime64[ns]', freq='W-SUN')

## Datetime Index

In [43]:
# Random series on a business-month-end index, used by the indexing examples.
rng = pd.date_range(start, end, freq='BM')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts.index

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30'],
              dtype='datetime64[ns]', freq='BM')

In [51]:
# A contiguous slice keeps the index frequency.
ts[:5].index

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31'],
              dtype='datetime64[ns]', freq='BM')

In [52]:
# Taking every second element yields a '2BM' frequency.
ts[::2].index

DatetimeIndex(['2011-01-31', '2011-03-31', '2011-05-31', '2011-07-29',
               '2011-09-30', '2011-11-30'],
              dtype='datetime64[ns]', freq='2BM')

In [53]:
# Look up a single value by date string.
ts['1/31/2011']

-1.3890001039235427

In [54]:
# Slice from a datetime onward; the bound need not be present in the index.
# Uses the stdlib datetime because the `pd.datetime` alias is not available in
# current pandas.
ts[datetime(2011, 12, 25):]

2011-12-30   -1.815709
Freq: BM, dtype: float64

In [56]:
# Slice between two date strings.
ts['10/31/2011':'12/31/2011']

2011-10-31   -1.530486
2011-11-30    1.032298
2011-12-30   -1.815709
Freq: BM, dtype: float64

In [57]:
# Partial-string indexing: every entry in 2011.
ts['2011']

2011-01-31   -1.389000
2011-02-28   -1.590800
2011-03-31   -1.299550
2011-04-29   -0.868057
2011-05-31   -0.471553
2011-06-30   -2.623701
2011-07-29   -0.336142
2011-08-31   -0.654795
2011-09-30   -0.287845
2011-10-31   -1.530486
2011-11-30    1.032298
2011-12-30   -1.815709
Freq: BM, dtype: float64

In [58]:
# Partial-string indexing: every entry in June 2011.
ts['2011-6']

2011-06-30   -2.623701
Freq: BM, dtype: float64

In [60]:
# 100,000 rows of random values in column 'A' on a one-minute ('T') index
# starting 2013-01-01.
dft = pd.DataFrame(np.random.randn(100000, 1),columns=['A'],
                  index = pd.date_range('20130101',periods=100000,freq='T'))
dft

Unnamed: 0,A
2013-01-01 00:00:00,0.487937
2013-01-01 00:01:00,1.863481
2013-01-01 00:02:00,-2.619854
2013-01-01 00:03:00,0.688805
2013-01-01 00:04:00,-0.119478
2013-01-01 00:05:00,0.846206
2013-01-01 00:06:00,-0.377208
2013-01-01 00:07:00,-0.586735
2013-01-01 00:08:00,-0.015885
2013-01-01 00:09:00,1.778815


In [61]:
# Select all rows in 2013 by partial date string. On a DataFrame, plain
# `dft['2013']` is a column lookup in current pandas, so row selection goes
# through .loc.
dft.loc['2013']

Unnamed: 0,A
2013-01-01 00:00:00,0.487937
2013-01-01 00:01:00,1.863481
2013-01-01 00:02:00,-2.619854
2013-01-01 00:03:00,0.688805
2013-01-01 00:04:00,-0.119478
2013-01-01 00:05:00,0.846206
2013-01-01 00:06:00,-0.377208
2013-01-01 00:07:00,-0.586735
2013-01-01 00:08:00,-0.015885
2013-01-01 00:09:00,1.778815


In [62]:
# Row slice between two partial date strings.
dft['2013-1':'2013-2']

Unnamed: 0,A
2013-01-01 00:00:00,0.487937
2013-01-01 00:01:00,1.863481
2013-01-01 00:02:00,-2.619854
2013-01-01 00:03:00,0.688805
2013-01-01 00:04:00,-0.119478
2013-01-01 00:05:00,0.846206
2013-01-01 00:06:00,-0.377208
2013-01-01 00:07:00,-0.586735
2013-01-01 00:08:00,-0.015885
2013-01-01 00:09:00,1.778815


In [64]:
dft.loc['2013-1-15 12:30:00'] # not a slice, so .loc is required

A   -0.642778
Name: 2013-01-15 12:30:00, dtype: float64

In [66]:
dft.truncate(before='2013-1-15',after='2013-1-25')# slice by date range

Unnamed: 0,A
2013-01-15 00:00:00,-1.260134
2013-01-15 00:01:00,-0.048268
2013-01-15 00:02:00,0.125487
2013-01-15 00:03:00,0.606289
2013-01-15 00:04:00,0.220386
2013-01-15 00:05:00,0.875749
2013-01-15 00:06:00,0.109804
2013-01-15 00:07:00,-0.497066
2013-01-15 00:08:00,-0.415658
2013-01-15 00:09:00,0.111308


In [71]:
# Select rows by integer position. .iloc makes the positional intent explicit;
# plain ts[[0, 2, 6]] on a datetime-indexed Series depends on an
# integer-position fallback that newer pandas no longer guarantees.
ts.iloc[[0,2,6]].index

DatetimeIndex(['2011-01-31', '2011-03-31', '2011-07-29'], dtype='datetime64[ns]', freq=None)

In [74]:
# Select rows by integer position. .ix has been removed from pandas; .iloc is
# the positional indexer.
dft.iloc[[0,2,6],:].index

DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 00:02:00',
               '2013-01-01 00:06:00'],
              dtype='datetime64[ns]', freq=None)

In [76]:
# A plain standard-library datetime to combine with pandas offsets below.
# The `pd.datetime` alias is not available in current pandas.
d = datetime(2008, 8, 18, 9)
d

datetime.datetime(2008, 8, 18, 9, 0)

### Date offsets: `pandas.tseries.offsets` (imported above via `from pandas.tseries.offsets import *`)

In [80]:

# Calendar-aware offset: add four months.
d + DateOffset(months=4)

Timestamp('2008-12-18 09:00:00')

In [81]:
# Step back five business days.
d - 5*BDay()

Timestamp('2008-08-11 09:00:00')

In [82]:
# Roll forward to the business month end; the time of day is kept.
d + BMonthEnd()

Timestamp('2008-08-29 09:00:00')

## Parametric offsets

In [6]:
# Week() with no arguments adds exactly seven days. The `pd.datetime` alias is
# not available in current pandas, so the stdlib datetime is used.
d = datetime(2016, 6, 21, 9, 18)
d + Week()

Timestamp('2016-06-28 09:18:00')

In [7]:
# Anchored week: roll forward to the next weekday 4 (Friday, with Monday=0).
d + Week(weekday=4)

Timestamp('2016-06-24 09:18:00')

In [8]:
# Roll forward to the year end; the time of day is kept.
d + YearEnd()

Timestamp('2016-12-31 09:18:00')

In [9]:
# Three consecutive days, both as a DatetimeIndex (rng) and as the values of a
# Series (s), for the offset arithmetic examples that follow.
rng = pd.date_range(start='2012-01-01', end='2012-01-03')
s = pd.Series(data=rng)
rng

DatetimeIndex(['2012-01-01', '2012-01-02', '2012-01-03'], dtype='datetime64[ns]', freq='D')

In [10]:
# Offsets apply element-wise to a DatetimeIndex.
rng + DateOffset(months=2)

DatetimeIndex(['2012-03-01', '2012-03-02', '2012-03-03'], dtype='datetime64[ns]', freq='D')

In [16]:
 # Shift every element of s back by two days.
 t = s - Day(2)

In [17]:
# Subtracting two datetime Series gives a timedelta64 Series.
td = s - t
td

0   2 days
1   2 days
2   2 days
dtype: timedelta64[ns]

In [18]:
# Fixed-length offsets such as Minute can be added to timedeltas.
td + Minute(15)

0   2 days 00:15:00
1   2 days 00:15:00
2   2 days 00:15:00
dtype: timedelta64[ns]

In [23]:
from pandas.tseries.offsets import CustomBusinessDay
# Custom business calendar: the working week runs Sunday to Thursday and the
# listed dates are holidays. Holidays may be given as strings, datetimes or
# numpy datetime64 values.
weekmask_egypt = 'Sun Mon Tue Wed Thu'
# The `pd.datetime` alias is not available in current pandas; use the stdlib class.
holidays = ['2012-05-01', datetime(2013, 5, 1), np.datetime64('2014-05-01')]
bday_egypt = CustomBusinessDay(holidays=holidays, weekmask=weekmask_egypt)
dt = datetime(2013, 4, 30)
# Two custom business days ahead: 1 May is a holiday and Friday/Saturday are
# outside the week mask, so the result skips to Sunday 5 May.
dt + 2 * bday_egypt

Timestamp('2013-05-05 00:00:00')

In [25]:
# Map weekday numbers (0=Monday) to weekday names for five custom business days.
dts = pd.date_range(dt, periods=5, freq=bday_egypt)
pd.Series(dts.weekday, dts).map(pd.Series('Mon Tue Wed Thu Fri Sat Sun'.split()))

2013-04-30    Tue
2013-05-02    Thu
2013-05-05    Sun
2013-05-06    Mon
2013-05-07    Tue
Freq: C, dtype: object

In [26]:
from pandas.tseries.holiday import USFederalHolidayCalendar
# Business day that also skips US federal holidays.
bday_us = CustomBusinessDay(calendar=USFederalHolidayCalendar())
# The `pd.datetime` alias is not available in current pandas; use the stdlib class.
dt = datetime(2014, 1, 17)
# Friday 17 Jan 2014 + 1 business day skips the weekend and the Monday holiday.
dt + bday_us

Timestamp('2014-01-21 00:00:00')

In [28]:
from pandas.tseries.offsets import CustomBusinessMonthBegin
# First business day of the month under the US federal holiday calendar.
bmth_us = CustomBusinessMonthBegin(calendar = USFederalHolidayCalendar())
# The `pd.datetime` alias is not available in current pandas; use the stdlib class.
dt = datetime(2013, 12, 17)
# 1 Jan 2014 is a holiday, so the next custom business month begin is 2 Jan.
dt + bmth_us

Timestamp('2014-01-02 00:00:00')

In [29]:
# All custom business month starts between the two dates. Building a range
# through the DatetimeIndex constructor (start=/end=/freq=) has been removed
# from pandas; pd.date_range is the supported way to do this.
pd.date_range(start='20100101', end='20120101', freq=bmth_us)

DatetimeIndex(['2010-01-04', '2010-02-01', '2010-03-01', '2010-04-01',
               '2010-05-03', '2010-06-01', '2010-07-01', '2010-08-02',
               '2010-09-01', '2010-10-01', '2010-11-01', '2010-12-01',
               '2011-01-03', '2011-02-01', '2011-03-01', '2011-04-01',
               '2011-05-02', '2011-06-01', '2011-07-01', '2011-08-01',
               '2011-09-01', '2011-10-03', '2011-11-01', '2011-12-01'],
              dtype='datetime64[ns]', freq='CBMS')

In [30]:
# Default business hours (the repr shows 09:00-17:00).
bh = BusinessHour()
bh

<BusinessHour: BH=09:00-17:00>

In [31]:
# weekday() counts from Monday=0, so 4 means 2014-08-01 is a Friday.
pd.Timestamp('2014-08-01 10:00').weekday()

4

In [32]:
# Inside business hours: the offset simply adds one hour.
pd.Timestamp('2014-08-01 10:00') + bh

Timestamp('2014-08-01 11:00:00')

In [33]:
# Before opening: the result is 10:00, one business hour after the 09:00 open.
pd.Timestamp('2014-08-01 08:00') + bh

Timestamp('2014-08-01 10:00:00')

In [34]:
# A window whose start is later than its end (17:00 to 09:00).
bh = BusinessHour(start='17:00', end='09:00')
bh

<BusinessHour: BH=17:00-09:00>

In [37]:
# Five business days starting at 10:00.
# NOTE(review): a time-only start is presumably resolved against the current
# date (the recorded output begins 2016-12-27), so the output is not
# reproducible across runs.
pd.date_range(start='10:00', periods=5, freq='B')

DatetimeIndex(['2016-12-27 10:00:00', '2016-12-28 10:00:00',
               '2016-12-29 10:00:00', '2016-12-30 10:00:00',
               '2017-01-02 10:00:00'],
              dtype='datetime64[ns]', freq='B')

In [39]:
# Same range, passing a BDay offset object instead of the 'B' alias.
pd.date_range(start='10:00',periods=5, freq=BDay())

DatetimeIndex(['2016-12-27 10:00:00', '2016-12-28 10:00:00',
               '2016-12-29 10:00:00', '2016-12-30 10:00:00',
               '2017-01-02 10:00:00'],
              dtype='datetime64[ns]', freq='B')

In [44]:
# First five observations, for reference in the shifting examples.
ts[:5]

2011-01-31    0.001199
2011-02-28    0.297901
2011-03-31   -1.584172
2011-04-29    1.753769
2011-05-31    1.325897
Freq: BM, dtype: float64

In [46]:
ts.shift(1) # move values one position later; the index is unchanged

2011-01-31         NaN
2011-02-28    0.001199
2011-03-31    0.297901
2011-04-29   -1.584172
2011-05-31    1.753769
2011-06-30    1.325897
2011-07-29   -0.834254
2011-08-31   -1.761172
2011-09-30    0.526155
2011-10-31    1.458577
2011-11-30   -0.367685
2011-12-30   -0.169263
Freq: BM, dtype: float64

In [47]:
# With freq, the index is moved (here by 5 business days) and the values stay put.
ts.shift(5, freq=BDay())

2011-02-07    0.001199
2011-03-07    0.297901
2011-04-07   -1.584172
2011-05-06    1.753769
2011-06-07    1.325897
2011-07-07   -0.834254
2011-08-05   -1.761172
2011-09-07    0.526155
2011-10-07    1.458577
2011-11-07   -0.367685
2011-12-07   -0.169263
2012-01-06    0.407352
dtype: float64

In [48]:
# Move the index forward by five business month ends.
ts.shift(5, freq='BM')

2011-06-30    0.001199
2011-07-29    0.297901
2011-08-31   -1.584172
2011-09-30    1.753769
2011-10-31    1.325897
2011-11-30   -0.834254
2011-12-30   -1.761172
2012-01-31    0.526155
2012-02-29    1.458577
2012-03-30   -0.367685
2012-04-30   -0.169263
2012-05-31    0.407352
Freq: BM, dtype: float64

In [50]:
# Move the index forward by five calendar days. tshift has been removed from
# pandas; shift(freq=...) performs the same index shift.
ts.shift(5, freq='D')

2011-02-05    0.001199
2011-03-05    0.297901
2011-04-05   -1.584172
2011-05-04    1.753769
2011-06-05    1.325897
2011-07-05   -0.834254
2011-08-03   -1.761172
2011-09-05    0.526155
2011-10-05    1.458577
2011-11-05   -0.367685
2011-12-05   -0.169263
2012-01-04    0.407352
dtype: float64

## 重采样

In [51]:
ts.resample('5Min').ohlc()# open, high, low, close per 5-minute bin

Unnamed: 0,open,high,low,close
2011-01-31 00:00:00,0.001199,0.001199,0.001199,0.001199
2011-01-31 00:05:00,,,,
2011-01-31 00:10:00,,,,
2011-01-31 00:15:00,,,,
2011-01-31 00:20:00,,,,
2011-01-31 00:25:00,,,,
2011-01-31 00:30:00,,,,
2011-01-31 00:35:00,,,,
2011-01-31 00:40:00,,,,
2011-01-31 00:45:00,,,,


## 上采样

In [54]:
# The two observations that are upsampled below.
ts[:2]

2011-01-31    0.001199
2011-02-28    0.297901
Freq: BM, dtype: float64

In [52]:
ts[:2].resample('250L').asfreq()# upsample to 250-millisecond bins; new slots are NaN

2011-01-31 00:00:00.000    0.001199
2011-01-31 00:00:00.250         NaN
2011-01-31 00:00:00.500         NaN
2011-01-31 00:00:00.750         NaN
2011-01-31 00:00:01.000         NaN
2011-01-31 00:00:01.250         NaN
2011-01-31 00:00:01.500         NaN
2011-01-31 00:00:01.750         NaN
2011-01-31 00:00:02.000         NaN
2011-01-31 00:00:02.250         NaN
2011-01-31 00:00:02.500         NaN
2011-01-31 00:00:02.750         NaN
2011-01-31 00:00:03.000         NaN
2011-01-31 00:00:03.250         NaN
2011-01-31 00:00:03.500         NaN
2011-01-31 00:00:03.750         NaN
2011-01-31 00:00:04.000         NaN
2011-01-31 00:00:04.250         NaN
2011-01-31 00:00:04.500         NaN
2011-01-31 00:00:04.750         NaN
2011-01-31 00:00:05.000         NaN
2011-01-31 00:00:05.250         NaN
2011-01-31 00:00:05.500         NaN
2011-01-31 00:00:05.750         NaN
2011-01-31 00:00:06.000         NaN
2011-01-31 00:00:06.250         NaN
2011-01-31 00:00:06.500         NaN
2011-01-31 00:00:06.750     

In [53]:
# Upsample to 250-millisecond bins and forward-fill, with limit=2.
# NOTE(review): the recorded output shows more than two consecutive filled
# slots, which does not match limit=2; confirm against the pandas version in use.
ts[:2].resample('250L').ffill(limit=2)

2011-01-31 00:00:00.000    0.001199
2011-01-31 00:00:00.250    0.001199
2011-01-31 00:00:00.500    0.001199
2011-01-31 00:00:00.750    0.001199
2011-01-31 00:00:01.000    0.001199
2011-01-31 00:00:01.250    0.001199
2011-01-31 00:00:01.500    0.001199
2011-01-31 00:00:01.750    0.001199
2011-01-31 00:00:02.000    0.001199
2011-01-31 00:00:02.250    0.001199
2011-01-31 00:00:02.500    0.001199
2011-01-31 00:00:02.750    0.001199
2011-01-31 00:00:03.000    0.001199
2011-01-31 00:00:03.250    0.001199
2011-01-31 00:00:03.500    0.001199
2011-01-31 00:00:03.750    0.001199
2011-01-31 00:00:04.000    0.001199
2011-01-31 00:00:04.250    0.001199
2011-01-31 00:00:04.500    0.001199
2011-01-31 00:00:04.750    0.001199
2011-01-31 00:00:05.000    0.001199
2011-01-31 00:00:05.250    0.001199
2011-01-31 00:00:05.500    0.001199
2011-01-31 00:00:05.750    0.001199
2011-01-31 00:00:06.000    0.001199
2011-01-31 00:00:06.250    0.001199
2011-01-31 00:00:06.500    0.001199
2011-01-31 00:00:06.750    0

## 稀疏抽样

In [55]:
# Sparse data: 100 daily points, each offset by one second. Resampling to
# 3-minute bins creates a bin for every interval, most of them empty.
rng = pd.date_range('2014-1-1', periods=100, freq='D') + pd.Timedelta('1s')
ts = pd.Series(range(100), index=rng)
ts.resample('3T').sum()

2014-01-01 00:00:00     0.0
2014-01-01 00:03:00     NaN
2014-01-01 00:06:00     NaN
2014-01-01 00:09:00     NaN
2014-01-01 00:12:00     NaN
2014-01-01 00:15:00     NaN
2014-01-01 00:18:00     NaN
2014-01-01 00:21:00     NaN
2014-01-01 00:24:00     NaN
2014-01-01 00:27:00     NaN
2014-01-01 00:30:00     NaN
2014-01-01 00:33:00     NaN
2014-01-01 00:36:00     NaN
2014-01-01 00:39:00     NaN
2014-01-01 00:42:00     NaN
2014-01-01 00:45:00     NaN
2014-01-01 00:48:00     NaN
2014-01-01 00:51:00     NaN
2014-01-01 00:54:00     NaN
2014-01-01 00:57:00     NaN
2014-01-01 01:00:00     NaN
2014-01-01 01:03:00     NaN
2014-01-01 01:06:00     NaN
2014-01-01 01:09:00     NaN
2014-01-01 01:12:00     NaN
2014-01-01 01:15:00     NaN
2014-01-01 01:18:00     NaN
2014-01-01 01:21:00     NaN
2014-01-01 01:24:00     NaN
2014-01-01 01:27:00     NaN
                       ... 
2014-04-09 22:33:00     NaN
2014-04-09 22:36:00     NaN
2014-04-09 22:39:00     NaN
2014-04-09 22:42:00     NaN
2014-04-09 22:45:00 

In [57]:
from functools import partial
from pandas.tseries.frequencies import to_offset
def round(t, freq):
    """Floor timestamp ``t`` down to a multiple of ``freq``.

    NOTE: despite its name this floors rather than rounds to nearest, and it
    shadows the builtin ``round`` for the rest of the notebook. The name is
    kept so existing calls keep working.

    Parameters
    ----------
    t : pd.Timestamp
        Timestamp to floor; its nanosecond ``.value`` is used.
    freq : str or fixed-length offset
        Bin width, anything ``to_offset`` accepts that converts to a
        Timedelta (e.g. ``'3min'``).

    Returns
    -------
    pd.Timestamp
        The start of the ``freq``-sized bin containing ``t``.
    """
    # pd.Timedelta(offset) gives the bin width without the deprecated
    # ``offset.delta`` attribute.
    step_ns = pd.Timedelta(to_offset(freq)).value
    return pd.Timestamp((t.value // step_ns) * step_ns)
# Group on the floored 3-minute timestamp of each index value, so only bins
# that actually contain data appear in the result.
ts.groupby(partial(round, freq='3T')).sum()

2014-01-01     0
2014-01-02     1
2014-01-03     2
2014-01-04     3
2014-01-05     4
2014-01-06     5
2014-01-07     6
2014-01-08     7
2014-01-09     8
2014-01-10     9
2014-01-11    10
2014-01-12    11
2014-01-13    12
2014-01-14    13
2014-01-15    14
2014-01-16    15
2014-01-17    16
2014-01-18    17
2014-01-19    18
2014-01-20    19
2014-01-21    20
2014-01-22    21
2014-01-23    22
2014-01-24    23
2014-01-25    24
2014-01-26    25
2014-01-27    26
2014-01-28    27
2014-01-29    28
2014-01-30    29
              ..
2014-03-12    70
2014-03-13    71
2014-03-14    72
2014-03-15    73
2014-03-16    74
2014-03-17    75
2014-03-18    76
2014-03-19    77
2014-03-20    78
2014-03-21    79
2014-03-22    80
2014-03-23    81
2014-03-24    82
2014-03-25    83
2014-03-26    84
2014-03-27    85
2014-03-28    86
2014-03-29    87
2014-03-30    88
2014-03-31    89
2014-04-01    90
2014-04-02    91
2014-04-03    92
2014-04-04    93
2014-04-05    94
2014-04-06    95
2014-04-07    96
2014-04-08    

In [58]:
# 1000 rows of second-frequency random data in three columns; r is the
# 3-minute resampler reused by the aggregation examples below.
df = pd.DataFrame(np.random.randn(1000, 3),
                 index = pd.date_range('1/1/2012', freq='S', periods=1000),
                 columns=['A','B','C'])
r = df.resample('3T')
r.mean()

Unnamed: 0,A,B,C
2012-01-01 00:00:00,-0.088428,-0.071741,0.027449
2012-01-01 00:03:00,0.003197,0.087279,-0.033199
2012-01-01 00:06:00,0.094311,0.089504,-0.076706
2012-01-01 00:09:00,0.035126,-0.042229,0.033392
2012-01-01 00:12:00,0.069101,-0.01761,-0.064334
2012-01-01 00:15:00,-0.038412,0.001196,0.03734


In [59]:
# Sum and mean of column A per bin, under custom column names. Passing a
# {new_name: func} dict to agg on a single column is no longer supported in
# pandas, so aggregate first and rename the resulting columns.
(r['A']
 .agg(['sum', 'mean'])
 .rename(columns={'sum': 'result1', 'mean': 'result2'}))

Unnamed: 0,result2,result1
2012-01-01 00:00:00,-0.088428,-15.916967
2012-01-01 00:03:00,0.003197,0.575417
2012-01-01 00:06:00,0.094311,16.975921
2012-01-01 00:09:00,0.035126,6.322742
2012-01-01 00:12:00,0.069101,12.438137
2012-01-01 00:15:00,-0.038412,-3.841194


In [60]:
# Several aggregations of one column: one output column per function.
r['A'].agg([np.sum, np.mean, np.std])

Unnamed: 0,sum,mean,std
2012-01-01 00:00:00,-15.916967,-0.088428,1.029585
2012-01-01 00:03:00,0.575417,0.003197,0.958569
2012-01-01 00:06:00,16.975921,0.094311,1.02054
2012-01-01 00:09:00,6.322742,0.035126,1.067892
2012-01-01 00:12:00,12.438137,0.069101,0.929218
2012-01-01 00:15:00,-3.841194,-0.038412,0.813561


In [61]:
# Applied to the whole resampler, the result has two column levels
# (original column, then function).
r.agg([np.sum, np.mean])

Unnamed: 0_level_0,A,A,B,B,C,C
Unnamed: 0_level_1,sum,mean,sum,mean,sum,mean
2012-01-01 00:00:00,-15.916967,-0.088428,-12.913341,-0.071741,4.940766,0.027449
2012-01-01 00:03:00,0.575417,0.003197,15.710235,0.087279,-5.975861,-0.033199
2012-01-01 00:06:00,16.975921,0.094311,16.110799,0.089504,-13.807121,-0.076706
2012-01-01 00:09:00,6.322742,0.035126,-7.601253,-0.042229,6.0105,0.033392
2012-01-01 00:12:00,12.438137,0.069101,-3.169809,-0.01761,-11.580039,-0.064334
2012-01-01 00:15:00,-3.841194,-0.038412,0.119641,0.001196,3.734033,0.03734


In [62]:
# A {column: function} dict applies a different aggregation to each column:
# sum for A, sample standard deviation (ddof=1) for B.
r.agg({'A': np.sum,
      'B': lambda x: np.std(x, ddof=1)})

Unnamed: 0,A,B
2012-01-01 00:00:00,-15.916967,0.912294
2012-01-01 00:03:00,0.575417,0.958724
2012-01-01 00:06:00,16.975921,1.032585
2012-01-01 00:09:00,6.322742,0.905531
2012-01-01 00:12:00,12.438137,1.017796
2012-01-01 00:15:00,-3.841194,0.967114
