In [1]:
import pandas as pd
import numpy as np

### 時間序列
* 時間戳記 timestamp
* 固定周期 period
* 時間間隔 interval

### date_range
* 第一個參數 start：指定開始時間
* periods：要產生的期數（時間點的個數）
* freq：頻率，例如 H: 小時, D: 天, M: 月（月底）；前面可加倍數，例如 3D 表示每 3 天

In [2]:
# Ten consecutive calendar days beginning on 2016-07-01.
rng = pd.date_range(
    start='2016/07/01',
    freq='D',
    periods=10,
)
rng

DatetimeIndex(['2016-07-01', '2016-07-02', '2016-07-03', '2016-07-04',
               '2016-07-05', '2016-07-06', '2016-07-07', '2016-07-08',
               '2016-07-09', '2016-07-10'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# Same start date, but advance three days per step ('3D').
pd.date_range(start='2016/07/01', freq='3D', periods=10)

DatetimeIndex(['2016-07-01', '2016-07-04', '2016-07-07', '2016-07-10',
               '2016-07-13', '2016-07-16', '2016-07-19', '2016-07-22',
               '2016-07-25', '2016-07-28'],
              dtype='datetime64[ns]', freq='3D')

In [16]:
# Month-end frequency: every stamp is the last calendar day of a month, so the
# first value is 2016-07-31 even though the start date is 2016-07-01.
# An explicit MonthEnd offset is passed instead of the 'M' string alias because
# newer pandas releases deprecate that alias (in favour of 'ME'); the offset
# object yields the same index on old and new versions alike.
pd.date_range('2016/07/01', periods=10, freq=pd.offsets.MonthEnd())

DatetimeIndex(['2016-07-31', '2016-08-31', '2016-09-30', '2016-10-31',
               '2016-11-30', '2016-12-31', '2017-01-31', '2017-02-28',
               '2017-03-31', '2017-04-30'],
              dtype='datetime64[ns]', freq='M')

In [11]:
# Twenty daily observations of standard-normal noise, indexed from 2016-01-01.
# A locally seeded RandomState makes the values identical on every run
# (Restart & Run All) without modifying NumPy's global random state.
# The variable name `time` is kept because later cells refer to it.
time = pd.Series(np.random.RandomState(42).randn(20),
                 index=pd.date_range('2016/1/1', periods=20, freq='D'))
print(time)

2016-01-01   -0.578282
2016-01-02   -0.172488
2016-01-03    0.676103
2016-01-04   -0.450154
2016-01-05    0.522910
2016-01-06    1.164747
2016-01-07   -0.406596
2016-01-08    0.512583
2016-01-09    0.680808
2016-01-10   -1.107972
2016-01-11   -0.198233
2016-01-12   -0.555008
2016-01-13    0.483253
2016-01-14    0.465282
2016-01-15    0.613917
2016-01-16    0.023361
2016-01-17    0.719863
2016-01-18    0.441542
2016-01-19   -1.204764
2016-01-20    1.447714
Freq: D, dtype: float64


In [17]:
# Look up a single day by its date string; prints one scalar value.
print(time.loc['2016-01-10'])

-1.1079718002785537


In [18]:
# Label-based date slice; both the start and the end date are included.
print(time.loc['2016-01-15':'2016-01-20'])

2016-01-15    0.613917
2016-01-16    0.023361
2016-01-17    0.719863
2016-01-18    0.441542
2016-01-19   -1.204764
2016-01-20    1.447714
Freq: D, dtype: float64


### truncate 過濾
* before：去掉該日之前的資料，保留該日（含）及之後
* after：去掉該日之後的資料，保留該日（含）及之前

In [27]:
# Drop everything earlier than 2016-01-10; that day itself is kept.
time.truncate(before='2016-01-10', after=None)

2016-01-10   -1.107972
2016-01-11   -0.198233
2016-01-12   -0.555008
2016-01-13    0.483253
2016-01-14    0.465282
2016-01-15    0.613917
2016-01-16    0.023361
2016-01-17    0.719863
2016-01-18    0.441542
2016-01-19   -1.204764
2016-01-20    1.447714
Freq: D, dtype: float64

In [25]:
# Drop everything later than 2016-01-10; that day itself is kept.
time.truncate(before=None, after='2016-01-10')

2016-01-01   -0.578282
2016-01-02   -0.172488
2016-01-03    0.676103
2016-01-04   -0.450154
2016-01-05    0.522910
2016-01-06    1.164747
2016-01-07   -0.406596
2016-01-08    0.512583
2016-01-09    0.680808
2016-01-10   -1.107972
Freq: D, dtype: float64

In [21]:
# Month-end stamps falling between 2010-01-01 and 2011-01-01 (twelve of them).
# An explicit MonthEnd offset replaces the 'M' string alias, which newer pandas
# releases deprecate (in favour of 'ME'); the resulting index is unchanged.
data = pd.date_range('2010-01-01', '2011-01-01', freq=pd.offsets.MonthEnd())
data

DatetimeIndex(['2010-01-31', '2010-02-28', '2010-03-31', '2010-04-30',
               '2010-05-31', '2010-06-30', '2010-07-31', '2010-08-31',
               '2010-09-30', '2010-10-31', '2010-11-30', '2010-12-31'],
              dtype='datetime64[ns]', freq='M')

In [35]:
# A single point in time built from its calendar components (midnight).
pd.Timestamp(year=2016, month=7, day=10)

Timestamp('2016-07-10 00:00:00')

In [36]:
# The same day, now with an explicit hour and minute (10:15).
pd.Timestamp(year=2016, month=7, day=10, hour=10, minute=15)

Timestamp('2016-07-10 10:15:00')

In [34]:
# A Period covers a span of time; a date-only string yields a one-day period.
pd.Period(value='2016-01-01')

Period('2016-01-01', 'D')

In [37]:
# A fixed duration of exactly one day.
pd.Timedelta(days=1)

Timedelta('1 days 00:00:00')

In [40]:
# Shift a minute-resolution period forward by one day.
pd.Period('2016-01-01 10:10') + pd.Timedelta(days=1)

Period('2016-01-02 10:10', 'T')

In [41]:
# Two spellings of the same step size: '25H' and '1D1H' (one day plus one hour).
p1 = pd.period_range(start='2016-01-01 10:10', periods=10, freq='25H')
p2 = pd.period_range(start='2016-01-01 10:10', periods=10, freq='1D1H')

In [42]:
# Element-wise comparison of the two period ranges built with '25H' and '1D1H'.
p1 == p2

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [44]:
# Ten hourly integer observations starting at 2016-07-10 08:00.
# The start is written in ISO form (year-month-day) rather than the ambiguous
# '07-10-16', whose meaning depends on month-first parsing of a two-digit year.
# An explicit Hour offset replaces the 'H' string alias, which newer pandas
# releases deprecate; the resulting index is the same.
ts = pd.Series(
    range(10),
    index=pd.date_range('2016-07-10 08:00', periods=10, freq=pd.offsets.Hour()),
)
ts

2016-07-10 08:00:00    0
2016-07-10 09:00:00    1
2016-07-10 10:00:00    2
2016-07-10 11:00:00    3
2016-07-10 12:00:00    4
2016-07-10 13:00:00    5
2016-07-10 14:00:00    6
2016-07-10 15:00:00    7
2016-07-10 16:00:00    8
2016-07-10 17:00:00    9
Freq: H, dtype: int64

In [45]:
# Re-index the hourly series by periods instead of timestamps; with no
# frequency given, the period frequency is taken from the existing index.
ts_period = ts.to_period(freq=None)
ts_period

2016-07-10 08:00    0
2016-07-10 09:00    1
2016-07-10 10:00    2
2016-07-10 11:00    3
2016-07-10 12:00    4
2016-07-10 13:00    5
2016-07-10 14:00    6
2016-07-10 15:00    7
2016-07-10 16:00    8
2016-07-10 17:00    9
Freq: H, dtype: int64

In [46]:
# Slicing a period-indexed series: the hourly periods that contain the
# endpoints (08:00 for 08:30, 11:00 for 11:45) are part of the result.
ts_period.loc['2016-07-10 08:30':'2016-07-10 11:45']

2016-07-10 08:00    0
2016-07-10 09:00    1
2016-07-10 10:00    2
2016-07-10 11:00    3
Freq: H, dtype: int64

In [47]:
# Slicing the timestamp-indexed series with the same bounds: only stamps that
# fall inside the window are returned, so 08:00 is excluded.
ts.loc['2016-07-10 08:30':'2016-07-10 11:45']

2016-07-10 09:00:00    1
2016-07-10 10:00:00    2
2016-07-10 11:00:00    3
Freq: H, dtype: int64