# Time Series

In [1]:
import numpy as np
import pandas as pd
np.random.seed(12345)
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_rows = 20
np.set_printoptions(precision=4, suppress=True)

## Date and Time Data Types and Tools

In [3]:
from datetime import datetime
now = datetime.now()
now
now.year, now.month, now.day

(2021, 4, 4)

In [6]:
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
delta
delta.days
delta.seconds

56700

In [5]:
from datetime import timedelta
start = datetime(2011, 1, 7)
start + timedelta(12)
start - 2 * timedelta(12)

datetime.datetime(2010, 12, 14, 0, 0)

### Converting Between String and Datetime

In [8]:
stamp = datetime(2011, 1, 3)
print(str(stamp))
stamp.strftime('%Y-%m-%d')

2011-01-03 00:00:00


'2011-01-03'

In [9]:
value = '2011-01-03'
datetime.strptime(value, '%Y-%m-%d')
datestrs = ['7/6/2011', '8/6/2011']
[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]

[datetime.datetime(2011, 7, 6, 0, 0), datetime.datetime(2011, 8, 6, 0, 0)]

In [10]:
from dateutil.parser import parse
parse('2011-01-03')

datetime.datetime(2011, 1, 3, 0, 0)

In [11]:
parse('Jan 31, 1997 10:45 PM')

datetime.datetime(1997, 1, 31, 22, 45)

In [12]:
parse('6/12/2011', dayfirst=True)

datetime.datetime(2011, 12, 6, 0, 0)

In [15]:
datestrs = ['2011-07-06 12:00:00', '2011-08-06 00:00:00']
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [17]:
idx = pd.to_datetime(datestrs + [None])
print(idx)
idx[2]
pd.isnull(idx)

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)


array([False, False,  True])

## Time Series Basics

In [18]:
from datetime import datetime
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
         datetime(2011, 1, 7), datetime(2011, 1, 8),
         datetime(2011, 1, 10), datetime(2011, 1, 12)]
ts = pd.Series(np.random.randn(6), index=dates)
ts

2011-01-02   -0.204708
2011-01-05    0.478943
2011-01-07   -0.519439
2011-01-08   -0.555730
2011-01-10    1.965781
2011-01-12    1.393406
dtype: float64

In [23]:
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [21]:
ts[::2]

2011-01-02   -0.204708
2011-01-07   -0.519439
2011-01-10    1.965781
dtype: float64

In [20]:
ts + ts[::2] # float + NaN -> NaN

2011-01-02   -0.409415
2011-01-05         NaN
2011-01-07   -1.038877
2011-01-08         NaN
2011-01-10    3.931561
2011-01-12         NaN
dtype: float64

In [22]:
ts.index.dtype

dtype('<M8[ns]')

In [25]:
stamp = ts.index[0]
stamp
stamp?

[1;31mType:[0m        Timestamp
[1;31mString form:[0m 2011-01-02 00:00:00
[1;31mFile:[0m        c:\users\jiang\anaconda3\lib\site-packages\pandas\_libs\tslibs\timestamps.cp37-win_amd64.pyd
[1;31mDocstring:[0m  
Pandas replacement for python datetime.datetime object.

Timestamp is the pandas equivalent of python's Datetime
and is interchangeable with it in most cases. It's the type used
for the entries that make up a DatetimeIndex, and other timeseries
oriented data structures in pandas.

Parameters
----------
ts_input : datetime-like, str, int, float
    Value to be converted to Timestamp.
freq : str, DateOffset
    Offset which Timestamp will have.
tz : str, pytz.timezone, dateutil.tz.tzfile or None
    Time zone for time which Timestamp will have.
unit : str
    Unit used for conversion if ts_input is of type int or float. The
    valid values are 'D', 'h', 'm', 's', 'ms', 'us', and 'ns'. For
    example, 's' means seconds and 'ms' means milliseconds.
year, month, day : int
h

### Indexing, Selection, Subsetting

In [26]:
stamp = ts.index[2]
ts[stamp]

-0.5194387150567381

In [29]:
print(ts.index)
print(ts['1/10/2011'])  # 可以通过时间字符串作为索引
print(ts['20110110'])

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)
1.9657805725027142
1.9657805725027142


In [32]:
pd.date_range('3/4/2021', periods=10)
pd.date_range?

[1;31mSignature:[0m
[0mpd[0m[1;33m.[0m[0mdate_range[0m[1;33m([0m[1;33m
[0m    [0mstart[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mend[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mperiods[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mfreq[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mtz[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mnormalize[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mname[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mclosed[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [1;33m**[0m[0mkwargs[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m [1;33m->[0m [0mpandas[0m[1;33m.[0m[0mcore[0m[1;33m.[0m[0mindexes[0m[1;33m.[0m[0mdatetimes[0m[1;33m.[0m[0mDatetimeIndex[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Return a fixed frequency DatetimeIndex.

Parameters
----------
start : str or datetime-like, optional
    Left 

In [33]:
longer_ts = pd.Series(np.random.randn(1000),
                      index=pd.date_range('1/1/2000', periods=1000))
longer_ts
longer_ts['2001'] # 选择一年的数据

2001-01-01    1.599534
2001-01-02    0.474071
2001-01-03    0.151326
2001-01-04   -0.542173
2001-01-05   -0.475496
                ...   
2001-12-27    0.057874
2001-12-28   -0.433739
2001-12-29    0.092698
2001-12-30   -1.397820
2001-12-31    1.457823
Freq: D, Length: 365, dtype: float64

In [34]:
longer_ts['2001-05'] # 选择一个月的数据

2001-05-01   -0.622547
2001-05-02    0.936289
2001-05-03    0.750018
2001-05-04   -0.056715
2001-05-05    2.300675
                ...   
2001-05-27    0.235477
2001-05-28    0.111835
2001-05-29   -1.251504
2001-05-30   -2.949343
2001-05-31    0.634634
Freq: D, Length: 31, dtype: float64

In [35]:
ts[datetime(2011, 1, 7):] # slice, 选择2011-1-7之后的数据

2011-01-07   -0.519439
2011-01-08   -0.555730
2011-01-10    1.965781
2011-01-12    1.393406
dtype: float64

In [None]:
ts
ts['1/6/2011':'1/11/2011'] # slice

In [39]:
print(ts)
print(ts.truncate(after='1/9/2011'))
ts.truncate?

2011-01-02   -0.204708
2011-01-05    0.478943
2011-01-07   -0.519439
2011-01-08   -0.555730
2011-01-10    1.965781
2011-01-12    1.393406
dtype: float64
2011-01-02   -0.204708
2011-01-05    0.478943
2011-01-07   -0.519439
2011-01-08   -0.555730
dtype: float64


[1;31mSignature:[0m [0mts[0m[1;33m.[0m[0mtruncate[0m[1;33m([0m[0mbefore[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m [0mafter[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m [0maxis[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m [0mcopy[0m[1;33m:[0m [1;34m'bool_t'[0m [1;33m=[0m [1;32mTrue[0m[1;33m)[0m [1;33m->[0m [1;34m'FrameOrSeries'[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Truncate a Series or DataFrame before and after some index value.

This is a useful shorthand for boolean indexing based on index
values above or below certain thresholds.

Parameters
----------
before : date, str, int
    Truncate all rows before this index value.
after : date, str, int
    Truncate all rows after this index value.
axis : {0 or 'index', 1 or 'columns'}, optional
    Axis to truncate. Truncates the index (rows) by default.
copy : bool, default is True,
    Return a copy of the truncated section.

Returns
-------
type of caller
    The truncated Series or DataFrame.

See 

In [43]:
dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')
long_df = pd.DataFrame(np.random.randn(100, 4),
                       index=dates,
                       columns=['Colorado', 'Texas',
                                'New York', 'Ohio'])
long_df.loc['5-2001'] # 获取一个月的数据

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,-0.006045,0.490094,-0.277186,-0.707213
2001-05-09,-0.560107,2.735527,0.927335,1.513906
2001-05-16,0.5386,1.273768,0.667876,-0.969206
2001-05-23,1.676091,-0.817649,0.050188,1.951312
2001-05-30,3.260383,0.963301,1.201206,-1.852001


### Time Series with Duplicate Indices

In [44]:
dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000',
                          '1/2/2000', '1/3/2000'])
dup_ts = pd.Series(np.arange(5), index=dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [45]:
dup_ts.index.is_unique

False

In [46]:
dup_ts['1/3/2000']  # not duplicated
dup_ts['1/2/2000']  # duplicated

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

In [47]:
grouped = dup_ts.groupby(level=0)
grouped.mean()
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

## Date Ranges, Frequencies, and Shifting

In [50]:
ts
print(ts)
resampler = ts.resample('D')
resampler

2011-01-02   -0.204708
2011-01-05    0.478943
2011-01-07   -0.519439
2011-01-08   -0.555730
2011-01-10    1.965781
2011-01-12    1.393406
dtype: float64


<pandas.core.resample.DatetimeIndexResampler object at 0x000001F6C1A058C8>

### Generating Date Ranges

In [51]:
index = pd.date_range('2012-04-01', '2012-06-01')
index

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
      

In [52]:
print(pd.date_range(start='2012-04-01', periods=20))
pd.date_range(end='2012-06-01', periods=20)

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
              dtype='datetime64[ns]', freq='D')


DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',
               '2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',
               '2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',
               '2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',
               '2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')

In [55]:
pd.date_range('2000-01-01', '2000-12-02', freq='BM') # business month end

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30'],
              dtype='datetime64[ns]', freq='BM')

In [56]:
pd.date_range('2012-05-02 12:56:31', periods=5)

DatetimeIndex(['2012-05-02 12:56:31', '2012-05-03 12:56:31',
               '2012-05-04 12:56:31', '2012-05-05 12:56:31',
               '2012-05-06 12:56:31'],
              dtype='datetime64[ns]', freq='D')

In [59]:
pd.date_range('2012-05-02 12:56:31', periods=5, normalize=True)

DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')

### Frequencies and Date Offsets

In [61]:
from pandas.tseries.offsets import Hour, Minute
hour = Hour()
hour?

[1;31mCall signature:[0m  [0mhour[0m[1;33m([0m[1;33m*[0m[0margs[0m[1;33m,[0m [1;33m**[0m[0mkwargs[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mType:[0m            Hour
[1;31mString form:[0m     <Hour>
[1;31mFile:[0m            c:\users\jiang\anaconda3\lib\site-packages\pandas\_libs\tslibs\offsets.cp37-win_amd64.pyd
[1;31mDocstring:[0m       <no docstring>
[1;31mClass docstring:[0m Base class for DateOffset methods that are not overridden by subclasses.


In [62]:
four_hours = Hour(4)
four_hours

<4 * Hours>

In [63]:
pd.date_range('2000-01-01', '2000-01-03 23:59', freq='4h')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 04:00:00',
               '2000-01-01 08:00:00', '2000-01-01 12:00:00',
               '2000-01-01 16:00:00', '2000-01-01 20:00:00',
               '2000-01-02 00:00:00', '2000-01-02 04:00:00',
               '2000-01-02 08:00:00', '2000-01-02 12:00:00',
               '2000-01-02 16:00:00', '2000-01-02 20:00:00',
               '2000-01-03 00:00:00', '2000-01-03 04:00:00',
               '2000-01-03 08:00:00', '2000-01-03 12:00:00',
               '2000-01-03 16:00:00', '2000-01-03 20:00:00'],
              dtype='datetime64[ns]', freq='4H')

In [64]:
Hour(2) + Minute(30)

<150 * Minutes>

In [65]:
pd.date_range('2000-01-01', periods=10, freq='1h30min')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:00',
               '2000-01-01 03:00:00', '2000-01-01 04:30:00',
               '2000-01-01 06:00:00', '2000-01-01 07:30:00',
               '2000-01-01 09:00:00', '2000-01-01 10:30:00',
               '2000-01-01 12:00:00', '2000-01-01 13:30:00'],
              dtype='datetime64[ns]', freq='90T')

#### Week of month dates

In [66]:
rng = pd.date_range('2012-01-01', '2012-09-01', freq='WOM-3FRI')
list(rng)

[Timestamp('2012-01-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-02-17 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-03-16 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-04-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-05-18 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-06-15 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-07-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-08-17 00:00:00', freq='WOM-3FRI')]

### Shifting (Leading and Lagging) Data

In [77]:
ts = pd.Series(np.random.randn(4),
               index=pd.date_range('1/1/2000', periods=4, freq='M'))
# ts.shift?
print(ts)
ts.shift(2)     # 向上移动index
ts.shift(-2)    # 向下移动index

2000-01-31   -0.797246
2000-02-29    0.472879
2000-03-31    0.522356
2000-04-30   -0.546348
Freq: M, dtype: float64


2000-01-31    0.522356
2000-02-29   -0.546348
2000-03-31         NaN
2000-04-30         NaN
Freq: M, dtype: float64

In [75]:
ts / ts.shift(1) - 1

2000-01-31          NaN
2000-02-29    -1.250496
2000-03-31    11.616324
2000-04-30    -0.066068
Freq: M, dtype: float64

In [79]:
print(ts)
ts.shift(2, freq='M')

2000-01-31   -0.797246
2000-02-29    0.472879
2000-03-31    0.522356
2000-04-30   -0.546348
Freq: M, dtype: float64


2000-03-31   -0.797246
2000-04-30    0.472879
2000-05-31    0.522356
2000-06-30   -0.546348
Freq: M, dtype: float64

In [82]:
ts.shift(3, freq='D')
# ts.shift(1, freq='90T')

2000-02-03   -0.797246
2000-03-03    0.472879
2000-04-03    0.522356
2000-05-03   -0.546348
dtype: float64

#### Shifting dates with offsets

In [83]:
from pandas.tseries.offsets import Day, MonthEnd
now = datetime(2011, 11, 17)
now + 3 * Day()

Timestamp('2011-11-20 00:00:00')

In [86]:
print(now + MonthEnd()) # now的月底
now + MonthEnd(2) # now的下一个月底

2011-11-30 00:00:00


Timestamp('2011-12-31 00:00:00')

In [88]:
offset = MonthEnd()
print(offset.rollforward(now))  # 到月底
print(offset.rollback(now))     # 到上一个月月底
print(now - offset)

2011-11-30 00:00:00
2011-10-31 00:00:00
2011-10-31 00:00:00


In [89]:
ts = pd.Series(np.random.randn(20),
               index=pd.date_range('1/15/2000', periods=20, freq='4d'))
print(ts)
ts.groupby(offset.rollforward).mean() # 按月统计

2000-01-15   -0.733537
2000-01-19    1.302736
2000-01-23    0.022199
2000-01-27    0.364287
2000-01-31   -0.922839
2000-02-04    0.312656
2000-02-08   -1.128497
2000-02-12   -0.333488
2000-02-16   -0.514551
2000-02-20   -0.559782
2000-02-24   -0.783408
2000-02-28   -1.797685
2000-03-03   -0.172670
2000-03-07    0.680215
2000-03-11    1.607578
2000-03-15    0.200381
2000-03-19   -0.834068
2000-03-23   -0.302988
2000-03-27    1.663261
2000-03-31   -0.996206
Freq: 4D, dtype: float64


2000-01-31    0.006569
2000-02-29   -0.686393
2000-03-31    0.230688
dtype: float64

In [None]:
ts.resample('M').mean()

## Time Zone Handling

In [None]:
import pytz
pytz.common_timezones[-5:]

In [None]:
tz = pytz.timezone('America/New_York')
tz

### Time Zone Localization and Conversion

In [90]:
rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

2012-03-09 09:30:00    1.521760
2012-03-10 09:30:00    0.244175
2012-03-11 09:30:00    0.423331
2012-03-12 09:30:00   -0.654040
2012-03-13 09:30:00    2.089154
2012-03-14 09:30:00   -0.060220
Freq: D, dtype: float64

In [91]:
print(ts.index.tz)

None


In [92]:
pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-17 09:30:00+00:00', '2012-03-18 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [93]:
ts
ts_utc = ts.tz_localize('UTC')
ts_utc
ts_utc.index

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [94]:
ts_utc.tz_convert('America/New_York')

2012-03-09 04:30:00-05:00    1.521760
2012-03-10 04:30:00-05:00    0.244175
2012-03-11 05:30:00-04:00    0.423331
2012-03-12 05:30:00-04:00   -0.654040
2012-03-13 05:30:00-04:00    2.089154
2012-03-14 05:30:00-04:00   -0.060220
Freq: D, dtype: float64

In [None]:
ts_eastern = ts.tz_localize('America/New_York')
ts_eastern.tz_convert('UTC')
ts_eastern.tz_convert('Europe/Berlin')

In [None]:
ts.index.tz_localize('Asia/Shanghai')

### Operations with Time Zone−Aware Timestamp Objects

In [None]:
stamp = pd.Timestamp('2011-03-12 04:00')
stamp_utc = stamp.tz_localize('utc')
stamp_utc.tz_convert('America/New_York')

In [None]:
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')
stamp_moscow

In [None]:
stamp_utc.value
stamp_utc.tz_convert('America/New_York').value

In [None]:
from pandas.tseries.offsets import Hour
stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')
stamp
stamp + Hour()

In [None]:
stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')
stamp
stamp + 2 * Hour()

### Operations Between Different Time Zones

In [None]:
rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts
ts1 = ts[:7].tz_localize('Europe/London')
ts2 = ts1[2:].tz_convert('Europe/Moscow')
result = ts1 + ts2
result.index

## Periods and Period Arithmetic

In [99]:
# (B)A(S)-DEC 一年的时间范围
# annual frequency, anchored end of December. Same as ‘A’
p = pd.Period(2007, freq='A-DEC')
p

Period('2007', 'A-DEC')

In [101]:
print(p + 5)
print(p - 2)

2012
2005


In [107]:
pd.Period('2014', freq='A-DEC') - p

<7 * YearEnds: month=12>

In [103]:
rng = pd.period_range('2000-01-01', '2000-06-30', freq='M')
rng

PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')

In [104]:
pd.Series(np.random.randn(6), index=rng)

2000-01   -0.167933
2000-02    0.631634
2000-03   -1.594313
2000-04   -1.519937
2000-05    1.108752
2000-06    1.255853
Freq: M, dtype: float64

In [105]:
'''
(B)Q(S)-DEC: quarterly frequency, year ends in December. Same as ‘Q’
一个季度的时间范围
'''
values = ['2001Q3', '2002Q2', '2003Q1']
index = pd.PeriodIndex(values, freq='Q-DEC')
index

PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

### Period Frequency Conversion

In [113]:
# (B)A(S)-DEC 一年的时间范围
# annual frequency, anchored end of December. Same as ‘A’
p = pd.Period('2007', freq='A-DEC') 
print(p)
print(p.asfreq('M', how='start'))  # 年度区间转化为开始时的月度区间
print(p.asfreq('M', how='end'))     # 年度区间转化为结束时的月度区间

2007
2007-01
2007-12


In [114]:
p = pd.Period('2007', freq='A-JUN')  # 年度区间, 6月底为年末
p
print(p.asfreq('M', 'start'))
print(p.asfreq('M', 'end'))

2006-07
2007-06


In [115]:
p = pd.Period('Aug-2007', 'M')
p.asfreq('A-JUN')

Period('2008', 'A-JUN')

In [116]:
rng = pd.period_range('2006', '2009', freq='A-DEC')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
print(ts)
ts.asfreq('M', how='start')

2006   -0.024330
2007   -2.047939
2008   -0.272657
2009   -1.692615
Freq: A-DEC, dtype: float64


2006-01   -0.024330
2007-01   -2.047939
2008-01   -0.272657
2009-01   -1.692615
Freq: M, dtype: float64

In [119]:
ts.asfreq('B', how='end')

2006-12-29   -0.024330
2007-12-31   -2.047939
2008-12-31   -0.272657
2009-12-31   -1.692615
Freq: B, dtype: float64

### Quarterly Period Frequencies

In [120]:
p = pd.Period('2012Q4', freq='Q-JAN') # 季度范围，1月份结束
p

Period('2012Q4', 'Q-JAN')

In [121]:
print(p.asfreq('D', 'start'))
print(p.asfreq('D', 'end'))

2011-11-01
2012-01-31


In [122]:
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
p4pm
p4pm.to_timestamp()

Timestamp('2012-01-30 16:00:00')

In [123]:
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = pd.Series(np.arange(len(rng)), index=rng)
print(ts)
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
ts.index = new_rng.to_timestamp()
ts

2011Q3    0
2011Q4    1
2012Q1    2
2012Q2    3
2012Q3    4
2012Q4    5
Freq: Q-JAN, dtype: int32


2010-10-28 16:00:00    0
2011-01-28 16:00:00    1
2011-04-28 16:00:00    2
2011-07-28 16:00:00    3
2011-10-28 16:00:00    4
2012-01-30 16:00:00    5
dtype: int32

### Converting Timestamps to Periods (and Back)

In [124]:
rng = pd.date_range('2000-01-01', periods=3, freq='M')
ts = pd.Series(np.random.randn(3), index=rng)
print(ts)
pts = ts.to_period()
pts

2000-01-31    1.423830
2000-02-29   -0.407890
2000-03-31    0.756332
Freq: M, dtype: float64


2000-01    1.423830
2000-02   -0.407890
2000-03    0.756332
Freq: M, dtype: float64

In [125]:
rng = pd.date_range('1/29/2000', periods=6, freq='D')
ts2 = pd.Series(np.random.randn(6), index=rng)
print(ts2)
tm = ts2.to_period('M')
tm

2000-01-29   -1.288602
2000-01-30    0.867534
2000-01-31    0.575283
2000-02-01    0.304205
2000-02-02    1.814582
2000-02-03    1.634858
Freq: D, dtype: float64


2000-01   -1.288602
2000-01    0.867534
2000-01    0.575283
2000-02    0.304205
2000-02    1.814582
2000-02    1.634858
Freq: M, dtype: float64

In [None]:
pts = ts2.to_period()
pts
pts.to_timestamp(how='end')

### Creating a PeriodIndex from Arrays

In [130]:
data = pd.read_csv('examples/macrodata.csv')
print(data.info())
data.head(5)
print(data.year)
data.quarter

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 203 entries, 0 to 202
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   year      203 non-null    float64
 1   quarter   203 non-null    float64
 2   realgdp   203 non-null    float64
 3   realcons  203 non-null    float64
 4   realinv   203 non-null    float64
 5   realgovt  203 non-null    float64
 6   realdpi   203 non-null    float64
 7   cpi       203 non-null    float64
 8   m1        203 non-null    float64
 9   tbilrate  203 non-null    float64
 10  unemp     203 non-null    float64
 11  pop       203 non-null    float64
 12  infl      203 non-null    float64
 13  realint   203 non-null    float64
dtypes: float64(14)
memory usage: 22.3 KB
None
0      1959.0
1      1959.0
2      1959.0
3      1959.0
4      1960.0
        ...  
198    2008.0
199    2008.0
200    2009.0
201    2009.0
202    2009.0
Name: year, Length: 203, dtype: float64


0      1.0
1      2.0
2      3.0
3      4.0
4      1.0
      ... 
198    3.0
199    4.0
200    1.0
201    2.0
202    3.0
Name: quarter, Length: 203, dtype: float64

In [None]:
index = pd.PeriodIndex(year=data.year, quarter=data.quarter,
                       freq='Q-DEC')
index
data.index = index
data.infl

## Resampling and Frequency Conversion

In [None]:
rng = pd.date_range('2000-01-01', periods=100, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts
ts.resample('M').mean()
ts.resample('M', kind='period').mean()

### Downsampling

In [None]:
rng = pd.date_range('2000-01-01', periods=12, freq='T')
ts = pd.Series(np.arange(12), index=rng)
ts

In [None]:
ts.resample('5min', closed='right').sum()

In [None]:
ts.resample('5min', closed='right').sum()

In [None]:
ts.resample('5min', closed='right', label='right').sum()

In [None]:
ts.resample('5min', closed='right',
            label='right', loffset='-1s').sum()

#### Open-High-Low-Close (OHLC) resampling

In [None]:
ts.resample('5min').ohlc()

### Upsampling and Interpolation

In [None]:
frame = pd.DataFrame(np.random.randn(2, 4),
                     index=pd.date_range('1/1/2000', periods=2,
                                         freq='W-WED'),
                     columns=['Colorado', 'Texas', 'New York', 'Ohio'])
frame

In [None]:
df_daily = frame.resample('D').asfreq()
df_daily

In [None]:
frame.resample('D').ffill()

In [None]:
frame.resample('D').ffill(limit=2)

In [None]:
frame.resample('W-THU').ffill()

### Resampling with Periods

In [None]:
frame = pd.DataFrame(np.random.randn(24, 4),
                     index=pd.period_range('1-2000', '12-2001',
                                           freq='M'),
                     columns=['Colorado', 'Texas', 'New York', 'Ohio'])
frame[:5]
annual_frame = frame.resample('A-DEC').mean()
annual_frame

In [None]:
# Q-DEC: Quarterly, year ending in December
annual_frame.resample('Q-DEC').ffill()
annual_frame.resample('Q-DEC', convention='end').ffill()

In [None]:
annual_frame.resample('Q-MAR').ffill()

## Moving Window Functions

In [None]:
close_px_all = pd.read_csv('examples/stock_px_2.csv',
                           parse_dates=True, index_col=0)
close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]
close_px = close_px.resample('B').ffill()

In [None]:
close_px.AAPL.plot()
close_px.AAPL.rolling(250).mean().plot()

In [None]:
plt.figure()

In [None]:
appl_std250 = close_px.AAPL.rolling(250, min_periods=10).std()
appl_std250[5:12]
appl_std250.plot()

In [None]:
expanding_mean = appl_std250.expanding().mean()

In [None]:
plt.figure()

In [None]:
close_px.rolling(60).mean().plot(logy=True)

In [None]:
close_px.rolling('20D').mean()

### Exponentially Weighted Functions

In [None]:
plt.figure()

In [None]:
aapl_px = close_px.AAPL['2006':'2007']
ma60 = aapl_px.rolling(30, min_periods=20).mean()
ewma60 = aapl_px.ewm(span=30).mean()
ma60.plot(style='k--', label='Simple MA')
ewma60.plot(style='k-', label='EW MA')
plt.legend()

### Binary Moving Window Functions

In [None]:
plt.figure()

In [None]:
spx_px = close_px_all['SPX']
spx_rets = spx_px.pct_change()
returns = close_px.pct_change()

In [None]:
corr = returns.AAPL.rolling(125, min_periods=100).corr(spx_rets)
corr.plot()

In [None]:
plt.figure()

In [None]:
corr = returns.rolling(125, min_periods=100).corr(spx_rets)
corr.plot()

### User-Defined Moving Window Functions

In [None]:
plt.figure()

In [None]:
from scipy.stats import percentileofscore
score_at_2percent = lambda x: percentileofscore(x, 0.02)
result = returns.AAPL.rolling(250).apply(score_at_2percent)
result.plot()

In [None]:
pd.options.display.max_rows = PREVIOUS_MAX_ROWS

## Conclusion