In [1]:
# The most basic kind of time series object found in pandas:
# a Series indexed by timestamps, which are typically expressed
# as Python strings or datetime objects.

In [2]:
import pandas as pd
import numpy as np
from datetime import datetime

In [3]:
# Six irregularly spaced days in January 2011; used as the index of the sample series.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]

In [4]:
# Build a 6-element Series of draws from np.random.randn, indexed by `dates`.
# NOTE(review): no seed is set in the visible cells, so the values shown
# in the output will differ on every run.
ts = pd.Series(np.random.randn(6), index=dates)
ts

2011-01-02    1.952312
2011-01-05   -0.483189
2011-01-07    0.716732
2011-01-08   -1.192234
2011-01-10    0.119003
2011-01-12   -0.141504
dtype: float64

In [5]:
# Passing datetime objects as the index produced a DatetimeIndex (see output).
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [7]:
# ts[::2] selects every second element of ts. Arithmetic between the two
# Series aligns on the dates, so dates missing from ts[::2] come out as NaN.
ts + ts[::2]

2011-01-02    3.904623
2011-01-05         NaN
2011-01-07    1.433464
2011-01-08         NaN
2011-01-10    0.238005
2011-01-12         NaN
dtype: float64

In [8]:
# IPython-only introspection: the trailing `?` prints the object's type,
# string form, length, source file and docstring. It is not valid plain-Python
# syntax, so this cell only runs inside IPython/Jupyter.
ts.index?

[1;31mType:[0m        DatetimeIndex
[1;31mString form:[0m
DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)
[1;31mLength:[0m      6
[1;31mFile:[0m        c:\users\user\anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py
[1;31mDocstring:[0m  
Immutable ndarray of datetime64 data, represented internally as int64, and
which can be boxed to Timestamp objects that are subclasses of datetime and
carry metadata such as frequency information.

Parameters
----------
data  : array-like (1-dimensional), optional
    Optional datetime-like data to construct index with
copy  : bool
    Make a copy of input ndarray
freq : string or pandas offset object, optional
    One of pandas date offset strings or corresponding objects
start : starting value, datetime-like, optional
    If data is None, start is used as the start point in generating regular
    timestamp data.
per

In [9]:
# The index stores its values as NumPy datetime64 at nanosecond resolution;
# '<M8[ns]' in the output is NumPy's shorthand for datetime64[ns].
ts.index.dtype

dtype('<M8[ns]')

In [11]:
# A scalar taken from a DatetimeIndex is a pandas Timestamp (see output).
stamp = ts.index[0]
stamp

Timestamp('2011-01-02 00:00:00')

In [13]:
# A Timestamp taken from the index can be used as a label to look up the
# corresponding value in the Series.
stamp1 = ts.index[2]
ts[stamp1]

0.7167319014716679

In [14]:
# A string that can be interpreted as a date also works as a label
# (here month/day/year, i.e. 2011-01-10).
ts['1/10/2011']

0.11900265715557576

In [15]:
# Compact YYYYMMDD spelling of 2011-01-10; the output is the value stored
# for that date.
ts['20110110']

0.11900265715557576

In [16]:
# For longer time series, passing just a year, or just a year and month, selects that slice of the data.

In [17]:
# 1000 random values on consecutive calendar days starting 2000-01-01.
# NOTE(review): unseeded np.random.randn, so the values change on every run.
longer_ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
longer_ts

2000-01-01   -0.308721
2000-01-02    1.106333
2000-01-03   -0.276597
2000-01-04    0.696936
2000-01-05   -0.275215
2000-01-06   -1.462139
2000-01-07    0.894154
2000-01-08   -0.517158
2000-01-09    0.060558
2000-01-10   -0.598083
2000-01-11    0.218089
2000-01-12    1.136964
2000-01-13    2.614146
2000-01-14    1.912285
2000-01-15    1.584475
2000-01-16   -0.032154
2000-01-17    0.390627
2000-01-18   -0.410297
2000-01-19   -0.024325
2000-01-20   -1.032267
2000-01-21    0.056259
2000-01-22    1.053066
2000-01-23   -1.544513
2000-01-24    1.068685
2000-01-25   -0.405022
2000-01-26   -0.988593
2000-01-27    0.769131
2000-01-28    1.170274
2000-01-29   -1.219019
2000-01-30    0.484329
                ...   
2002-08-28   -0.481907
2002-08-29    0.350017
2002-08-30   -0.591788
2002-08-31    1.820477
2002-09-01   -0.591799
2002-09-02    0.935771
2002-09-03    0.722854
2002-09-04    0.163503
2002-09-05    1.090764
2002-09-06   -0.138404
2002-09-07    0.055183
2002-09-08   -1.100811
2002-09-09 

In [19]:
longer_ts['2001'] 
# The string '2001' is interpreted as a year, so the data for that whole year is selected.

2001-01-01   -0.960371
2001-01-02   -0.884875
2001-01-03   -0.468372
2001-01-04    0.783914
2001-01-05   -1.524300
2001-01-06    0.848238
2001-01-07   -1.935327
2001-01-08   -0.985698
2001-01-09   -0.394100
2001-01-10    0.315314
2001-01-11    0.045121
2001-01-12    1.336223
2001-01-13   -0.223626
2001-01-14    1.297252
2001-01-15    0.749902
2001-01-16   -0.860311
2001-01-17   -0.036739
2001-01-18   -0.850804
2001-01-19    0.505892
2001-01-20   -0.129657
2001-01-21   -1.860468
2001-01-22   -1.875861
2001-01-23   -0.135817
2001-01-24   -1.784538
2001-01-25    2.164424
2001-01-26    0.888289
2001-01-27   -1.868406
2001-01-28    0.277372
2001-01-29    1.814307
2001-01-30   -0.209179
                ...   
2001-12-02   -1.341267
2001-12-03   -0.199891
2001-12-04   -1.966055
2001-12-05    0.473501
2001-12-06    1.086710
2001-12-07    0.119668
2001-12-08   -1.648664
2001-12-09    0.249018
2001-12-10    0.188876
2001-12-11    0.718830
2001-12-12    1.308562
2001-12-13   -0.022597
2001-12-14 

In [20]:
# A 'YYYY-MM' string selects a whole month: here all 31 days of May 2001.
longer_ts['2001-05']

2001-05-01    0.641451
2001-05-02    0.409647
2001-05-03    1.024039
2001-05-04    0.478227
2001-05-05    1.668099
2001-05-06    0.456321
2001-05-07   -1.064260
2001-05-08   -0.006362
2001-05-09   -1.048511
2001-05-10    0.429491
2001-05-11   -0.447488
2001-05-12   -1.036133
2001-05-13   -0.521496
2001-05-14    0.263520
2001-05-15   -0.639097
2001-05-16    0.428613
2001-05-17    1.559437
2001-05-18   -0.091586
2001-05-19    2.182646
2001-05-20    0.770385
2001-05-21   -1.410751
2001-05-22    0.359440
2001-05-23   -0.126035
2001-05-24    0.022066
2001-05-25    0.372862
2001-05-26    0.977666
2001-05-27   -0.273580
2001-05-28   -0.501011
2001-05-29    1.512508
2001-05-30   -0.079576
2001-05-31   -0.837784
Freq: D, dtype: float64

In [21]:
# Slicing with a datetime object keeps every row from 2011-01-07 onward,
# including the start date itself.
ts[datetime(2011, 1, 7):]

2011-01-07    0.716732
2011-01-08   -1.192234
2011-01-10    0.119003
2011-01-12   -0.141504
dtype: float64

In [22]:
# Redisplay the full series; the output shows all six original rows.
ts

2011-01-02    1.952312
2011-01-05   -0.483189
2011-01-07    0.716732
2011-01-08   -1.192234
2011-01-10    0.119003
2011-01-12   -0.141504
dtype: float64

In [24]:
# Because most time series data is ordered chronologically, a range query can
# slice the Series using timestamps that are not themselves contained in the time series.

In [23]:
# Neither endpoint (2011-01-06, 2011-01-11) is in the index; the slice still
# returns the rows whose dates fall within that range.
ts['1/6/2011':'1/11/2011']

2011-01-07    0.716732
2011-01-08   -1.192234
2011-01-10    0.119003
dtype: float64