# DatetimeIndex
- pd.to_datetime() : 문자열을 날짜/시간의 자료형인 datetime 자료형으로 바꿔주는 함수
- pd.date_range() : 시작일과 종료일 또는 시작일과 기간을 입력하면 범위내의 인덱스를 자동으로 생성
- 시계열 자료 : 인덱스에 날짜나 시간이 오는 데이터
- 판다스에서 시계열 자료를 생성하려면 인덱스를 DatetimeIndex 자료형으로 만들어야 한다.

In [3]:
import numpy as np
import pandas as pd

In [5]:
# Raw dates written as "year, month, day" strings; at this point they are
# still ordinary Python strings held in a list, not datetimes.
date_str = [
    '2018, 1, 1',
    '2018, 1, 4',
    '2018, 1, 5',
    '2018, 1, 6',
]
print(type(date_str))

<class 'list'>


In [6]:
# Convert the list of date strings into a DatetimeIndex.
# The strings use a non-standard "year, month, day" layout, so the format is
# spelled out explicitly instead of relying on pandas' format inference.
idx = pd.to_datetime(date_str, format='%Y, %m, %d')
idx

DatetimeIndex(['2018-01-01', '2018-01-04', '2018-01-05', '2018-01-06'], dtype='datetime64[ns]', freq=None)

In [7]:
# Series of four random normal values, labelled by the DatetimeIndex `idx`.
s = pd.Series(data=np.random.randn(4), index=idx)
s

2018-01-01    0.325590
2018-01-04    1.690432
2018-01-05    0.675130
2018-01-06   -1.369393
dtype: float64

In [8]:
# Every calendar day from the start date through the end date (both included).
pd.date_range(start='2022-4-2', end='2022-4-30')

DatetimeIndex(['2022-04-02', '2022-04-03', '2022-04-04', '2022-04-05',
               '2022-04-06', '2022-04-07', '2022-04-08', '2022-04-09',
               '2022-04-10', '2022-04-11', '2022-04-12', '2022-04-13',
               '2022-04-14', '2022-04-15', '2022-04-16', '2022-04-17',
               '2022-04-18', '2022-04-19', '2022-04-20', '2022-04-21',
               '2022-04-22', '2022-04-23', '2022-04-24', '2022-04-25',
               '2022-04-26', '2022-04-27', '2022-04-28', '2022-04-29',
               '2022-04-30'],
              dtype='datetime64[ns]', freq='D')

In [9]:
# 30 consecutive days counted from the start date (no end date given).
pd.date_range('2022-10-1', periods=30)

DatetimeIndex(['2022-10-01', '2022-10-02', '2022-10-03', '2022-10-04',
               '2022-10-05', '2022-10-06', '2022-10-07', '2022-10-08',
               '2022-10-09', '2022-10-10', '2022-10-11', '2022-10-12',
               '2022-10-13', '2022-10-14', '2022-10-15', '2022-10-16',
               '2022-10-17', '2022-10-18', '2022-10-19', '2022-10-20',
               '2022-10-21', '2022-10-22', '2022-10-23', '2022-10-24',
               '2022-10-25', '2022-10-26', '2022-10-27', '2022-10-28',
               '2022-10-29', '2022-10-30'],
              dtype='datetime64[ns]', freq='D')

#### freq 매개변수로 특정한 날짜만 생성되도록 설정할 수 있다.
- s : second, 초
- T : minute, 분 (pandas 2.2부터는 T 대신 min 사용 권장)
- H : hour, 시간 (pandas 2.2부터는 H 대신 h 사용 권장)
- D : day, 일
- B : 주말이 아닌 평일
- W : 주 단위, 매주 일요일 (W-SUN과 동일)
- W-MON : 주 단위, 매주 월요일
- M : 각 달의 마지막 날 (pandas 2.2부터는 M 대신 ME 사용 권장)
- MS : 각 달의 첫날
- https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects

In [10]:
# Daily frequency ('D') between the two dates; this matches the default.
pd.date_range(start='2022-10-1', end='2022-10-31', freq='D')

DatetimeIndex(['2022-10-01', '2022-10-02', '2022-10-03', '2022-10-04',
               '2022-10-05', '2022-10-06', '2022-10-07', '2022-10-08',
               '2022-10-09', '2022-10-10', '2022-10-11', '2022-10-12',
               '2022-10-13', '2022-10-14', '2022-10-15', '2022-10-16',
               '2022-10-17', '2022-10-18', '2022-10-19', '2022-10-20',
               '2022-10-21', '2022-10-22', '2022-10-23', '2022-10-24',
               '2022-10-25', '2022-10-26', '2022-10-27', '2022-10-28',
               '2022-10-29', '2022-10-30', '2022-10-31'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# Weekly frequency: 'W' yields one date per week, anchored on Sunday.
pd.date_range(start='2022-10-1', end='2022-12-31', freq='W')

DatetimeIndex(['2022-10-02', '2022-10-09', '2022-10-16', '2022-10-23',
               '2022-10-30', '2022-11-06', '2022-11-13', '2022-11-20',
               '2022-11-27', '2022-12-04', '2022-12-11', '2022-12-18',
               '2022-12-25'],
              dtype='datetime64[ns]', freq='W-SUN')

In [12]:
# Weekly frequency anchored on Monday instead of the default Sunday.
pd.date_range(start='2022-10-1', end='2022-12-31', freq='W-MON')

DatetimeIndex(['2022-10-03', '2022-10-10', '2022-10-17', '2022-10-24',
               '2022-10-31', '2022-11-07', '2022-11-14', '2022-11-21',
               '2022-11-28', '2022-12-05', '2022-12-12', '2022-12-19',
               '2022-12-26'],
              dtype='datetime64[ns]', freq='W-MON')

### shift

In [13]:
# Four random values indexed by the last calendar day of Jan-Apr 2018.
# The month-end frequency is given as an offset object rather than the string
# alias 'M': that alias is deprecated in pandas 2.2 in favour of 'ME', while
# pd.offsets.MonthEnd() is accepted by both older and newer versions.
ts = pd.Series(
    np.random.randn(4),
    index=pd.date_range('2018-1-1', periods=4, freq=pd.offsets.MonthEnd()),
)
ts

2018-01-31   -1.759849
2018-02-28    0.819771
2018-03-31    0.598281
2018-04-30   -0.369091
Freq: M, dtype: float64

In [14]:
# Move the values one row down; the index stays put, so the first row
# becomes NaN and the last value drops off.
ts.shift(periods=1)

2018-01-31         NaN
2018-02-28   -1.759849
2018-03-31    0.819771
2018-04-30    0.598281
Freq: M, dtype: float64

In [15]:
# Move the values one row up; the index stays put, so the last row
# becomes NaN and the first value drops off.
ts.shift(periods=-1)

2018-01-31    0.819771
2018-02-28    0.598281
2018-03-31   -0.369091
2018-04-30         NaN
Freq: M, dtype: float64

In [16]:
# With `freq` given, the index labels move (here by one month end) and the
# values stay attached to them, so no NaN is introduced.
# The offset object is used instead of the string alias 'M', which is
# deprecated in pandas 2.2 in favour of 'ME'.
ts.shift(1, freq=pd.offsets.MonthEnd())

2018-02-28   -1.759849
2018-03-31    0.819771
2018-04-30    0.598281
2018-05-31   -0.369091
Freq: M, dtype: float64

In [17]:
# Move each index label forward to the following weekly anchor (Sunday);
# the values stay attached to their labels.
ts.shift(periods=1, freq='W')

2018-02-04   -1.759849
2018-03-04    0.819771
2018-04-01    0.598281
2018-05-06   -0.369091
dtype: float64

### resample : 날짜나 시간 간격을 재조정하는 기능
- up-sample(업 샘플링) : 데이터 양이 증가
- down-sample(다운 샘플링) : 데이터 양이 감소

In [18]:
# 100 random values on a daily index starting 2018-01-01; this series is the
# input for the resampling examples. Show the first 20 rows.
ts = pd.Series(
    data=np.random.randn(100),
    index=pd.date_range(start='2018-1-1', periods=100, freq='D'),
)
ts.head(n=20)

2018-01-01    0.953981
2018-01-02    0.581156
2018-01-03    0.646386
2018-01-04   -2.117875
2018-01-05    0.815256
2018-01-06    0.473112
2018-01-07    0.827769
2018-01-08    1.513132
2018-01-09    2.488432
2018-01-10   -1.058599
2018-01-11   -0.973133
2018-01-12   -0.857677
2018-01-13    2.260909
2018-01-14    0.348334
2018-01-15    0.736485
2018-01-16   -2.232198
2018-01-17    0.593114
2018-01-18    0.355401
2018-01-19    0.189360
2018-01-20   -1.423074
Freq: D, dtype: float64

In [19]:
# Down-sampling groups the original rows into coarser bins; here the daily
# values are grouped per week and each group is reduced to its mean.
ts.resample(rule='W').mean()

2018-01-07    0.311398
2018-01-14    0.531628
2018-01-21   -0.178224
2018-01-28   -0.435360
2018-02-04    0.395449
2018-02-11    0.119950
2018-02-18    0.373968
2018-02-25   -0.307031
2018-03-04    0.351737
2018-03-11   -0.001912
2018-03-18   -0.223305
2018-03-25    0.242510
2018-04-01   -0.241597
2018-04-08   -0.244453
2018-04-15    0.251328
Freq: W-SUN, dtype: float64

In [20]:
# Down-sample to month-end bins and keep the first value of each month.
# The offset object is used instead of the string alias 'M', which is
# deprecated in pandas 2.2 in favour of 'ME'.
ts.resample(pd.offsets.MonthEnd()).first()

2018-01-31    0.953981
2018-02-28    0.675145
2018-03-31    0.155678
2018-04-30   -0.166414
Freq: M, dtype: float64