# 시계열 자료 다루기

In [1]:
import numpy as np
import pandas as pd

## DatetimeIndex

In [24]:
# The same week of August 2021 written in seven different notations, one per
# day from 2021-08-09 back to 2021-08-03. Ambiguous entries such as '080521'
# and '8/4/21' are read month-first.
date_str = ['2021, 8, 9','2021-8-8','20210807','2021.8.6','080521','8/4/21','8.3.21']
# Parse each string on its own. Since pandas 2.0, to_datetime applied to a
# whole list infers one format from the first element and applies it to every
# entry, which cannot work for a list that deliberately mixes notations.
# Element-wise parsing behaves the same on older and newer pandas releases.
idx = pd.DatetimeIndex([pd.to_datetime(text) for text in date_str])
idx

DatetimeIndex(['2021-08-09', '2021-08-08', '2021-08-07', '2021-08-06',
               '2021-08-05', '2021-08-04', '2021-08-03'],
              dtype='datetime64[ns]', freq=None)

In [17]:
# Seed NumPy's global generator so the draws are repeatable, then build a
# Series of seven standard-normal values labelled by the dates in idx.
np.random.seed(2021)
s = pd.Series(data=np.random.randn(7), index=idx)
s

2021-08-09    1.488609
2021-08-08    0.676011
2021-08-07   -0.418451
2021-08-06   -0.806521
2021-08-05    0.555876
2021-08-04   -0.705504
2021-08-03    1.130858
dtype: float64

In [25]:
# Every calendar day from the start date to the end date, both inclusive;
# the two endpoints may be written in different notations.
pd.date_range(start='20210801', end='2021-8-31')

DatetimeIndex(['2021-08-01', '2021-08-02', '2021-08-03', '2021-08-04',
               '2021-08-05', '2021-08-06', '2021-08-07', '2021-08-08',
               '2021-08-09', '2021-08-10', '2021-08-11', '2021-08-12',
               '2021-08-13', '2021-08-14', '2021-08-15', '2021-08-16',
               '2021-08-17', '2021-08-18', '2021-08-19', '2021-08-20',
               '2021-08-21', '2021-08-22', '2021-08-23', '2021-08-24',
               '2021-08-25', '2021-08-26', '2021-08-27', '2021-08-28',
               '2021-08-29', '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='D')

In [26]:
# Give a start date and a number of periods instead of an end date.
# Daily spacing is the default, so the resulting index carries freq='D'.
pd.date_range(start='20210801', periods=31)

DatetimeIndex(['2021-08-01', '2021-08-02', '2021-08-03', '2021-08-04',
               '2021-08-05', '2021-08-06', '2021-08-07', '2021-08-08',
               '2021-08-09', '2021-08-10', '2021-08-11', '2021-08-12',
               '2021-08-13', '2021-08-14', '2021-08-15', '2021-08-16',
               '2021-08-17', '2021-08-18', '2021-08-19', '2021-08-20',
               '2021-08-21', '2021-08-22', '2021-08-23', '2021-08-24',
               '2021-08-25', '2021-08-26', '2021-08-27', '2021-08-28',
               '2021-08-29', '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='D')

In [29]:
# Business days (weekdays) only: freq='B' skips Saturdays and Sundays.
pd.date_range(start='20210801', end='20210831', freq='B')

DatetimeIndex(['2021-08-02', '2021-08-03', '2021-08-04', '2021-08-05',
               '2021-08-06', '2021-08-09', '2021-08-10', '2021-08-11',
               '2021-08-12', '2021-08-13', '2021-08-16', '2021-08-17',
               '2021-08-18', '2021-08-19', '2021-08-20', '2021-08-23',
               '2021-08-24', '2021-08-25', '2021-08-26', '2021-08-27',
               '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='B')

In [30]:
# Weekly frequency: 'W' is anchored on Sundays (shown as freq='W-SUN'),
# so this lists every Sunday in August 2021.
pd.date_range(start='20210801', end='20210831', freq='W')

DatetimeIndex(['2021-08-01', '2021-08-08', '2021-08-15', '2021-08-22',
               '2021-08-29'],
              dtype='datetime64[ns]', freq='W-SUN')

## resample 연산

In [33]:
# One hundred standard-normal draws from NumPy's global generator, labelled
# with consecutive daily timestamps starting on 2021-01-01.
ts = pd.Series(
    data=np.random.randn(100),
    index=pd.date_range(start="2021-1-1", periods=100),
)
# Show only the last ten observations rather than the whole series.
ts.tail(10)

2021-04-01   -1.226517
2021-04-02   -0.722695
2021-04-03   -0.619026
2021-04-04   -0.988081
2021-04-05   -0.532415
2021-04-06    1.182246
2021-04-07    0.770814
2021-04-08   -0.416720
2021-04-09   -0.266896
2021-04-10   -1.956648
Freq: D, dtype: float64

In [36]:
# Collapse the daily series into weekly totals; with rule 'W' each bin is
# labelled by the Sunday that closes it (Freq: W-SUN).
ts.resample(rule='W').sum()

2021-01-03   -1.533120
2021-01-10    0.366061
2021-01-17    0.020248
2021-01-24    0.902120
2021-01-31    0.285193
2021-02-07    3.766831
2021-02-14   -2.469666
2021-02-21   -0.367119
2021-02-28    2.792611
2021-03-07    3.588517
2021-03-14    1.982473
2021-03-21   -4.558733
2021-03-28    3.798809
2021-04-04   -4.692207
2021-04-11   -1.219619
Freq: W-SUN, dtype: float64

In [37]:
# Take the first observation of every calendar month, labelled by the
# month-end date. The frequency is passed as an offset object rather than the
# string alias 'M': pandas 2.2 deprecated 'M' in favour of 'ME', while older
# releases do not know 'ME'. MonthEnd() is the offset both aliases stand for
# and is accepted by old and new releases alike.
ts.resample(pd.offsets.MonthEnd()).first()

2021-01-31   -2.405074
2021-02-28    0.848101
2021-03-31    0.016642
2021-04-30   -1.226517
Freq: M, dtype: float64