# 시계열 데이터

In [2]:
from datetime import datetime
import  pandas as pd
import numpy as np

In [3]:
df = pd.DataFrame({'float': [1.0],
                   'int': [1],
                   'datetime': [pd.Timestamp('20201202')],
                   'string': ['foo']})
df

Unnamed: 0,float,int,datetime,string
0,1.0,1,2020-12-02,foo


In [4]:
df.dtypes

float              float64
int                  int64
datetime    datetime64[ns]
string              object
dtype: object

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   float     1 non-null      float64       
 1   int       1 non-null      int64         
 2   datetime  1 non-null      datetime64[ns]
 3   string    1 non-null      object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(1)
memory usage: 160.0+ bytes


In [6]:
dic_dates = {'str_date':['20190101','20200311','20210621'],
             'int_date':[2019010, 20200311, 20210621],
             'format_date':['2019/01/01', '2020-03-11 11:22:33', '2021/06/21 08:48:52.123456789']}
dic_dates

{'str_date': ['20190101', '20200311', '20210621'],
 'int_date': [2019010, 20200311, 20210621],
 'format_date': ['2019/01/01',
  '2020-03-11 11:22:33',
  '2021/06/21 08:48:52.123456789']}

In [7]:
df = pd.DataFrame(dic_dates)
df

Unnamed: 0,str_date,int_date,format_date
0,20190101,2019010,2019/01/01
1,20200311,20200311,2020-03-11 11:22:33
2,20210621,20210621,2021/06/21 08:48:52.123456789


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   str_date     3 non-null      object
 1   int_date     3 non-null      int64 
 2   format_date  3 non-null      object
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes


## to_datetime()

In [9]:
df['str_date'] = pd.to_datetime(df['str_date'])
df['str_date']

0   2019-01-01
1   2020-03-11
2   2021-06-21
Name: str_date, dtype: datetime64[ns]

## astype()

In [10]:
df['str_date'] = df['str_date'].astype('datetime64[ns]')
df['str_date']

0   2019-01-01
1   2020-03-11
2   2021-06-21
Name: str_date, dtype: datetime64[ns]

In [11]:
df['int_date'] = df['int_date'].astype('datetime64[ns]')
df['int_date']

0   1970-01-01 00:00:00.002019010
1   1970-01-01 00:00:00.020200311
2   1970-01-01 00:00:00.020210621
Name: int_date, dtype: datetime64[ns]

In [12]:
df['format_date'] = df['format_date'].astype('datetime64[ns]')
df['format_date']

0   2019-01-01 00:00:00.000000000
1   2020-03-11 11:22:33.000000000
2   2021-06-21 08:48:52.123456789
Name: format_date, dtype: datetime64[ns]

In [13]:
df

Unnamed: 0,str_date,int_date,format_date
0,2019-01-01,1970-01-01 00:00:00.002019010,2019-01-01 00:00:00.000000000
1,2020-03-11,1970-01-01 00:00:00.020200311,2020-03-11 11:22:33.000000000
2,2021-06-21,1970-01-01 00:00:00.020210621,2021-06-21 08:48:52.123456789


In [14]:
del df['int_date'], df['str_date']
df

Unnamed: 0,format_date
0,2019-01-01 00:00:00.000000000
1,2020-03-11 11:22:33.000000000
2,2021-06-21 08:48:52.123456789


## 년/월/일, 시/분/초 분리

In [15]:
df['year'] = df['format_date'].dt.year
df['month'] = df['format_date'].dt.month
df['day'] = df['format_date'].dt.day
df

Unnamed: 0,format_date,year,month,day
0,2019-01-01 00:00:00.000000000,2019,1,1
1,2020-03-11 11:22:33.000000000,2020,3,11
2,2021-06-21 08:48:52.123456789,2021,6,21


In [16]:
df['hour'] = df['format_date'].dt.hour
df['minute'] = df['format_date'].dt.minute
df['sec'] = df['format_date'].dt.second
df

Unnamed: 0,format_date,year,month,day,hour,minute,sec
0,2019-01-01 00:00:00.000000000,2019,1,1,0,0,0
1,2020-03-11 11:22:33.000000000,2020,3,11,11,22,33
2,2021-06-21 08:48:52.123456789,2021,6,21,8,48,52


## to_period
- 특정 빈도의 period index로 변환

In [17]:
df['to_year'] = df['format_date'].dt.to_period(freq = 'A')
df['to_month'] = df['format_date'].dt.to_period(freq = 'M')
df['to_day'] = df['format_date'].dt.to_period(freq = 'D')
df['to_quarter'] = df['format_date'].dt.to_period(freq = 'Q')
df['to_week'] = df['format_date'].dt.to_period(freq = 'W')
df

Unnamed: 0,format_date,year,month,day,hour,minute,sec,to_year,to_month,to_day,to_quarter,to_week
0,2019-01-01 00:00:00.000000000,2019,1,1,0,0,0,2019,2019-01,2019-01-01,2019Q1,2018-12-31/2019-01-06
1,2020-03-11 11:22:33.000000000,2020,3,11,11,22,33,2020,2020-03,2020-03-11,2020Q1,2020-03-09/2020-03-15
2,2021-06-21 08:48:52.123456789,2021,6,21,8,48,52,2021,2021-06,2021-06-21,2021Q2,2021-06-21/2021-06-27


In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   format_date  3 non-null      datetime64[ns]
 1   year         3 non-null      int64         
 2   month        3 non-null      int64         
 3   day          3 non-null      int64         
 4   hour         3 non-null      int64         
 5   minute       3 non-null      int64         
 6   sec          3 non-null      int64         
 7   to_year      3 non-null      period[A-DEC] 
 8   to_month     3 non-null      period[M]     
 9   to_day       3 non-null      period[D]     
 10  to_quarter   3 non-null      period[Q-DEC] 
 11  to_week      3 non-null      period[W-SUN] 
dtypes: datetime64[ns](1), int64(6), period[A-DEC](1), period[D](1), period[M](1), period[Q-DEC](1), period[W-SUN](1)
memory usage: 416.0 bytes


In [19]:
del df['year'], df['month'], df['day'], df['hour'], df['minute'], df['sec'], df['to_year'], df['to_month'], df['to_day'], df['to_quarter'], df['to_week']
df

Unnamed: 0,format_date
0,2019-01-01 00:00:00.000000000
1,2020-03-11 11:22:33.000000000
2,2021-06-21 08:48:52.123456789


## month_name()

In [20]:
df['month_name'] = df['format_date'].dt.month_name()
df

Unnamed: 0,format_date,month_name
0,2019-01-01 00:00:00.000000000,January
1,2020-03-11 11:22:33.000000000,March
2,2021-06-21 08:48:52.123456789,June


## day_name()

In [21]:
df['weekday_name'] = df['format_date'].dt.day_name()
df

Unnamed: 0,format_date,month_name,weekday_name
0,2019-01-01 00:00:00.000000000,January,Tuesday
1,2020-03-11 11:22:33.000000000,March,Wednesday
2,2021-06-21 08:48:52.123456789,June,Monday


## set_index()
- 해당 열을 인덱스로 만들기

In [22]:
df.set_index('format_date',inplace=True)
df

Unnamed: 0_level_0,month_name,weekday_name
format_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-01 00:00:00.000000000,January,Tuesday
2020-03-11 11:22:33.000000000,March,Wednesday
2021-06-21 08:48:52.123456789,June,Monday


 ## date_range(시작일, periods(생성일수), freq(기본값='D'))

In [23]:
ex_df = pd.date_range('1/1/2019',periods=10)
ex_df

DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
               '2019-01-05', '2019-01-06', '2019-01-07', '2019-01-08',
               '2019-01-09', '2019-01-10'],
              dtype='datetime64[ns]', freq='D')

In [24]:
ex_df = pd.date_range('1/1/2019',periods=10, freq='W')
ex_df

DatetimeIndex(['2019-01-06', '2019-01-13', '2019-01-20', '2019-01-27',
               '2019-02-03', '2019-02-10', '2019-02-17', '2019-02-24',
               '2019-03-03', '2019-03-10'],
              dtype='datetime64[ns]', freq='W-SUN')

In [25]:
ex_df = pd.date_range('1/1/2019','12/31/2020', freq='W')
ex_df

DatetimeIndex(['2019-01-06', '2019-01-13', '2019-01-20', '2019-01-27',
               '2019-02-03', '2019-02-10', '2019-02-17', '2019-02-24',
               '2019-03-03', '2019-03-10',
               ...
               '2020-10-25', '2020-11-01', '2020-11-08', '2020-11-15',
               '2020-11-22', '2020-11-29', '2020-12-06', '2020-12-13',
               '2020-12-20', '2020-12-27'],
              dtype='datetime64[ns]', length=104, freq='W-SUN')

In [26]:
ex_df = pd.date_range('2019/1/1','2019/12/31', freq='M')
ex_df

DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
               '2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31'],
              dtype='datetime64[ns]', freq='M')

## 슬라이싱

In [27]:
longer_df = pd.Series(np.random.permutation(1000), index=pd.date_range('1/1/2019',periods=1000))
longer_df

2019-01-01    912
2019-01-02    804
2019-01-03    183
2019-01-04    901
2019-01-05    732
             ... 
2021-09-22    736
2021-09-23    129
2021-09-24    454
2021-09-25    128
2021-09-26    105
Freq: D, Length: 1000, dtype: int32

In [28]:
longer_df.loc['2019']

2019-01-01    912
2019-01-02    804
2019-01-03    183
2019-01-04    901
2019-01-05    732
             ... 
2019-12-27    216
2019-12-28    787
2019-12-29    963
2019-12-30    284
2019-12-31     51
Freq: D, Length: 365, dtype: int32

In [29]:
longer_df.loc['2019-02']

2019-02-01    990
2019-02-02    478
2019-02-03    768
2019-02-04    976
2019-02-05    603
2019-02-06    808
2019-02-07    286
2019-02-08    696
2019-02-09    149
2019-02-10    954
2019-02-11    428
2019-02-12    523
2019-02-13    688
2019-02-14    583
2019-02-15    729
2019-02-16    112
2019-02-17    260
2019-02-18    405
2019-02-19    412
2019-02-20    343
2019-02-21    358
2019-02-22    374
2019-02-23    161
2019-02-24    480
2019-02-25    409
2019-02-26    558
2019-02-27    989
2019-02-28    373
Freq: D, dtype: int32

In [30]:
longer_df.loc['2019-06-01' : '2019-09-30']  

2019-06-01    979
2019-06-02    550
2019-06-03    845
2019-06-04    417
2019-06-05     53
             ... 
2019-09-26    794
2019-09-27     72
2019-09-28    466
2019-09-29    802
2019-09-30    431
Freq: D, Length: 122, dtype: int32