## 7.1 문자열을 날짜로 변환하기

In [1]:
import numpy as np
import pandas as pd

In [2]:
date_strings = np.array(['03-04-2005 11:35 PM', '23-05-2010 12:01 AM', '04-09-2009 09:09 PM'])

In [6]:
[pd.to_datetime(date, format='%d-%m-%Y %I:%M %p') for date in date_strings]

[Timestamp('2005-04-03 23:35:00'),
 Timestamp('2010-05-23 00:01:00'),
 Timestamp('2009-09-04 21:09:00')]

In [7]:
[pd.to_datetime(date, format="%d-%m-%Y %I:%M %p", errors="ignore") for date in date_strings]

[Timestamp('2005-04-03 23:35:00'),
 Timestamp('2010-05-23 00:01:00'),
 Timestamp('2009-09-04 21:09:00')]

In [8]:
pd.to_datetime(date_strings)

DatetimeIndex(['2005-03-04 23:35:00', '2010-05-23 00:01:00',
               '2009-04-09 21:09:00'],
              dtype='datetime64[ns]', freq=None)

## 7.2 시간대 다루기

In [9]:
pd.Timestamp('2017-05-01 06:00:00', tz='Europe/London')

Timestamp('2017-05-01 06:00:00+0100', tz='Europe/London')

In [12]:
date=pd.Timestamp('2017-05-01 06:00:00')

In [14]:
date_in_london = date.tz_localize('Europe/London')

In [15]:
date_in_london

Timestamp('2017-05-01 06:00:00+0100', tz='Europe/London')

In [16]:
date_in_london.tz_convert('Africa/Abidjan')

Timestamp('2017-05-01 05:00:00+0000', tz='Africa/Abidjan')

In [18]:
dates=pd.Series(pd.date_range('2/2/2002', periods=3, freq='M'))

In [19]:
dates=pd.Series(pd.date_range('2/2/2002', periods=3, freq='M'))

In [20]:
dates.dt.tz_localize('Africa/Abidjan')

0   2002-02-28 00:00:00+00:00
1   2002-03-31 00:00:00+00:00
2   2002-04-30 00:00:00+00:00
dtype: datetime64[ns, Africa/Abidjan]

In [21]:
from pytz import all_timezones

In [24]:
all_timezones[0:2]

['Africa/Abidjan', 'Africa/Accra']

In [25]:
dates.dt.tz_localize('dateutil/Aisa/Seoul')

0   2002-02-28
1   2002-03-31
2   2002-04-30
dtype: datetime64[ns]

In [26]:
import pytz

In [27]:
tz= pytz.timezone('Asia/Seoul')

In [28]:
dates.dt.tz_localize(tz)

0   2002-02-28 00:00:00+09:00
1   2002-03-31 00:00:00+09:00
2   2002-04-30 00:00:00+09:00
dtype: datetime64[ns, Asia/Seoul]

## 7.3 날짜와 시간 선택하기

In [30]:
dataframe=pd.DataFrame()

In [31]:
dataframe['date']=pd.date_range('1/1/2001', periods=100000, freq='H')

In [34]:
dataframe[(dataframe['date'] > '2002-1-1 01:00:00') & (dataframe['date'] <= '2002-1-1 04:00:00')]

Unnamed: 0,date
8762,2002-01-01 02:00:00
8763,2002-01-01 03:00:00
8764,2002-01-01 04:00:00


In [35]:
dataframe=dataframe.set_index(dataframe['date'])

In [36]:
dataframe.loc['2002-1-1 01:00:00':'2002-1-1 04:00:00']

Unnamed: 0_level_0,date
date,Unnamed: 1_level_1
2002-01-01 01:00:00,2002-01-01 01:00:00
2002-01-01 02:00:00,2002-01-01 02:00:00
2002-01-01 03:00:00,2002-01-01 03:00:00
2002-01-01 04:00:00,2002-01-01 04:00:00


## 7.4 날짜 데이터를 여러 특성으로 나누기

In [37]:
dataframe = pd.DataFrame()

In [38]:
dataframe['date'] = pd.date_range('1/1/2001', periods=150, freq = 'W')

In [39]:
dataframe['year'] = dataframe['date'].dt.year

In [40]:
dataframe['month'] = dataframe['date'].dt.month

In [41]:
dataframe['day']=dataframe['date'].dt.day

In [42]:
dataframe['hour'] = dataframe['date'].dt.hour

In [43]:
dataframe['minute'] = dataframe['date'].dt.minute

In [44]:
dataframe.head(3)

Unnamed: 0,date,year,month,day,hour,minute
0,2001-01-07,2001,1,7,0,0
1,2001-01-14,2001,1,14,0,0
2,2001-01-21,2001,1,21,0,0


## 7.5 날짜 간의 차이를 계산하기

In [45]:
dataframe=pd.DataFrame()

In [48]:
dataframe['Arrived'] = [pd.Timestamp('01-01-2017'), pd.Timestamp('01-04-2017')]

In [50]:
dataframe['Left'] = [pd.Timestamp('01-01-2017'), pd.Timestamp('01-06-2017')]

In [51]:
dataframe['Left'] - dataframe['Arrived']

0   0 days
1   2 days
dtype: timedelta64[ns]

In [53]:
pd.Series(delta.days for delta in (dataframe['Left'] - dataframe['Arrived']))

0    0
1    2
dtype: int64

## 7.6 요일을 인코딩ㅇ하기

In [54]:
dates=pd.Series(pd.date_range("2/2/2002", periods=3, freq="M"))

In [55]:
dates.dt.day_name()

0    Thursday
1      Sunday
2     Tuesday
dtype: object

In [56]:
dates.dt.weekday

0    3
1    6
2    1
dtype: int64

## 7.7 시차 특성 만들기

In [57]:
dataframe = pd.DataFrame()

In [58]:
dataframe["dates"] = pd.date_range("1/1/2001", periods=5, freq="D")

In [59]:
dataframe["stock_price"] = [1.1,2.2,3.3,4.4,5.5]

In [60]:
dataframe["previodus_days_stock_price"]=dataframe["stock_price"].shift(1)

In [61]:
dataframe

Unnamed: 0,dates,stock_price,previodus_days_stock_price
0,2001-01-01,1.1,
1,2001-01-02,2.2,1.1
2,2001-01-03,3.3,2.2
3,2001-01-04,4.4,3.3
4,2001-01-05,5.5,4.4


## 7.8 이동 시간 윈도 사용ㅇ하기

In [62]:
time_index = pd.date_range("01/01/2010", periods=5, freq="M")

In [63]:
dataframe = pd.DataFrame(index=time_index)

In [64]:
dataframe["Stock_Price"] = [1,2,3,4,5]

In [65]:
dataframe.rolling(window=2).mean()

Unnamed: 0,Stock_Price
2010-01-31,
2010-02-28,1.5
2010-03-31,2.5
2010-04-30,3.5
2010-05-31,4.5


## 7.9 시계열 데이터에서 누락된 값 다루기

In [66]:
import numpy as np

In [68]:
time_index = pd.date_range("01/01/2010", periods=5, freq="M")

In [69]:
dataframe = pd.DataFrame(index=time_index)

In [70]:
dataframe["Sales"] = [1.0,2.0,np.nan,np.nan,5.0]

In [71]:
dataframe.interpolate()

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,3.0
2010-04-30,4.0
2010-05-31,5.0


In [72]:
dataframe.ffill()

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,2.0
2010-04-30,2.0
2010-05-31,5.0


In [73]:
dataframe.bfill()

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,5.0
2010-04-30,5.0
2010-05-31,5.0


In [74]:
dataframe.interpolate(method="quadratic")

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,3.059808
2010-04-30,4.038069
2010-05-31,5.0


In [76]:
dataframe.interpolate(lmit=1, limit_direction="forward")

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,3.0
2010-04-30,4.0
2010-05-31,5.0
