In [1]:
import numpy as np
import pandas as pd

date_strings = np.array(['03-04-2005 11:35 PM','23-05-2010 12:01 AM','04-09-2009 09:09 PM'])

#to_datetime()으로 format 매개변수에 날짜와 시간 포맷 지정
[pd.to_datetime(date, format='%d-%m-%Y %I:%M %p') for date in date_strings]
[pd.to_datetime(date, format="%d-%m-%Y %I:%M %p", errors="ignore") for date in date_strings]
pd.to_datetime(date_strings)

DatetimeIndex(['2005-03-04 23:35:00', '2010-05-23 00:01:00',
               '2009-04-09 21:09:00'],
              dtype='datetime64[ns]', freq=None)

In [2]:
import pandas as pd

pd.Timestamp('2017-05-01 06:00:00', tz='Europe/London')
#datetime 생성
date = pd.Timestamp('2017-05-01 06:00:00')
#시간대 지정
date_in_london = date.tz_localize('Europe/London')
date_in_london 

Timestamp('2017-05-01 06:00:00+0100', tz='Europe/London')

In [3]:
#시간대 변환
date_in_london.tz_convert('Africa/Abidjan')
dates = pd.Series(pd.date_range('2/2/2002', periods=3, freq='M'))
#datetime 속성 dt를 사용하여 datetime 구성 요소에 엑세스 가능
dates.dt.tz_localize('Africa/Abidjan') 

0   2002-02-28 00:00:00+00:00
1   2002-03-31 00:00:00+00:00
2   2002-04-30 00:00:00+00:00
dtype: datetime64[ns, Africa/Abidjan]

In [4]:
from pytz import all_timezones
all_timezones[0:2] 
dates.dt.tz_localize('dateutil/Aisa/Seoul')

0   2002-02-28
1   2002-03-31
2   2002-04-30
dtype: datetime64[ns]

In [5]:
import pytz
#pytz로 객체 직접 전달
tz = pytz.timezone('Asia/Seoul')
dates.dt.tz_localize(tz)

0   2002-02-28 00:00:00+09:00
1   2002-03-31 00:00:00+09:00
2   2002-04-30 00:00:00+09:00
dtype: datetime64[ns, Asia/Seoul]

In [8]:
import pandas as pd

dataframe = pd.DataFrame()
dataframe['date'] = pd.date_range('1/1/2001', periods=100000, freq='H')

dataframe[ (dataframe['date'] > '2002-1-1 01:00:00') &(dataframe['date'] <= '2002-1-1 04:00:00') ]
dataframe = dataframe.set_index(dataframe['date'])
dataframe.loc[ '2002-1-1 01:00:00':'2002-1-1 04:00:00']

Unnamed: 0_level_0,date
date,Unnamed: 1_level_1
2002-01-01 01:00:00,2002-01-01 01:00:00
2002-01-01 02:00:00,2002-01-01 02:00:00
2002-01-01 03:00:00,2002-01-01 03:00:00
2002-01-01 04:00:00,2002-01-01 04:00:00


In [7]:
import pandas as pd
dataframe = pd.DataFrame()
dataframe['date'] = pd.date_range('1/1/2001', periods=150, freq='W')

# 년, 월, 일, 시, 분에 대한 특성 생성
dataframe['year'] = dataframe['date'].dt.year
dataframe['month'] = dataframe['date'].dt.month
dataframe['day'] = dataframe['date'].dt.day
dataframe['hour'] = dataframe['date'].dt.hour
dataframe['minute'] = dataframe['date'].dt.minute
dataframe.head(3)

Unnamed: 0,date,year,month,day,hour,minute
0,2001-01-07,2001,1,7,0,0
1,2001-01-14,2001,1,14,0,0
2,2001-01-21,2001,1,21,0,0


In [9]:
import pandas as pd
dataframe = pd.DataFrame()

dataframe['Arrived'] = [pd.Timestamp('01-01-2017'), pd.Timestamp('01-04-2017')]
dataframe['Left'] = [pd.Timestamp('01-01-2017'), pd.Timestamp('01-06-2017')]
#특성 사이의 차이 계산
dataframe['Left'] - dataframe['Arrived'] 
# 특성 간의 기간을 계산(days출력 삭제)
pd.Series(delta.days for delta in (dataframe['Left'] - dataframe['Arrived'])) 

0    0
1    2
dtype: int64

In [11]:
import pandas as pd
dates = pd.Series(pd.date_range("2/2/2002", periods=3, freq="M"))
#요일 확인
dates.dt.day_name
#요일을 정수로 확인
dates.dt.day

0    28
1    31
2    30
dtype: int64

In [12]:
import pandas as pd
dataframe = pd.DataFrame()

dataframe["dates"] = pd.date_range("1/1/2001", periods=5, freq="D")
dataframe["stock_price"] = [1.1,2.2,3.3,4.4,5.5]
# 한 행 뒤의 값을 가져옴
dataframe["previous_days_stock_price"] = dataframe["stock_price"].shift(1)
dataframe

Unnamed: 0,dates,stock_price,previous_days_stock_price
0,2001-01-01,1.1,
1,2001-01-02,2.2,1.1
2,2001-01-03,3.3,2.2
3,2001-01-04,4.4,3.3
4,2001-01-05,5.5,4.4


In [13]:
import pandas as pd

time_index = pd.date_range("01/01/2010", periods=5, freq="M")
#인덱스 설정
dataframe = pd.DataFrame(index=time_index)
dataframe["Stock_Price"] = [1,2,3,4,5] 
#이동 평균 계산
dataframe.rolling(window=2).mean()
dataframe.ewm(alpha=0.5).mean()

Unnamed: 0,Stock_Price
2010-01-31,1.0
2010-02-28,1.666667
2010-03-31,2.428571
2010-04-30,3.266667
2010-05-31,4.16129


In [14]:
import pandas as pd
import numpy as np

time_index = pd.date_range("01/01/2010", periods=5, freq="M")
dataframe = pd.DataFrame(index=time_index)
# 누락된 값이 있는 특성을 만듦
dataframe["Sales"] = [1.0,2.0,np.nan,np.nan,5.0] 
# 누락된 값을 보간
dataframe.interpolate()

# 앞쪽으로 채우기(Forward-fill)
dataframe.ffill()

# 뒤쪽으로 채우기(Back-fill)
dataframe.bfill()

# 비선형의 경우 보간 방법 변경
dataframe.interpolate(method="quadratic") 
 #보간 방향 지정
dataframe.interpolate(limit=1, limit_direction="forward") 

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,3.0
2010-04-30,
2010-05-31,5.0
