# 날짜/시간 자료형

In [None]:
# Import the datetime class and capture the current moment.
from datetime import datetime

# datetime.now() without a tz argument returns a naive (timezone-unaware) local time.
now = datetime.now()
now

In [None]:
# Calendar fields are exposed as attributes; this displays them as a (year, month, day) tuple.
now.year, now.month, now.day

In [None]:
# Elapsed time between a fixed reference moment (2021-03-10 10:20) and now.
reference_moment = datetime(2021, 3, 10, 10, 20)
delta = datetime.now() - reference_moment
delta

In [None]:
# Whole-day component of the timedelta.
delta.days

In [None]:
# Seconds component only (0-86399, days excluded); delta.total_seconds() gives the full duration.
delta.seconds

In [None]:
from datetime import datetime, timedelta

# Fixed date (midnight on 11 May 2021) used by the timedelta arithmetic examples.
date = datetime(year=2021, month=5, day=11)
date

In [None]:
# Adding a timedelta to a datetime yields a new datetime 10 days later.
date + timedelta(days=10)

In [None]:
# timedelta objects can be scaled by an integer: this steps back 20 days.
date - 2 * timedelta(days=10)

In [None]:
# Several units can be combined in one timedelta (10 days and 23 hours back).
date - timedelta(days=10, hours=23)

In [None]:
# Shift forward by 10 days, 12 hours and 30 minutes in a single step.
date + timedelta(days=10, hours=12, minutes=30)

## 문자열을 datetime으로 변환

In [None]:
# Timestamp with a time-of-day part, used by the formatting examples.
date = datetime(year=2021, month=5, day=11, hour=10, minute=30, second=50)

In [None]:
# str() renders a datetime in ISO style with a space between date and time.
str(date)

In [None]:
# strftime formats the datetime with explicit directives; here only the date part is kept.
date.strftime('%Y-%m-%d')

In [None]:
# Sample year-month-day text to be parsed back into a datetime.
date_string = '2021-04-12'

In [None]:
# strptime is the inverse of strftime: it parses text using the given format
# and raises ValueError when the text does not match.
date = datetime.strptime(date_string, '%Y-%m-%d')
date

In [None]:
# Slash-separated samples without zero padding, each with an hour:minute part.
date_strings = ['7/10/2021 10:20', '6/4/2020 16:30']

In [None]:
# Parse every sample as "day/month/year hour:minute" and print the result.
day_first_format = '%d/%m/%Y %H:%M'
for date_string in date_strings:
    date = datetime.strptime(date_string, day_first_format)
    print(date)

## pandas 내 문자열을 datetime으로 변환

In [None]:
import pandas as pd

# Load the tab-separated file from the working directory and preview the first rows.
conversion = pd.read_csv('./conversion.csv', delimiter='\t')
conversion.head()

In [None]:
# Print column dtypes and non-null counts as loaded.
conversion.info()

In [None]:
# Cast the date column to text so it can be parsed with an explicit format string.
conversion['date'] = conversion['date'].astype(str)

In [None]:
# Re-check the dtypes after the cast to str.
conversion.info()

In [None]:
# Parse the date text with an explicit layout. The column appears to hold compact
# YYYYMMDD values (the read_csv cell in this notebook parses the same column with
# '%Y%m%d'); pandas >= 2.0 matches `format` strictly, so a dashed '%Y-%m-%d'
# pattern would raise ValueError on such values.
conversion['date_dt'] = pd.to_datetime(conversion['date'], format='%Y%m%d')

In [None]:
# Preview the frame with the new date_dt column.
conversion.head()

In [None]:
# Inspect the dtypes again, now including date_dt.
conversion.info()

In [None]:
# Small single-column frame of slash-separated date strings for the parsing demo.
test_df = pd.DataFrame({'date': ['02/06/2021', '04/12/2021', '03/12/2021']})

In [None]:
# Display the raw string column.
test_df

In [None]:
# The explicit day/month/year format removes the ambiguity of values such as
# '02/06/2021', which could otherwise be read month-first.
test_df['date_dt'] = pd.to_datetime(test_df['date'], format='%d/%m/%Y')

In [None]:
# Display the strings next to their parsed values.
test_df

In [None]:
# Print the dtypes of both columns.
test_df.info()

## read_csv에서 datetime 변환하기

In [None]:
import pandas as pd

# Load the date column as text and convert it with one vectorized call using the
# compact YYYYMMDD layout. This avoids read_csv's `date_parser` argument, which is
# deprecated since pandas 2.0, and needs no name imported by another cell.
conversion = pd.read_csv('./conversion.csv', delimiter='\t', dtype={'date': str})
conversion['date'] = pd.to_datetime(conversion['date'], format='%Y%m%d')
conversion.head()

In [None]:
# Print the column dtypes after loading with date parsing.
conversion.info()

# datetime 조작하기

## dt 사용하기

In [None]:
import pandas as pd

# Let read_csv parse the `date` column itself (the format is inferred by pandas).
conversion = pd.read_csv('./conversion.csv', delimiter='\t', parse_dates = ['date'])
conversion.head()

In [None]:
# Split the parsed dates into separate year / month / day columns via the .dt accessor.
date_fields = conversion['date'].dt
for field in ('year', 'month', 'day'):
    conversion[field] = getattr(date_fields, field)

In [None]:
# Preview the added year / month / day columns.
conversion.head()

In [None]:
# Convert each timestamp to the yearly / monthly Period that contains it.
# 'Y' is the yearly period alias; the older 'A' spelling is deprecated since pandas 2.2.
conversion['date_yr'] = conversion['date'].dt.to_period(freq='Y')
conversion['date_m'] = conversion['date'].dt.to_period(freq='M')

In [None]:
# Preview the added period columns.
conversion.head()

In [None]:
# Print the dtypes, including the period columns.
conversion.info()

## 날짜 인덱스 사용하기

In [None]:
import pandas as pd

# Load the data with `date` parsed as datetimes and promoted to the row index.
conversion = (
    pd.read_csv('./conversion.csv', delimiter='\t', parse_dates=['date'])
    .set_index('date')
)
conversion.head()

In [None]:
# Display the row index built from the `date` column.
conversion.index

In [None]:
# Partial-string selection of every row dated in 2021. Row lookups by date string
# must go through .loc: `conversion['2021']` is treated as a column lookup and
# raises KeyError in pandas >= 2.0.
tmp_df = conversion.loc['2021']
tmp_df.head()

In [None]:
# Partial-string selection of every row dated in December 2020. Row lookups by
# date string must go through .loc: `conversion['2020-12']` is treated as a column
# lookup and raises KeyError in pandas >= 2.0.
tmp_df = conversion.loc['2020-12']
tmp_df.head()

In [None]:
# Label slice from 1 to 15 December 2020 (label slices include both endpoints).
tmp_df = conversion.loc['2020-12-01':'2020-12-15']
print(tmp_df.shape[0])
tmp_df.tail()

In [None]:
# Slice rows and columns together: rows up to and including 2020-12-15,
# columns from 'amount' through 'conversion_users' (both label slices are inclusive).
tmp_df = conversion.loc[:'2020-12-15', 'amount':'conversion_users']
tmp_df.head()

In [None]:
# Re-index the rows by their age relative to a fixed reference day.
today = pd.to_datetime('2021-05-01')
conversion['time_delta'] = today - conversion.index
conversion = conversion.set_index('time_delta')
conversion.head()

In [None]:
# Slice the timedelta index from the '30 days' label through the last row.
within_30days = conversion['30 days':]
print(within_30days.shape[0])
within_30days.head()

# 시계열 데이터 분석하기

In [None]:
import pandas as pd

conversion = pd.read_csv('./conversion.csv', delimiter='\t')
# Cast to text first so the values can be parsed with an explicit format.
conversion['date'] = conversion['date'].astype(str)
# The column appears to hold compact YYYYMMDD values (the read_csv cell in this
# notebook parses it with '%Y%m%d'); pandas >= 2.0 matches `format` strictly, so a
# dashed '%Y-%m-%d' pattern would raise ValueError on such values.
conversion['date_dt'] = pd.to_datetime(conversion['date'], format='%Y%m%d')
conversion.head()

In [None]:
# Earliest date on which sales occurred
min_dt = conversion['date_dt'].min()
min_dt

In [None]:
# Most recent date on which sales occurred
max_dt = conversion['date_dt'].max()
max_dt

In [None]:
# Elapsed time of each row since the earliest date (a timedelta column).
conversion['dday'] = conversion['date_dt'] - min_dt
conversion.head()

## 일자별 데이터 분포

In [None]:
import matplotlib.pyplot as plt

# Plot amount against date (DataFrame.plot draws a line chart by default).
conversion.plot(x='date_dt', y='amount', figsize=(8, 4))

## 일별 분포 비교하기

In [None]:
import pandas as pd

conversion = pd.read_csv('./conversion.csv', delimiter='\t')
# Cast to text first so the values can be parsed with an explicit format.
conversion['date'] = conversion['date'].astype(str)
# The column appears to hold compact YYYYMMDD values (the read_csv cell in this
# notebook parses it with '%Y%m%d'); pandas >= 2.0 matches `format` strictly, so a
# dashed '%Y-%m-%d' pattern would raise ValueError on such values.
conversion['date_dt'] = pd.to_datetime(conversion['date'], format='%Y%m%d')
conversion.head()

In [None]:
# Month label as plain text (used later as the grouping key) plus the day of month.
conversion['date_m'] = conversion['date_dt'].dt.to_period(freq='M').astype(str)
conversion['day'] = conversion['date_dt'].dt.day

In [None]:
# Preview the added date_m and day columns.
conversion.head()

In [None]:
# Move date_dt from a column into the row index, in place. set_index drops the
# column by default, so conversion['date_dt'] is no longer available afterwards.
conversion.set_index('date_dt', inplace=True)

In [None]:
# Keep only the rows whose index falls in 2021 (partial-string indexing).
new_df = conversion.loc['2021']

In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

# Draw one line per month on shared axes, with the day of month on the x axis.
# Grouping by the bare column name (not a one-element list) keeps `key` a plain
# string, so the legend shows e.g. '2021-01' rather than the 1-tuple
# ('2021-01',) that pandas >= 2.0 yields for list-style grouping keys.
for key, grp in new_df.groupby('date_m'):
    ax = grp.plot(ax=ax, kind='line', x='day', y='conversions', label=key)

plt.legend(loc='best')
plt.show()

## 월별 데이터 분포

In [None]:
# `date_dt` was moved into the row index by the earlier set_index('date_dt') call,
# so it is no longer a column and conversion['date_dt'] would raise KeyError.
# Read the calendar fields from the DatetimeIndex instead.
conversion['month'] = conversion.index.month
conversion['year'] = conversion.index.year

In [None]:
# Preview the added month and year columns.
conversion.head()

In [None]:
# Sum amount and conversions for every (year, month) pair.
# NOTE(review): the `_q` suffix suggests quarters, but the grouping is monthly - confirm the naming.
conversion_year_q = conversion.groupby(['year', 'month'])[['amount', 'conversions']].sum()

In [None]:
# Display the aggregated totals (indexed by year and month).
conversion_year_q

In [None]:
import matplotlib.pyplot as plt

# Aggregated totals: Series.plot() reuses the current figure, so the figure
# created here controls the chart size.
plt.figure(figsize=(8, 3))
conversion_year_q['amount'].plot()
plt.show()

# Per-row amounts: `date_dt` is the row index at this point (not a column), so
# x='date_dt' would raise KeyError; omitting x makes plot() use the index.
# DataFrame.plot(figsize=...) creates its own figure, so no separate
# plt.figure() call is made (it would only leave an empty figure behind).
conversion.plot(y='amount', figsize=(8, 4))
plt.show()