# PANDAS DATE-TIME 다루기 ------------------------------------------------------

## 날짜 시간 관련 PANDAS ------------------------------------------------

- 관련 자료형 : Timestamp, datetime64, Period

- 관련 메서드 : to_datetime(), date_range(), to_period(), period_range()

In [2]:
# 모듈 로딩 -------------------------------------------------------
import pandas as pd

# Build a DataFrame whose columns hold the separate date components
# (one row per date).
df = pd.DataFrame(
    dict(year=[2021, 2022], month=[11, 12], day=[1, 24])
)

# Print a summary of the columns, their non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   year    2 non-null      int64
 1   month   2 non-null      int64
 2   day     2 non-null      int64
dtypes: int64(3)
memory usage: 176.0 bytes


In [3]:
# Show the dtype of every column in df
df.dtypes

year     int64
month    int64
day      int64
dtype: object

In [4]:
# Assemble the year/month/day columns of df into datetime values
# with pandas.to_datetime().
result = pd.to_datetime(df)

# Show the container type first, then the converted values.
print(type(result))
print(result)

<class 'pandas.core.series.Series'>
0   2021-11-01
1   2022-12-24
dtype: datetime64[ns]


In [5]:
# Pull out a single element: one date on its own is a Timestamp.
one = result[0]

print(type(one))
print(one)

<class 'pandas._libs.tslibs.timestamps.Timestamp'>
2021-11-01 00:00:00


In [6]:
# day, month and hour are plain attributes, but month_name is a method:
# it has to be called to get the month's name rather than the method itself.
one.day, one.month, one.month_name(), one.hour

(1, 11, 'November', 0)

# 타입 및 다른 포맷 변환 메서드들 ... to_변환타입()

In [7]:
# Convert the datetime Series into a one-column DataFrame
result.to_frame()

Unnamed: 0,0
0,2021-11-01
1,2022-12-24


In [8]:
# Convert the Series into a plain Python list of Timestamp objects
result.to_list()

[Timestamp('2021-11-01 00:00:00'), Timestamp('2022-12-24 00:00:00')]

In [9]:
# Render the Series (index and values) as a single string
result.to_string()

'0   2021-11-01\n1   2022-12-24'

In [10]:
# Save the Series to result.csv, leaving out the index
result.to_csv('result.csv', index=False)

In [11]:
# Save the Series to result.json
result.to_json('result.json')

## CSV 파일 활용 실습 --------------------------------------------------------

In [12]:
# Location of the CSV file to load.
DIR_PATH = '../Data/'
FILE_NAME = DIR_PATH + 'survey_visited.csv'

# Read the file into a DataFrame, then summarize its columns,
# non-null counts and dtypes.
csvDF = pd.read_csv(FILE_NAME)
csvDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   ident   8 non-null      int64 
 1   site    8 non-null      object
 2   dated   7 non-null      object
dtypes: int64(1), object(2)
memory usage: 320.0+ bytes


In [13]:
# Preview the first rows of the loaded data
csvDF.head()

Unnamed: 0,ident,site,dated
0,619,DR-1,1927-02-08
1,622,DR-1,1927-02-10
2,734,DR-3,1939-01-07
3,735,DR-3,1930-01-12
4,751,DR-3,1930-02-26


In [14]:
# Handle missing values: start by counting them in each column.
csvDF.isna().sum()

ident    0
site     0
dated    1
dtype: int64

In [15]:
# Fill each missing value with the value from the previous row (forward fill).
# DataFrame.ffill() is used because fillna(method='ffill') is deprecated
# since pandas 2.1.
csvDF.ffill(inplace=True)

# Re-count the missing values to confirm that none remain.
csvDF.isnull().sum()

ident    0
site     0
dated    0
dtype: int64

In [16]:
# Convert the date strings in the 'dated' column to datetime values
csvDF['dated']=pd.to_datetime(csvDF['dated'])

In [17]:
# Summarize the columns again to check the dtype of 'dated'
csvDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   ident   8 non-null      int64         
 1   site    8 non-null      object        
 2   dated   8 non-null      datetime64[ns]
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 320.0+ bytes


In [18]:
# Individual elements of the datetime column are Timestamp objects
csvDF['dated'][0], csvDF['dated'][1]

(Timestamp('1927-02-08 00:00:00'), Timestamp('1927-02-10 00:00:00'))

In [19]:
# A single Timestamp exposes its components (year, day, ...) as attributes
csvDF['dated'][0], csvDF['dated'][0].year, csvDF['dated'][0].day

(Timestamp('1927-02-08 00:00:00'), 1927, 8)

In [22]:
# Check the container type of the column and the dtype of its values
type(csvDF['dated']), csvDF['dated'].dtype

(pandas.core.series.Series, dtype('<M8[ns]'))

In [25]:
# The .dt accessor exposes datetime components (year, month, ...) for every
# element of a datetime Series at once; a single Timestamp uses .year directly.
csvDF['dated'].dt.year

0    1927
1    1927
2    1939
3    1930
4    1930
5    1930
6    1932
7    1932
Name: dated, dtype: int64

In [26]:
# Month number of every date in the column
csvDF['dated'].dt.month

0    2
1    2
2    1
3    1
4    2
5    2
6    1
7    3
Name: dated, dtype: int64

In [37]:
# Keep the date column in its own variable and display it
dateSR=csvDF['dated']
dateSR

0   1927-02-08
1   1927-02-10
2   1939-01-07
3   1930-01-12
4   1930-02-26
5   1930-02-26
6   1932-01-14
7   1932-03-22
Name: dated, dtype: datetime64[ns]

In [31]:
# Boolean-mask selection: keep only the dates whose year is 1930 or later
dateSR[dateSR.dt.year>=1930]

2   1939-01-07
3   1930-01-12
4   1930-02-26
5   1930-02-26
6   1932-01-14
7   1932-03-22
Name: dated, dtype: datetime64[ns]

In [34]:
# Boolean-mask selection: keep only the dates whose month number is 2 or greater
dateSR[dateSR.dt.month>=2]

0   1927-02-08
1   1927-02-10
4   1930-02-26
5   1930-02-26
7   1932-03-22
Name: dated, dtype: datetime64[ns]