# 접근자(Accessor) 실습 ---------------------------------------

- DF/SR 속성으로 제공되며 다른 객체의 기능/속성을 사용할 수 있도록 해줌

- 종류
    - Series.dt : datetime 속성 사용할 수 있도록 해줌
    - Series.str : string 관련 메서드 사용할 수 있도록 해줌
    - DataFrame.sparse : sparse 관련 속성/메서드 사용할 수 있도록 해줌

In [4]:
import pandas as pd, random

# Build the sample data --------------------------------------
# Three columns: name, birth date, age
# Ten people, names written in English
# Birth: ten consecutive month-end dates within the same year (2022)

df = pd.DataFrame({
    'Name': ['Jhon Kim', 'GilDong Hong', 'Tom KIM', 'Mn Lee', 'Annan Jung',
             'K Paul', 'H Kane', 'H Son', 'E Hazard', 'M Salah'],
    # An explicit MonthEnd offset is used instead of the 'M' string alias:
    # newer pandas deprecates 'M' in favour of 'ME', while older pandas does
    # not know 'ME'. The offset object yields the same month-end dates on both.
    'Birth': pd.date_range('2022-01-10', periods=10, freq=pd.offsets.MonthEnd()),
    # One random age in [17, 25] per person; the loop index itself is unused.
    'age': [random.randint(17, 25) for _ in range(10)],
})
df

Unnamed: 0,Name,Birth,age
0,Jhon Kim,2022-01-31,19
1,GilDong Hong,2022-02-28,17
2,Tom KIM,2022-03-31,25
3,Mn Lee,2022-04-30,20
4,Annan Jung,2022-05-31,21
5,K Paul,2022-06-30,21
6,H Kane,2022-07-31,25
7,H Son,2022-08-31,18
8,E Hazard,2022-09-30,22
9,M Salah,2022-10-31,18


In [5]:
# Show the dtype of every column in df
df.dtypes

Name             object
Birth    datetime64[ns]
age               int64
dtype: object

In [7]:
# Look up the Name value stored under index label 0 and show it with its Python type
first_entry = df['Name'][0]
first_entry, type(first_entry)

('Jhon Kim', str)

In [8]:
# Split every name into a list of words through the .str accessor
df['Name'].str.split()

0        [Jhon, Kim]
1    [GilDong, Hong]
2         [Tom, KIM]
3          [Mn, Lee]
4      [Annan, Jung]
5          [K, Paul]
6          [H, Kane]
7           [H, Son]
8        [E, Hazard]
9         [M, Salah]
Name: Name, dtype: object

In [9]:
# Upper-case every name through the .str accessor
df['Name'].str.upper()

0        JHON KIM
1    GILDONG HONG
2         TOM KIM
3          MN LEE
4      ANNAN JUNG
5          K PAUL
6          H KANE
7           H SON
8        E HAZARD
9         M SALAH
Name: Name, dtype: object

In [10]:
# Boolean Series: True where the name contains the substring 'a'
df['Name'].str.contains('a')

0    False
1    False
2    False
3    False
4     True
5     True
6     True
7    False
8     True
9     True
Name: Name, dtype: bool

## str 데이터로 구성된 컬럼(Series)의 str 접근자 ------------------------

- 전체 컬럼의 요소에 str관련 메서드 사용 가능하도록 하는 접근자
- 사용법
        - 객체변수[컬럼명].str.메서드() -> upper, lower, split, replace.....
        - 객체변수[컬럼명].str.get(인덱스) => 분리된 문자열 리스트에서 특정 요소 추출

In [22]:
# Split each name string into a list of words and keep the result for later cells
finame = df['Name'].str.split()

In [25]:
# Pick a specific element out of each split list: position 0, then position 1
finame.str.get(0), finame.str.get(1)

(0       Jhon
 1    GilDong
 2        Tom
 3         Mn
 4      Annan
 5          K
 6          H
 7          H
 8          E
 9          M
 Name: Name, dtype: object,
 0       Kim
 1      Hong
 2       KIM
 3       Lee
 4      Jung
 5      Paul
 6      Kane
 7       Son
 8    Hazard
 9     Salah
 Name: Name, dtype: object)

In [26]:
# Store the first and second word of each split name as two new columns
df['First_Name'], df['Last_Name'] = finame.str.get(0), finame.str.get(1)
df

Unnamed: 0,Name,Birth,age,First_Name,Last_Name
0,Jhon Kim,2022-01-31,19,Jhon,Kim
1,GilDong Hong,2022-02-28,17,GilDong,Hong
2,Tom KIM,2022-03-31,25,Tom,KIM
3,Mn Lee,2022-04-30,20,Mn,Lee
4,Annan Jung,2022-05-31,21,Annan,Jung
5,K Paul,2022-06-30,21,K,Paul
6,H Kane,2022-07-31,25,H,Kane
7,H Son,2022-08-31,18,H,Son
8,E Hazard,2022-09-30,22,E,Hazard
9,M Salah,2022-10-31,18,M,Salah


In [30]:
# str.split(expand=True): the split pieces come back as separate
# DataFrame columns (labelled 0 and 1 here) instead of one list per row
value = df['Name'].str.split(expand=True)
value

Unnamed: 0,0,1
0,Jhon,Kim
1,GilDong,Hong
2,Tom,KIM
3,Mn,Lee
4,Annan,Jung
5,K,Paul
6,H,Kane
7,H,Son
8,E,Hazard
9,M,Salah


In [34]:
# Place the expanded name columns to the right of df, aligned on the row index
df1 = pd.concat([df, value], axis='columns')

In [35]:
# Display the concatenated frame
df1

Unnamed: 0,Name,Birth,age,First_Name,Last_Name,0,1
0,Jhon Kim,2022-01-31,19,Jhon,Kim,Jhon,Kim
1,GilDong Hong,2022-02-28,17,GilDong,Hong,GilDong,Hong
2,Tom KIM,2022-03-31,25,Tom,KIM,Tom,KIM
3,Mn Lee,2022-04-30,20,Mn,Lee,Mn,Lee
4,Annan Jung,2022-05-31,21,Annan,Jung,Annan,Jung
5,K Paul,2022-06-30,21,K,Paul,K,Paul
6,H Kane,2022-07-31,25,H,Kane,H,Kane
7,H Son,2022-08-31,18,H,Son,H,Son
8,E Hazard,2022-09-30,22,E,Hazard,E,Hazard
9,M Salah,2022-10-31,18,M,Salah,M,Salah


In [36]:
# Show the dtype of every column in df1
df1.dtypes

Name                  object
Birth         datetime64[ns]
age                    int64
First_Name            object
Last_Name             object
0                     object
1                     object
dtype: object

In [39]:
# Break the Birth timestamps into calendar parts through the .dt accessor
birth_parts = df1['Birth'].dt
df1['Year'] = birth_parts.year
df1['Month'] = birth_parts.month
df1['Day'] = birth_parts.day
df1

Unnamed: 0,Name,Birth,age,First_Name,Last_Name,0,1,Year,Month,Day
0,Jhon Kim,2022-01-31,19,Jhon,Kim,Jhon,Kim,2022,1,31
1,GilDong Hong,2022-02-28,17,GilDong,Hong,GilDong,Hong,2022,2,28
2,Tom KIM,2022-03-31,25,Tom,KIM,Tom,KIM,2022,3,31
3,Mn Lee,2022-04-30,20,Mn,Lee,Mn,Lee,2022,4,30
4,Annan Jung,2022-05-31,21,Annan,Jung,Annan,Jung,2022,5,31
5,K Paul,2022-06-30,21,K,Paul,K,Paul,2022,6,30
6,H Kane,2022-07-31,25,H,Kane,H,Kane,2022,7,31
7,H Son,2022-08-31,18,H,Son,H,Son,2022,8,31
8,E Hazard,2022-09-30,22,E,Hazard,E,Hazard,2022,9,30
9,M Salah,2022-10-31,18,M,Salah,M,Salah,2022,10,31


In [43]:
# Give the integer-labelled columns produced by split(expand=True) readable names;
# the rename is applied to df1 itself (inplace)
new_labels = {0: 'FN', 1: 'LN'}
df1.rename(columns=new_labels, inplace=True)
df1


Unnamed: 0,Name,Birth,age,First_Name,Last_Name,FN,LN,Year,Month,Day
0,Jhon Kim,2022-01-31,19,Jhon,Kim,Jhon,Kim,2022,1,31
1,GilDong Hong,2022-02-28,17,GilDong,Hong,GilDong,Hong,2022,2,28
2,Tom KIM,2022-03-31,25,Tom,KIM,Tom,KIM,2022,3,31
3,Mn Lee,2022-04-30,20,Mn,Lee,Mn,Lee,2022,4,30
4,Annan Jung,2022-05-31,21,Annan,Jung,Annan,Jung,2022,5,31
5,K Paul,2022-06-30,21,K,Paul,K,Paul,2022,6,30
6,H Kane,2022-07-31,25,H,Kane,H,Kane,2022,7,31
7,H Son,2022-08-31,18,H,Son,H,Son,2022,8,31
8,E Hazard,2022-09-30,22,E,Hazard,E,Hazard,2022,9,30
9,M Salah,2022-10-31,18,M,Salah,M,Salah,2022,10,31


## 실습 ------------------------------------------------------------------------------------------

In [44]:
# Location of the practice data: the directory first, then the CSV file inside it
DIR = '../../Data/'
FILE = f'{DIR}stock-data.csv'

In [46]:
# Read the CSV at FILE into a DataFrame and display it
df = pd.read_csv(FILE)
df

Unnamed: 0,Date,Close,Start,High,Low,Volume
0,2018-07-02,10100,10850,10900,10000,137977
1,2018-06-29,10700,10550,10900,9990,170253
2,2018-06-28,10400,10900,10950,10150,155769
3,2018-06-27,10900,10800,11050,10500,133548
4,2018-06-26,10800,10900,11000,10700,63039
5,2018-06-25,11150,11400,11450,11000,55519
6,2018-06-22,11300,11250,11450,10750,134805
7,2018-06-21,11200,11350,11750,11200,133002
8,2018-06-20,11550,11200,11600,10900,308596
9,2018-06-19,11300,11850,11950,11300,180656


In [50]:
# Show the dtype of every column in the loaded stock data
df.dtypes

Date      object
Close      int64
Start      int64
High       int64
Low        int64
Volume     int64
dtype: object

In [54]:
# Show the column dtypes of the stock data again
df.dtypes

Date      object
Close      int64
Start      int64
High       int64
Low        int64
Volume     int64
dtype: object

In [61]:
# No visible cell creates 'Date1', so build it here: parse the string 'Date'
# column into datetime values, which is what the .dt accessor below requires.
df['Date1'] = pd.to_datetime(df['Date'])

# Split the parsed date into year / month / day columns
df['Y'] = df['Date1'].dt.year
df['M'] = df['Date1'].dt.month
df['D'] = df['Date1'].dt.day

# Display the rows ordered by day of month; the sorted result is not assigned
# back, so df keeps its original row order
df.sort_values(by='D')

Unnamed: 0,Date,Close,Start,High,Low,Volume,Date1,Y,M,D
19,2022-06-01,11900,11800,12100,11750,32062,2022-06-01,2022,6,1
0,2018-07-02,10100,10850,10900,10000,137977,2018-07-02,2018,7,2
18,2022-06-04,11900,11900,12200,11700,25171,2022-06-04,2022,6,4
17,2022-06-05,12150,11800,12250,11800,42485,2022-06-05,2022,6,5
16,2021-06-07,11950,12200,12300,11900,49088,2021-06-07,2021,6,7
15,2020-06-08,11950,11950,12200,11800,59258,2020-06-08,2020,6,8
14,2020-06-11,11950,12000,12250,11950,62293,2020-06-11,2020,6,11
13,2020-06-12,13200,12200,13300,12050,558148,2020-06-12,2020,6,12
12,2018-06-14,13450,13200,13700,13150,347451,2018-06-14,2018,6,14
11,2018-06-15,13400,13600,13600,12900,201376,2018-06-15,2018,6,15


In [62]:
# Print a summary of df: index range, non-null count and dtype per column, memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    20 non-null     object        
 1   Close   20 non-null     int64         
 2   Start   20 non-null     int64         
 3   High    20 non-null     int64         
 4   Low     20 non-null     int64         
 5   Volume  20 non-null     int64         
 6   Date1   20 non-null     datetime64[ns]
 7   Y       20 non-null     int64         
 8   M       20 non-null     int64         
 9   D       20 non-null     int64         
dtypes: datetime64[ns](1), int64(8), object(1)
memory usage: 1.7+ KB
