# 접근자(Accessor) 실습 --------------------------------------------------------------

- DF/SR에 속성으로 제공되며 다른 객체의 기능/속성을 사용할 수 있도록 해줌

- 종류
    - Series.dt  : datetime 속성 사용할 수 있도록 해줌
    - Series.str : string 관련 메서드 사용할 수 있도록 해줌
    - DataFrame.spare : spare 관련 속성/메서드 사용할 수 있도록 해줌

In [1]:
# 모듈 로딩 -----------------------------------------------------------------------------------
import pandas as pd, random

# 데이터 생성 -------------------------------------------------------------------
# 이름, 생년월일, 나이 3개 컬럼으로 구성
# 10명
# 생년월일: 동년해, 동일날짜  <- date_range()
# 이름 : 영문 (예: Tom Kim)
df=pd.DataFrame({'name':['One Kim', 'Two Kim', 'Three Kim', 'Four Kim', 'Fiv Kim', 'Six Park', 'Sev Park', 'Eig Park', 'Nine Park', 'Ten Park'],
                 'birth':pd.date_range('2022-01-10', periods=10, freq='M'),
                 'age':[random.randint(17,25) for count in range(10)]})

df

Unnamed: 0,name,birth,age
0,One Kim,2022-01-31,24
1,Two Kim,2022-02-28,17
2,Three Kim,2022-03-31,25
3,Four Kim,2022-04-30,20
4,Fiv Kim,2022-05-31,17
5,Six Park,2022-06-30,20
6,Sev Park,2022-07-31,25
7,Eig Park,2022-08-31,25
8,Nine Park,2022-09-30,17
9,Ten Park,2022-10-31,24


In [2]:
df.dtypes

name             object
birth    datetime64[ns]
age               int64
dtype: object

In [3]:
df['name'][0], type(df['name'][0])

('One Kim', str)

In [4]:
df['name'][0].split( )

['One', 'Kim']

In [5]:
df['age'].max(), df['age'].mean()

(25, 20.7)

## str 데이터로 구성된 컬럼(Series)의 str 접근자 -------------------------

- 전체 컬럼의 요소에 str관련 메서드 사용 가능하도록 하는 접근자
- 사용법
        - 객체변수[컬럼명].str.메서드() => upper, lower, split, replace,...
        - 객체변수[컬럼명].str.get(인덱스) => 분리 문자리스트에서 특정 요소 추출

In [6]:
df['name'].str.upper(), df['name'].str.contains('E')      # capitalize 첫 글자만 대문자

(0      ONE KIM
 1      TWO KIM
 2    THREE KIM
 3     FOUR KIM
 4      FIV KIM
 5     SIX PARK
 6     SEV PARK
 7     EIG PARK
 8    NINE PARK
 9     TEN PARK
 Name: name, dtype: object,
 0    False
 1    False
 2    False
 3    False
 4    False
 5    False
 6    False
 7     True
 8    False
 9    False
 Name: name, dtype: bool)

In [7]:
# 문자열 분리 ------------------------------------------------------------------------------
flname=df['name'].str.split()
type(flname)

pandas.core.series.Series

In [8]:
flname[0][0], flname[0][1]

('One', 'Kim')

In [9]:
# 분리된 문자열 리스트에서 특정 값 가져오기 --------------------------------------------------------
flname.str.get(0), flname.str.get(1)         # get 괄호 안에 인덱스 주면 됨

(0      One
 1      Two
 2    Three
 3     Four
 4      Fiv
 5      Six
 6      Sev
 7      Eig
 8     Nine
 9      Ten
 Name: name, dtype: object,
 0     Kim
 1     Kim
 2     Kim
 3     Kim
 4     Kim
 5    Park
 6    Park
 7    Park
 8    Park
 9    Park
 Name: name, dtype: object)

In [10]:
df['first_name']=flname.str.get(0)
df['last_name']=flname.str.get(1)
df

Unnamed: 0,name,birth,age,first_name,last_name
0,One Kim,2022-01-31,17,One,Kim
1,Two Kim,2022-02-28,25,Two,Kim
2,Three Kim,2022-03-31,19,Three,Kim
3,Four Kim,2022-04-30,21,Four,Kim
4,Fiv Kim,2022-05-31,24,Fiv,Kim
5,Six Park,2022-06-30,22,Six,Park
6,Sev Park,2022-07-31,22,Sev,Park
7,Eig Park,2022-08-31,17,Eig,Park
8,Nine Park,2022-09-30,19,Nine,Park
9,Ten Park,2022-10-31,21,Ten,Park


In [11]:
# df['name'].str.split()[0][1]

In [12]:
# fir_name=[]
# sec_name=[]
# for i in df.index:
#     if df['name'].str.split()[i]:
#         fir_name.append(df['name'].str.split()[i][0])
#         sec_name.append(df['name'].str.split()[i][1])
        
# df['fir_name']=fir_name
# df['sec_name']=sec_name

# df.insert(1,'fir_name')

In [13]:
# str.split()
value=df['name'].str.split(expand=True)          # expand 분리된 것을 시리즈로 담았는데 모아지니까 데이터프레임
type(value)     # expand 확대되다

pandas.core.frame.DataFrame

In [14]:
value.shape, value.ndim, value.columns, value.index

((10, 2),
 2,
 RangeIndex(start=0, stop=2, step=1),
 RangeIndex(start=0, stop=10, step=1))

In [15]:
# value.columns=['fN','sN']
df2=pd.concat([df,value], axis=1)
df2

Unnamed: 0,name,birth,age,first_name,last_name,0,1
0,One Kim,2022-01-31,17,One,Kim,One,Kim
1,Two Kim,2022-02-28,25,Two,Kim,Two,Kim
2,Three Kim,2022-03-31,19,Three,Kim,Three,Kim
3,Four Kim,2022-04-30,21,Four,Kim,Four,Kim
4,Fiv Kim,2022-05-31,24,Fiv,Kim,Fiv,Kim
5,Six Park,2022-06-30,22,Six,Park,Six,Park
6,Sev Park,2022-07-31,22,Sev,Park,Sev,Park
7,Eig Park,2022-08-31,17,Eig,Park,Eig,Park
8,Nine Park,2022-09-30,19,Nine,Park,Nine,Park
9,Ten Park,2022-10-31,21,Ten,Park,Ten,Park


In [16]:
df2.rename(columns={0:'FName', 1:'LName'}, inplace=True)

In [17]:
df2

Unnamed: 0,name,birth,age,first_name,last_name,FName,LName
0,One Kim,2022-01-31,17,One,Kim,One,Kim
1,Two Kim,2022-02-28,25,Two,Kim,Two,Kim
2,Three Kim,2022-03-31,19,Three,Kim,Three,Kim
3,Four Kim,2022-04-30,21,Four,Kim,Four,Kim
4,Fiv Kim,2022-05-31,24,Fiv,Kim,Fiv,Kim
5,Six Park,2022-06-30,22,Six,Park,Six,Park
6,Sev Park,2022-07-31,22,Sev,Park,Sev,Park
7,Eig Park,2022-08-31,17,Eig,Park,Eig,Park
8,Nine Park,2022-09-30,19,Nine,Park,Nine,Park
9,Ten Park,2022-10-31,21,Ten,Park,Ten,Park


## 실습 ----------------------------------------------------------------------------

- stock-data.csv

- 접근자 사용하여 Date 컬럼 분할 (Y, M, D)

- D일 기준으로 정렬

In [18]:
stockDF=pd.read_csv('../Data/stock-data.csv')
stockDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    20 non-null     object
 1   Close   20 non-null     int64 
 2   Start   20 non-null     int64 
 3   High    20 non-null     int64 
 4   Low     20 non-null     int64 
 5   Volume  20 non-null     int64 
dtypes: int64(5), object(1)
memory usage: 1.1+ KB


In [19]:
stockDF.head()

Unnamed: 0,Date,Close,Start,High,Low,Volume
0,2018-07-02,10100,10850,10900,10000,137977
1,2018-06-29,10700,10550,10900,9990,170253
2,2018-06-28,10400,10900,10950,10150,155769
3,2018-06-27,10900,10800,11050,10500,133548
4,2018-06-26,10800,10900,11000,10700,63039


In [20]:
# stockDF['Date']=stockDF['Date'].astype('Datetime64')
# stockDF.info()

In [21]:
stDate=stockDF['Date'].str.split('-')

stockDF['Y']=stDate.str.get(0)
stockDF['M']=stDate.str.get(1)
stockDF['D']=stDate.str.get(2)

stD=stockDF.sort_values(by='D')
stD

# stD=stD.reset_index()
# stD.drop('index',axis=1)

Unnamed: 0,Date,Close,Start,High,Low,Volume,Y,M,D
19,2022-06-01,11900,11800,12100,11750,32062,2022,6,1
0,2018-07-02,10100,10850,10900,10000,137977,2018,7,2
18,2022-06-04,11900,11900,12200,11700,25171,2022,6,4
17,2022-06-05,12150,11800,12250,11800,42485,2022,6,5
16,2020-06-07,11950,12200,12300,11900,49088,2020,6,7
15,2021-06-08,11950,11950,12200,11800,59258,2021,6,8
14,2020-06-11,11950,12000,12250,11950,62293,2020,6,11
13,2020-06-12,13200,12200,13300,12050,558148,2020,6,12
12,2020-06-14,13450,13200,13700,13150,347451,2020,6,14
11,2018-06-15,13400,13600,13600,12900,201376,2018,6,15


In [22]:
stock=pd.concat([stockDF,stDate],axis=1)
stock.rename(columns={0:'Y',1:'M',2:'D'},inplace=True)
sD=stock.sort_values(by='D')
stock_end=sD.reset_index().drop('index',axis=1)
stock_end

Unnamed: 0,Date,Close,Start,High,Low,Volume,Y,M,D,Date.1
0,2022-06-01,11900,11800,12100,11750,32062,2022,6,1,"[2022, 06, 01]"
1,2018-07-02,10100,10850,10900,10000,137977,2018,7,2,"[2018, 07, 02]"
2,2022-06-04,11900,11900,12200,11700,25171,2022,6,4,"[2022, 06, 04]"
3,2022-06-05,12150,11800,12250,11800,42485,2022,6,5,"[2022, 06, 05]"
4,2020-06-07,11950,12200,12300,11900,49088,2020,6,7,"[2020, 06, 07]"
5,2021-06-08,11950,11950,12200,11800,59258,2021,6,8,"[2021, 06, 08]"
6,2020-06-11,11950,12000,12250,11950,62293,2020,6,11,"[2020, 06, 11]"
7,2020-06-12,13200,12200,13300,12050,558148,2020,6,12,"[2020, 06, 12]"
8,2020-06-14,13450,13200,13700,13150,347451,2020,6,14,"[2020, 06, 14]"
9,2018-06-15,13400,13600,13600,12900,201376,2018,6,15,"[2018, 06, 15]"
