# 6.3. DataFrame(데이터프레임)

In [2]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame, Index

# Show DataFrames as plain text instead of HTML tables in notebook output.
pd.options.display.notebook_repr_html = False

# 3x3 frame of random floats with default integer row/column labels (0..2).
# No seed is set, so the values differ on every run.
df1 = pd.DataFrame(data=np.random.rand(3, 3))
df1

          0         1         2
0  0.744197  0.962776  0.386615
1  0.390614  0.183582  0.245680
2  0.162151  0.138704  0.158631

In [3]:
df1[1] # plain [] indexing selects the column labeled 1 (the second column)

0    0.962776
1    0.183582
2    0.138704
Name: 1, dtype: float64

In [4]:
df1.loc[1] # label-based (explicit) indexing: the row labeled 1, here the second row

0    0.390614
1    0.183582
2    0.245680
Name: 1, dtype: float64

In [5]:
df1.iloc[1] # position-based (implicit) indexing: the second row

0    0.390614
1    0.183582
2    0.245680
Name: 1, dtype: float64

In [6]:
df1.0 # intentional error: an integer column label cannot be used as an attribute name, so this is a SyntaxError (use df1[0])

SyntaxError: invalid syntax (2312581027.py, line 1)

In [7]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame, Index

# Two integer Series labeled by city name; the Korean and Japanese labels do not overlap.
series_kr = pd.Series(
    data=np.arange(5),
    index=['seoul', 'pusan', 'incheon', 'daejeon', 'daegu'],
)
series_jp = pd.Series(
    data=np.arange(0, 10, 2),
    index=['tokyo', 'osaka', 'sendai', 'yamagata', 'yonezawa'],
)

# Combining them aligns on the union of the labels; a city missing from one
# Series gets NaN in that column, which is why both columns become float.
cities_asia = pd.DataFrame(data={'korea': series_kr, 'japan': series_jp})
cities_asia

          korea  japan
daegu       4.0    NaN
daejeon     3.0    NaN
incheon     2.0    NaN
osaka       NaN    2.0
pusan       1.0    NaN
sendai      NaN    4.0
seoul       0.0    NaN
tokyo       NaN    0.0
yamagata    NaN    6.0
yonezawa    NaN    8.0

In [8]:
# Select the 'korea' column
cities_asia['korea'] # same as cities_asia.korea

daegu       4.0
daejeon     3.0
incheon     2.0
osaka       NaN
pusan       1.0
sendai      NaN
seoul       0.0
tokyo       NaN
yamagata    NaN
yonezawa    NaN
Name: korea, dtype: float64

In [9]:
# Attribute-style access to the 'korea' column; same result as cities_asia['korea']
cities_asia.korea

daegu       4.0
daejeon     3.0
incheon     2.0
osaka       NaN
pusan       1.0
sendai      NaN
seoul       0.0
tokyo       NaN
yamagata    NaN
yonezawa    NaN
Name: korea, dtype: float64

In [10]:
# Select the row labeled 'tokyo'
cities_asia.loc['tokyo']

korea    NaN
japan    0.0
Name: tokyo, dtype: float64

In [11]:
# Select the 8th row by position (index 7), which is the 'tokyo' row
cities_asia.iloc[7]

korea    NaN
japan    0.0
Name: tokyo, dtype: float64

In [12]:
series_kr.daegu # an index label can be used as an attribute; same as series_kr['daegu']

4

In [13]:
# Intentional error: 'daegu' is not a label of series_jp, so attribute access raises AttributeError
series_jp.daegu

AttributeError: 'Series' object has no attribute 'daegu'

In [14]:
# Row labels of the frame
cities_asia.index

Index(['daegu', 'daejeon', 'incheon', 'osaka', 'pusan', 'sendai', 'seoul',
       'tokyo', 'yamagata', 'yonezawa'],
      dtype='object')

In [15]:
# Column labels of the frame
cities_asia.columns

Index(['korea', 'japan'], dtype='object')

In [16]:
# The data as a 2-D NumPy array, without the row/column labels
cities_asia.values

array([[ 4., nan],
       [ 3., nan],
       [ 2., nan],
       [nan,  2.],
       [ 1., nan],
       [nan,  4.],
       [ 0., nan],
       [nan,  0.],
       [nan,  6.],
       [nan,  8.]])

In [17]:
# Values of the second row (position 1) of the underlying array
cities_asia.values[1]


array([ 3., nan])

## 6.3.1. T(전치)

In [18]:
# Transpose: rows become columns and columns become rows
cities_asia.T

       daegu  daejeon  incheon  osaka  pusan  sendai  seoul  tokyo  yamagata  \
korea    4.0      3.0      2.0    NaN    1.0     NaN    0.0    NaN       NaN   
japan    NaN      NaN      NaN    2.0    NaN     4.0    NaN    0.0       6.0   

       yonezawa  
korea       NaN  
japan       8.0  

## 6.3.2. 슬라이싱

In [19]:
# Slice by position: the first three rows and the first two columns
cities_asia.iloc[:3, :2]

         korea  japan
daegu      4.0    NaN
daejeon    3.0    NaN
incheon    2.0    NaN

In [20]:
# Overwrite the value at the third row, first column (incheon / korea) by position
cities_asia.iloc[2,0] = 12

In [21]:
# First three rows, all columns; the updated value (12) is now visible
cities_asia.iloc[:3, :]

         korea  japan
daegu      4.0    NaN
daejeon    3.0    NaN
incheon   12.0    NaN

In [23]:
# Same as cities_asia.iloc[:3, :]
cities_asia.iloc[:3]

         korea  japan
daegu      4.0    NaN
daejeon    3.0    NaN
incheon   12.0    NaN

In [24]:
# Every second row, starting from the first
cities_asia.iloc[::2]

          korea  japan
daegu       4.0    NaN
incheon    12.0    NaN
pusan       1.0    NaN
seoul       0.0    NaN
yamagata    NaN    6.0

In [25]:
# Every second row in reverse order, starting from the last
cities_asia.iloc[::-2]

          korea  japan
yonezawa    NaN    8.0
tokyo       NaN    0.0
sendai      NaN    4.0
osaka       NaN    2.0
daejeon     3.0    NaN

In [26]:
# Last three rows: the slice starts at the third row from the end
cities_asia.iloc[-3:]

          korea  japan
tokyo       NaN    0.0
yamagata    NaN    6.0
yonezawa    NaN    8.0

In [27]:
# All rows except the last three: the slice ends at the third row from the end.
# Uses .iloc so the positional row slice is explicit, like the other slicing cells in this section.
cities_asia.iloc[:-3]


         korea  japan
daegu      4.0    NaN
daejeon    3.0    NaN
incheon   12.0    NaN
osaka      NaN    2.0
pusan      1.0    NaN
sendai     NaN    4.0
seoul      0.0    NaN

## 6.3.3. 슬라이싱으로 역순 정렬하기

In [28]:
# A row slice with step -1 returns all rows in reverse order
cities_asia[::-1]

          korea  japan
yonezawa    NaN    8.0
yamagata    NaN    6.0
tokyo       NaN    0.0
seoul       0.0    NaN
sendai      NaN    4.0
pusan       1.0    NaN
osaka       NaN    2.0
incheon    12.0    NaN
daejeon     3.0    NaN
daegu       4.0    NaN

## 6.3.4. 마스킹

In [29]:
# Boolean mask: keep the rows whose 'korea' value is greater than 10
cities_asia[cities_asia.korea > 10]

         korea  japan
incheon   12.0    NaN

In [None]:
# Row labels again; the earlier iloc assignment changed a value, not the labels
cities_asia.index

Index(['daegu', 'daejeon', 'incheon', 'osaka', 'pusan', 'sendai', 'seoul',
       'tokyo', 'yamagata', 'yonezawa'],
      dtype='object')

In [None]:
# Column labels again
cities_asia.columns

Index(['korea', 'japan'], dtype='object')

In [None]:
# The data as a NumPy array; the third row now holds the 12 assigned earlier via iloc
cities_asia.values

array([[ 4., nan],
       [ 3., nan],
       [12., nan],
       [nan,  2.],
       [ 1., nan],
       [nan,  4.],
       [ 0., nan],
       [nan,  0.],
       [nan,  6.],
       [nan,  8.]])

In [None]:
# Values of the second row (position 1)
cities_asia.values[1]

array([ 3., nan])

In [None]:
# The 'korea' column; incheon now shows the updated value 12
cities_asia['korea']

daegu        4.0
daejeon      3.0
incheon     12.0
osaka        NaN
pusan        1.0
sendai       NaN
seoul        0.0
tokyo        NaN
yamagata     NaN
yonezawa     NaN
Name: korea, dtype: float64

In [None]:
# The 'japan' column; Korean cities have no value here and show NaN
cities_asia['japan']

daegu       NaN
daejeon     NaN
incheon     NaN
osaka       2.0
pusan       NaN
sendai      4.0
seoul       NaN
tokyo       0.0
yamagata    6.0
yonezawa    8.0
Name: japan, dtype: float64

In [None]:
# Transpose: rows become columns and columns become rows
cities_asia.T

Unnamed: 0,daegu,daejeon,incheon,osaka,pusan,sendai,seoul,tokyo,yamagata,yonezawa
korea,4.0,3.0,12.0,,1.0,,0.0,,,
japan,,,,2.0,,4.0,,0.0,6.0,8.0


In [None]:
# Slice by position: the first three rows and the first two columns
cities_asia.iloc[:3, :2]

Unnamed: 0,korea,japan
daegu,4.0,
daejeon,3.0,
incheon,12.0,


In [None]:
# Masking: keep the rows whose 'korea' value is greater than 10
cities_asia[cities_asia.korea > 10]

Unnamed: 0,korea,japan
incheon,12.0,
