# 데이터 선택 (p.17)

## 1. 데이터 구성하기
- 표준정규분포에서 추출한 10개의 무작위 데이터(변수=4, 자료 수=10, random seed=1234)

In [1]:
# pandas: 데이터 프레임 연산용
import pandas as pd
# numpy: 숫자 연산용
import numpy as np
# random 함수 
from numpy.random import randn

# Seed NumPy's global generator so every run draws the same sample.
np.random.seed(1234)
# 10 rows x 4 columns of randn() draws, with the columns labelled A through D.
df = pd.DataFrame(data=randn(10, 4), columns=list("ABCD"))

In [2]:
df

Unnamed: 0,A,B,C,D
0,0.471435,-1.190976,1.432707,-0.312652
1,-0.720589,0.887163,0.859588,-0.636524
2,0.015696,-2.242685,1.150036,0.991946
3,0.953324,-2.021255,-0.334077,0.002118
4,0.405453,0.289092,1.321158,-1.546906
5,-0.202646,-0.655969,0.193421,0.553439
6,1.318152,-0.469305,0.675554,-1.817027
7,-0.183109,1.058969,-0.39784,0.337438
8,1.047579,1.045938,0.863717,-0.122092
9,0.124713,-0.322795,0.841675,2.390961


## 2. slice를 이용한 데이터 선택

#### 1. B열 선택

In [3]:
# A single column label inside [] selects that one column.
df['B'] # the result is a Series (see "Name: B, dtype: float64" in the output)

0   -1.190976
1    0.887163
2   -2.242685
3   -2.021255
4    0.289092
5   -0.655969
6   -0.469305
7    1.058969
8    1.045938
9   -0.322795
Name: B, dtype: float64

#### 2. B, D열 선택

In [4]:
# A list of column labels inside [] selects those columns, in the listed
# order, and the result is a table containing only B and D.
df[['B', 'D']]

Unnamed: 0,B,D
0,-1.190976,-0.312652
1,0.887163,-0.636524
2,-2.242685,0.991946
3,-2.021255,0.002118
4,0.289092,-1.546906
5,-0.655969,0.553439
6,-0.469305,-1.817027
7,1.058969,0.337438
8,1.045938,-0.122092
9,-0.322795,2.390961


#### 3. 3~7행 선택 (4번째~8번째 행)

In [5]:
# A slice inside [] selects rows, not columns. The stop value 8 is excluded,
# so the rows returned are those with index 3, 4, 5, 6 and 7.
df[3:8] # selects the 4th through the 8th row

Unnamed: 0,A,B,C,D
3,0.953324,-2.021255,-0.334077,0.002118
4,0.405453,0.289092,1.321158,-1.546906
5,-0.202646,-0.655969,0.193421,0.553439
6,1.318152,-0.469305,0.675554,-1.817027
7,-0.183109,1.058969,-0.39784,0.337438


## 3-1. loc을 이용한 데이터 선택

#### 1. 2~8행, A, C열 선택

In [6]:
# .loc selects by label: rows labelled 2 through 8 (the stop label 8 IS
# included, unlike a plain [] slice) and the columns A and C.
df.loc[2:8, ['A', 'C']]

Unnamed: 0,A,C
2,0.015696,1.150036
3,0.953324,-0.334077
4,0.405453,1.321158
5,-0.202646,0.193421
6,1.318152,0.675554
7,-0.183109,-0.39784
8,1.047579,0.863717


#### 2. A, C열 선택

In [7]:
# A bare ":" in the row position keeps every row; only columns A and C are kept.
df.loc[:, ['A', 'C']]

Unnamed: 0,A,C
0,0.471435,1.432707
1,-0.720589,0.859588
2,0.015696,1.150036
3,0.953324,-0.334077
4,0.405453,1.321158
5,-0.202646,0.193421
6,1.318152,0.675554
7,-0.183109,-0.39784
8,1.047579,0.863717
9,0.124713,0.841675


#### 3. 3~8행 선택

In [8]:
# With no column selector, .loc returns all columns for the rows labelled
# 3 through 8 (both ends included).
df.loc[3:8]

Unnamed: 0,A,B,C,D
3,0.953324,-2.021255,-0.334077,0.002118
4,0.405453,0.289092,1.321158,-1.546906
5,-0.202646,-0.655969,0.193421,0.553439
6,1.318152,-0.469305,0.675554,-1.817027
7,-0.183109,1.058969,-0.39784,0.337438
8,1.047579,1.045938,0.863717,-0.122092


#### 4. 3행 선택(단일행)

In [9]:
# The label slice 3:3 includes both ends, so it yields exactly the row
# labelled 3, returned as a one-row table.
df.loc[3:3]

Unnamed: 0,A,B,C,D
3,0.953324,-2.021255,-0.334077,0.002118


## 3-2. iloc을 이용한 데이터 선택

#### 1. 0~6행, A열 선택

In [11]:
# .iloc selects by integer position: rows at positions 0 through 6 (the stop
# position 7 is excluded) and the column at position 0, which is A.
# The column position is given as a list, and the result is a one-column table.
df.iloc[0:7, [0]]

Unnamed: 0,A
0,0.471435
1,-0.720589
2,0.015696
3,0.953324
4,0.405453
5,-0.202646
6,1.318152


#### 2. A열 선택

In [12]:
# All rows, and only the column at position 0 (A), as a one-column table.
df.iloc[:, [0]]

Unnamed: 0,A
0,0.471435
1,-0.720589
2,0.015696
3,0.953324
4,0.405453
5,-0.202646
6,1.318152
7,-0.183109
8,1.047579
9,0.124713


#### 3. 2~6행 선택

In [13]:
# Rows at positions 2 through 6 with all columns; the stop position 7 is excluded.
df.iloc[2:7]

Unnamed: 0,A,B,C,D
2,0.015696,-2.242685,1.150036,0.991946
3,0.953324,-2.021255,-0.334077,0.002118
4,0.405453,0.289092,1.321158,-1.546906
5,-0.202646,-0.655969,0.193421,0.553439
6,1.318152,-0.469305,0.675554,-1.817027


#### 4. 5행 선택(단일행)

In [14]:
# The positional slice 5:6 excludes its stop, so it yields only the row at
# position 5, returned as a one-row table.
df.iloc[5:6]

Unnamed: 0,A,B,C,D
5,-0.202646,-0.655969,0.193421,0.553439


## 4. query를 이용한 데이터 선택

#### 1. D>0.5 데이터 선택

In [15]:
# query() takes the condition as a string that refers to columns by name;
# only rows whose D value is greater than 0.5 are returned.
df.query('D>0.5')

Unnamed: 0,A,B,C,D
2,0.015696,-2.242685,1.150036,0.991946
5,-0.202646,-0.655969,0.193421,0.553439
9,0.124713,-0.322795,0.841675,2.390961


#### 2. B>D 데이터 선택 -> query 대신 [ ] 안의 불리언 조건(불리언 인덱싱)으로 표현

In [16]:
# df.B > df.D compares the two columns row by row; placing that comparison
# inside [] keeps only the rows where B is greater than D.
df[df.B>df.D]

Unnamed: 0,A,B,C,D
1,-0.720589,0.887163,0.859588,-0.636524
4,0.405453,0.289092,1.321158,-1.546906
6,1.318152,-0.469305,0.675554,-1.817027
7,-0.183109,1.058969,-0.39784,0.337438
8,1.047579,1.045938,0.863717,-0.122092


#### 3. A>0.5 그리고 D<0.0 데이터 선택

In [17]:
# "&" combines the two conditions with AND: a row is kept only when A is
# above 0.5 and D is below 0.0 at the same time.
df.query('A>0.5 & D<0.0')

Unnamed: 0,A,B,C,D
6,1.318152,-0.469305,0.675554,-1.817027
8,1.047579,1.045938,0.863717,-0.122092


#### 4. A>0.5 혹은 D<0.0 데이터 선택

In [19]:
# "|" combines the two conditions with OR: a row is kept when A is above 0.5,
# or D is below 0.0, or both.
df.query('A>0.5 | D<0.0')

Unnamed: 0,A,B,C,D
0,0.471435,-1.190976,1.432707,-0.312652
1,-0.720589,0.887163,0.859588,-0.636524
3,0.953324,-2.021255,-0.334077,0.002118
4,0.405453,0.289092,1.321158,-1.546906
6,1.318152,-0.469305,0.675554,-1.817027
8,1.047579,1.045938,0.863717,-0.122092
