# 데이터프레임 생성

In [1]:
import pandas as pd

# Sample student records used for the missing-value examples.
# None marks a missing entry; every column except 'student' has at least one.
student_records = {
    'student': ['s1', 's2', 's3', 's4', 's5', 's6'],
    'stat_score': [None, 90, 85, 71, 63, None],
    'math_score': [65, None, 87, 75, 57, 88],
    'sex': ['Female', 'Male', 'Female', None, 'Male', 'Male'],
    'pre_level': ['B', 'A', 'B', 'B', 'C', None],
}
obj = pd.DataFrame(data=student_records)

# 결측치 확인

In [3]:
# Count the missing values in each column (sum the boolean mask down the rows).
obj.isna().sum(axis=0)

student       0
stat_score    2
math_score    1
sex           1
pre_level     1
dtype: int64

In [4]:
# Number of missing values per row (sum the boolean mask across the columns).

obj.isna().sum(axis='columns')

0    1
1    1
2    0
3    1
4    0
5    2
dtype: int64

# 결측치 처리

In [6]:
# Display the full DataFrame, including the rows that contain missing values.
obj

Unnamed: 0,student,stat_score,math_score,sex,pre_level
0,s1,,65.0,Female,B
1,s2,90.0,,Male,A
2,s3,85.0,87.0,Female,B
3,s4,71.0,75.0,,B
4,s5,63.0,57.0,Male,C
5,s6,,88.0,Male,


In [5]:
# Drop every row that has at least one missing value.
# The result is a new DataFrame; obj itself is not modified.
obj.dropna(axis=0, how='any')

Unnamed: 0,student,stat_score,math_score,sex,pre_level
2,s3,85.0,87.0,Female,B
4,s5,63.0,57.0,Male,C


In [7]:
# Drop every column that has at least one missing value.
obj.dropna(axis='columns')

Unnamed: 0,student
0,s1
1,s2
2,s3
3,s4
4,s5
5,s6


## 'stat_score', 'math_score' 컬럼만 선택해 결측치 처리

In [9]:
# Select only the two score columns, keeping all rows.
obj.loc[:, ['stat_score', 'math_score']]

Unnamed: 0,stat_score,math_score
0,,65.0
1,90.0,
2,85.0,87.0
3,71.0,75.0
4,63.0,57.0
5,,88.0


In [10]:
# Restrict to the score columns, then drop rows where either score is missing.
obj.loc[:, ['stat_score', 'math_score']].dropna(how='any')

Unnamed: 0,stat_score,math_score
2,85.0,87.0
3,71.0,75.0
4,63.0,57.0


In [11]:
# Independent copy of the score columns so the fill examples work on df1, not obj.
df1 = obj.loc[:, ['stat_score', 'math_score']].copy()

In [12]:
# Replace every missing score with 0 (returns a new DataFrame; df1 is unchanged).
df1.fillna(value=0)

Unnamed: 0,stat_score,math_score
0,0.0,65.0
1,90.0,0.0
2,85.0,87.0
3,71.0,75.0
4,63.0,57.0
5,0.0,88.0


In [13]:
# Replace each missing score with the mean of its own column.
# The column means are computed from the non-missing values only.
df1.fillna(value=df1.mean(axis=0))

Unnamed: 0,stat_score,math_score
0,77.25,65.0
1,90.0,74.4
2,85.0,87.0
3,71.0,75.0
4,63.0,57.0
5,77.25,88.0


# 이상치 처리

In [14]:
# Fresh sample data for the outlier examples (no missing values this time).
score_records = {
    'student_id': ['s1', 's2', 's3', 's4', 's5', 's6'],
    'stat_score': [55, 90, 85, 10, 88, 99],
    'math_score': [65, 99, 67, 70, 57, 80],
}
obj = pd.DataFrame(data=score_records)

In [15]:
# Display the score DataFrame that the outlier examples operate on.
obj

Unnamed: 0,student_id,stat_score,math_score
0,s1,55,65
1,s2,90,99
2,s3,85,67
3,s4,10,70
4,s5,88,57
5,s6,99,80


In [16]:
# IQR-based outlier fences for stat_score:
# anything more than 1.5 * IQR below Q1 or above Q3 is treated as an outlier.
stat_scores = obj['stat_score']
q1, q3 = stat_scores.quantile(0.25), stat_scores.quantile(0.75)
iqr = q3 - q1
lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr

In [17]:
# Identify outliers: rows whose stat_score falls outside the [lower, upper] fences.

obj.loc[obj['stat_score'].lt(lower) | obj['stat_score'].gt(upper)]

Unnamed: 0,student_id,stat_score,math_score
3,s4,10,70


In [19]:
# Remove outliers: keep rows whose stat_score lies within [lower, upper].
# The bounds are inclusive so that, for non-missing scores, this is the exact
# complement of the outlier test (stat_score < lower or stat_score > upper).
# With strict inequalities a score sitting exactly on a fence would be dropped
# here even though it is never classified as an outlier.

obj[(obj['stat_score'] >= lower) & (obj['stat_score'] <= upper)]

Unnamed: 0,student_id,stat_score,math_score
0,s1,55,65
1,s2,90,99
2,s3,85,67
4,s5,88,57
5,s6,99,80
