# 결측치(결측값)
- 결측치(missing value)
    - NaN, NA, Null
    - 알려지지 않은 값, 알 수 없는 값
    

In [5]:
import pandas as pd
import numpy

In [3]:
# Sample records: a city name column plus two numeric total columns.
data = dict(
    city=['서울', '경기', '제주', '부산'],
    total1=[12000, 34000, 50500, 42000],
    total2=[12500, 38000, 150000, 37000],
)

# Build the DataFrame from the column-oriented dict and display it.
df = pd.DataFrame(data=data)
df

Unnamed: 0,city,total1,total2
0,서울,12000,12500
1,경기,34000,38000
2,제주,50500,150000
3,부산,42000,37000


In [4]:
# Inspect the dtype of every column before any missing value is introduced.
df.dtypes

city      object
total1     int64
total2     int64
dtype: object

In [7]:
# Overwrite total1 in row 1 with a missing value.
# numpy.nan is the canonical spelling; the numpy.NAN alias was removed in
# NumPy 2.0 and raises AttributeError there.
# NaN is a float, so the total1 column is upcast from int64 to float64.
df.loc[1, 'total1'] = numpy.nan
df

Unnamed: 0,city,total1,total2
0,서울,12000.0,12500
1,경기,,38000
2,제주,50500.0,150000
3,부산,42000.0,37000


In [8]:
# Re-check the dtypes: total1 is now float64 because it holds a NaN.
df.dtypes

city       object
total1    float64
total2      int64
dtype: object

In [9]:
# Check for missing values with info(): the "Non-Null Count" column shows
# how many entries in each column are not missing.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   city    4 non-null      object 
 1   total1  3 non-null      float64
 2   total2  4 non-null      int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 224.0+ bytes


In [13]:
# Check for missing values with a boolean mask (True where a value is missing).
# isnull() and isna() are the same function under two names.
df.isnull()
df.isna()

Unnamed: 0,city,total1,total2
0,False,False,False
1,False,True,False
2,False,False,False
3,False,False,False


In [18]:
# Pull out the rows whose value in one specific column is missing.
# Looking at the affected rows first helps decide how to handle them.
df.loc[df['total1'].isnull()]
df.loc[df['total1'].isna()]

Unnamed: 0,city,total1,total2
1,경기,,38000


In [20]:
# Replace missing values with a chosen value.
# fillna(value, axis, inplace)
# inplace defaults to False, so the filled result is returned and df itself
# is left unchanged.
df.fillna(0)

Help on method fillna in module pandas.core.frame:

fillna(value: 'Hashable | Mapping | Series | DataFrame' = None, *, method: 'FillnaOptions | None' = None, axis: 'Axis | None' = None, inplace: 'bool' = False, limit: 'int | None' = None, downcast: 'dict | None' = None) -> 'DataFrame | None' method of pandas.core.frame.DataFrame instance
    Fill NA/NaN values using the specified method.
    
    Parameters
    ----------
    value : scalar, dict, Series, or DataFrame
        Value to use to fill holes (e.g. 0), alternately a
        dict/Series/DataFrame of values specifying which value to use for
        each index (for a Series) or column (for a DataFrame).  Values not
        in the dict/Series/DataFrame will not be filled. This value cannot
        be a list.
    method : {'backfill', 'bfill', 'ffill', None}, default None
        Method to use for filling holes in reindexed Series:
    
        * ffill: propagate last valid observation forward to next valid.
        * backfill / b

In [None]:
# Drop the rows or columns that contain missing values.
# Dropping is most appropriate when every value in a row is missing.
# DataFrame.dropna(axis, how, inplace)
# inplace defaults to False, so this call returns a new frame and leaves df
# unchanged.
df.dropna()
# Append a row (label len(df)) in which every column is missing.
# numpy.nan is the canonical spelling; the numpy.NaN alias was removed in
# NumPy 2.0 and raises AttributeError there.
df.loc[len(df)] = numpy.nan
df

In [26]:
# Print the signature and parameter documentation of DataFrame.dropna.
help(df.dropna)

Help on method dropna in module pandas.core.frame:

dropna(*, axis: 'Axis' = 0, how: 'AnyAll | NoDefault' = <no_default>, thresh: 'int | NoDefault' = <no_default>, subset: 'IndexLabel' = None, inplace: 'bool' = False, ignore_index: 'bool' = False) -> 'DataFrame | None' method of pandas.core.frame.DataFrame instance
    Remove missing values.
    
    See the :ref:`User Guide <missing_data>` for more on which values are
    considered missing, and how to work with missing data.
    
    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Determine if rows or columns which contain missing values are
        removed.
    
        * 0, or 'index' : Drop rows which contain missing values.
        * 1, or 'columns' : Drop columns which contain missing value.
    
        Pass tuple or list to drop on multiple axes.
        Only a single axis is allowed.
    
    how : {'any', 'all'}, default 'any'
        Determine if row or column is removed from DataFrame

In [27]:
# 모든 값이 결측치인 행 삭제
df.dropna(how='all')

Unnamed: 0,city,total1,total2
0,서울,12000.0,12500.0
1,경기,,38000.0
2,제주,50500.0,150000.0
3,부산,42000.0,37000.0
