In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to display the value of every bare expression statement in a
# cell rather than only the last one; later cells rely on this when they end
# with several expressions such as `df; r1; r2; r3`.
InteractiveShell.ast_node_interactivity = "all"

def prt(*pVPK):
    """Print every argument, separating and terminating them with a blank line."""
    text = '\n\n'.join(map(str, pVPK))
    print(text, end='\n\n')
    
class disp(object):
    """Show several notebook objects side by side as floated HTML blocks.

    Each positional argument is a string holding the name (or any expression)
    of an object in the namespace where this class is defined. The string is
    used as the caption and is evaluated lazily, when the HTML is rendered.
    """
    template = '<div style="float: left;padding:10px;"> <b>[{0}]</b> {1}</div>'

    def __init__(self, *args):
        # Only the expression strings are stored; lookup happens at render time.
        self.args = args

    @staticmethod
    def _html_for(expr):
        """Return the HTML for the object that `expr` evaluates to."""
        import html

        # NOTE(security): eval() executes arbitrary code, so only pass names
        # written by the notebook author, never external input.
        # Evaluate against module globals explicitly: a bare eval() here would
        # also see this function's locals, which made variables named `a` or
        # `self` resolve to the wrong object.
        obj = eval(expr, globals())
        render = getattr(obj, '_repr_html_', None)
        markup = render() if callable(render) else None
        if markup is None:
            # No rich repr available (e.g. a Series): fall back to escaped text.
            markup = '<pre>{0}</pre>'.format(html.escape(repr(obj)))
        return markup

    def _repr_html_(self):
        """Build one captioned <div> per stored expression, joined by newlines."""
        return '\n'.join(self.template.format(expr, self._html_for(expr))
                         for expr in self.args)

import pandas as pd
import numpy as np    

### [예제1] NA 데이터 연산

In [None]:
# Load the sample data, using the first CSV column as the row index.
df = pd.read_csv("data/02_05_01.csv", index_col=0)

# Row totals computed three ways: explicit column addition, sum() with its
# default NA handling, and sum() with NA skipping switched off.
r1 = df["A"] + df["B"] + df["C"] + df["D"]
r2 = df.sum(axis=1)
r3 = df.sum(axis=1, skipna=False)

# Show the input and every result, one expression per line.
df
r1
r2
r3

### [예제2] 다양한 NA 처리 및 1000단위 구분자 인식

In [None]:
# [1] Read the same file twice: as-is, and with '?' and '-' treated as NA.

df1 = pd.read_csv("data/02_05_02.csv")
df2 = pd.read_csv("data/02_05_02.csv", na_values=["?", "-"])

# Remove the commas from column B by hand, then cast it to 64-bit integers.
df2["B"] = (
    df2["B"]
    .str.replace(",", "")
    .astype(np.int64)
)

disp("df1", "df2")
print(df1.dtypes)
print(df2.dtypes)

In [None]:
# [2] Let read_csv handle both the thousands separator and the NA markers.

df3 = pd.read_csv(
    "data/02_05_02.csv",
    thousands=",",
    na_values=["?", "-"],
)
df3.dtypes
df3

### [예제3] isna(), notna() 메서드 이해

In [None]:
# [r1, r2, r3] NA masks for the whole frame (isna / isnull) and for column B.

df = pd.read_csv(
    "data/02_05_02.csv",
    thousands=",",
    na_values=["?", "-"],
)

r1 = df.isna()
r2 = df.isnull()
r3 = df["B"].isna()

disp("df", "r1", "r2")
r3

In [None]:
# [r4, r5, r6] Non-NA masks for the whole frame (notna / notnull) and for column B.

df = pd.read_csv(
    "data/02_05_02.csv",
    thousands=",",
    na_values=["?", "-"],
)

r4 = df.notna()
r5 = df.notnull()
r6 = df["B"].notna()

disp("df", "r4", "r5")
r6

### [예제4] dropna() 메서드 이해

In [None]:
# [r1] dropna() with its default arguments.

df = pd.read_csv("data/02_05_01.csv", index_col=0)

# Defaults drop every row that contains at least one NA value.
r1 = df.dropna()

disp("df", "r1")

In [None]:
# [r2, r3] Row-wise dropna restricted to a subset of columns.

df = pd.read_csv("data/02_05_01.csv", index_col=0)

# r2: drop a row only when B and C are both NA.
r2 = df.dropna(how="all", subset=["B", "C"])
# r3: keep a row only when at least 2 of B, C, D are non-NA.
r3 = df.dropna(thresh=2, subset=["B", "C", "D"])

disp("df", "r2", "r3")

In [None]:
# [r4, r5] Column-wise dropna with a threshold, and dropna on a single column.

df = pd.read_csv('data/02_05_01.csv', index_col=0)

# Keep only the columns holding at least 3 non-NA values. `how` is deliberately
# not passed together with `thresh`: recent pandas raises TypeError for that
# combination, and older releases ignored `how` whenever `thresh` was given,
# so the result is unchanged.
r4 = df.dropna(axis=1, thresh=3)
# Drop the NA entries of column B (returns a Series).
r5 = df['B'].dropna()

disp('df', 'r4'); r5

### [예제5] fillna() 메서드 이해

In [None]:
# [r1] Replace every NA value with a constant.

df = pd.read_csv("data/02_05_01.csv", index_col=0)
r1 = df.fillna(value=0)

disp("df", "r1")

In [None]:
# [r2] Forward fill: each NA takes the last valid value above it in its column.

df = pd.read_csv('data/02_05_01.csv', index_col=0)
# Use ffill() directly; fillna(method='ffill') is deprecated in recent pandas
# and produces the same result.
r2 = df.ffill()

disp('df', 'r2')

In [None]:
# [r3] Backward fill along each row, filling at most one consecutive NA.

df = pd.read_csv('data/02_05_01.csv', index_col=0)
# Use bfill() directly; fillna(method='bfill', ...) is deprecated in recent
# pandas and produces the same result.
r3 = df.bfill(axis=1, limit=1)

disp('df', 'r3')

In [None]:
# [r4] Per-column fill values supplied as a dict keyed by column label.

df = pd.read_csv("data/02_05_01.csv", index_col=0)
r4 = df.fillna({"B": -1, "C": -2})

disp("df", "r4")

In [None]:
# [r5] Fill each column's NA values with that column's minimum.

df = pd.read_csv("data/02_05_01.csv", index_col=0)

# Print the per-column minimums that are used as fill values below.
print(df.min())
r5 = df.fillna(value=df.min())

disp("df", "r5")

In [None]:
# [r6] Fill NA values from another DataFrame.

df = pd.read_csv("data/02_05_01.csv", index_col=0)

# 3x3 frame holding 10..18 with columns A, B, C and the default integer index.
df1 = pd.DataFrame(
    np.arange(10, 19).reshape(3, 3),
    columns=["A", "B", "C"],
)
# NOTE(review): fill values are presumably matched by index and column label,
# so only cells whose labels exist in df1 can be filled -- confirm against the data.
r6 = df.fillna(df1)

disp("df", "df1", "r6")

### [예제6] replace() 메서드 이해

In [None]:
# [r1 ~ r5] replace() called with and without an explicit replacement value.

df = pd.read_csv('data/02_05_03.csv', index_col=0)
# NOTE(review): r1, r2 and r4 pass no `value`. Older pandas releases appear to
# treat that as a pad (forward) fill of the matched cells, and newer releases
# deprecate the form -- confirm the pandas version this notebook targets.
r1 = df.replace() 
r2 = df.replace(np.nan)
# NaN -> 0
r3 = df.replace(np.nan, 0)
r4 = df.replace(to_replace=3.0)
# 3.0 -> 300
r5 = df.replace(3.0, 300)

disp('df', 'r1', 'r2', 'r3', 'r4', 'r5')

In [None]:
# [r6 ~ r8] replace() with a list of values to search for.

df = pd.read_csv('data/02_05_03.csv', index_col=0)
# NOTE(review): r6 passes no `value`; this relies on the legacy behaviour of
# replace() without a replacement, which newer pandas deprecates -- confirm
# the pandas version this notebook targets.
r6 = df.replace([1,3])
# Both 1 and 3 become -1.
r7 = df.replace([1,3], -1) 
# Lists are paired element-wise: 1 -> -1 and 3 -> -2.
r8 = df.replace([1,3], [-1, -2])

disp('df', 'r6', 'r7', 'r8')

In [None]:
# [r9, r10] replace() driven by dict mappings.

df = pd.read_csv("data/02_05_03.csv", index_col=0)

# r9: an {old: new} mapping applied to the whole frame.
r9 = df.replace({1: -1, 2: -2, 3: -3})
# r10: a nested mapping that limits the replacement to column B.
r10 = df.replace({"B": {1: -1, 3: -3}})

disp("df", "r9", "r10")

In [None]:
# [r11, r12] Per-column search values given as a dict keyed by column label.

df = pd.read_csv("data/02_05_03.csv", index_col=0)

# r11: in B look for 1 and 3, in C look for 2 and 5; every match becomes -1.
r11 = df.replace({"B": [1, 3], "C": [2, 5]}, -1)
# r12: in B, 3 becomes -2; in C, 1 becomes -3.
r12 = df.replace({"B": 3, "C": 1}, {"B": -2, "C": -3})

disp("df", "r11", "r12")

### [예제7] replace()의 정규식 이용

In [None]:
# Two string columns; one value contains embedded spaces.
df = pd.DataFrame({
    "A": ["one", "th r ee"],
    "B": ["two", "four"],
})
df

# Every value matching the pattern '.+' is replaced with '***'.
r1 = df.replace(".+", "***", regex=True)
r1

In [None]:
# Two string columns used to show a regex anchored at the start of the value.
df = pd.DataFrame({
    "A": ["one", "three"],
    "B": ["two", "four"],
})
df

# A leading 'th' is replaced with 'f'; values without that prefix are unchanged.
r2 = df.replace(to_replace="^th", value="f", regex=True)
r2