In [None]:
# Notebook-wide display setting: with "all", IPython shows the value of every
# top-level expression statement in a cell instead of only the last one.
# Cells below rely on this when they end with lines such as `r1; r2; r3`.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

class disp(object):
    """Render several named objects side by side as HTML.

    Each argument is a *string* containing a Python expression, normally just
    a variable name such as ``'df'``.  The string is used as the caption and is
    evaluated in this module's global namespace to obtain the object to show.

    Objects that provide ``_repr_html_`` (e.g. DataFrames) are rendered with
    it; anything else (Series, scalars, arrays, ...) falls back to its
    ``repr()`` inside an HTML-escaped ``<pre>`` block instead of raising
    ``AttributeError``.

    NOTE: the names are passed to ``eval()``, which executes arbitrary code.
    Only pass trusted, literal names.
    """
    template = '<div style="float: left;padding:10px;"> <b>[{0}]</b> {1}</div>'

    def __init__(self, *args):
        # Names (expressions) to display, in the order given.
        self.args = args

    def _repr_html_(self):
        """Return one floating ``<div>`` per name, joined by newlines."""
        import html  # local import: keeps this cell self-contained

        def render(expr):
            # Explicit globals => no local variable of this method can shadow
            # a notebook variable that happens to have the same name.
            obj = eval(expr, globals())
            to_html = getattr(obj, '_repr_html_', None)
            body = to_html() if callable(to_html) else None
            if body is None:
                # No rich repr available (or it declined): show plain repr.
                body = '<pre>{}</pre>'.format(html.escape(repr(obj)))
            return self.template.format(expr, body)

        return '\n'.join(render(a) for a in self.args)

import pandas as pd  
import numpy as np

### [예제1] query() 메서드 이해

In [None]:
# Sample frame for the query() examples: four people and their ages.
df = pd.DataFrame(
    {
        'name': ['kim', 'lee', 'park', 'song'],
        'age': [20, 40, 35, 25],
    }
)

In [None]:
# [1]

# Row selection with boolean masks.
# The first two spellings are *expected* to fail: Python's `and` and chained
# comparisons need one True/False value, but comparing a Series yields one
# boolean per row.  The failures are caught and printed so the rest of the
# cell (and "Run All") still executes.
try:
    df1 = df[df['age'] >= 20 and df['age'] <= 30]
except ValueError as err:
    print('df1 -> ValueError:', err)

try:
    df2 = df[20 <= df['age'] <= 30]
except ValueError as err:
    print('df2 -> ValueError:', err)

# Working spelling: combine the element-wise masks with `&`.  The parentheses
# are required because `&` binds more tightly than the comparisons.
df3 = df[(df['age'] >= 20) & (df['age'] <= 30)]

disp('df', 'df3')

In [None]:
# [2]

# query() takes the filter as an expression string in which column names are
# used directly.  The three spellings below express the same 20..30 age range.
df4 = df.query('age >= 20 & age <= 30')  # `&` without parentheses around the comparisons
df5 = df.query('age >= 20 and age <= 30')  # `and` inside the expression string
df6 = df.query('20 <= age <= 30')  # chained comparison
disp('df4', 'df5', 'df6')

In [None]:
# [3]

# Selecting rows whose name is one of several values.
# Python's `in` compares the whole Series against each list element and then
# needs a single truth value, so this spelling is *expected* to fail; the
# error is caught and printed so the working spellings below still run.
try:
    df7 = df[df['name'] in ['kim', 'park']]
except ValueError as err:
    print('df7 -> ValueError:', err)

# Working spellings: OR-combined equality masks, or Series.isin().
df8 = df[(df['name'] == 'kim') | (df['name'] == 'park')]
df9 = df[df['name'].isin(['kim', 'park'])]
disp('df8', 'df9')

In [None]:
# [4]

# Membership tests written inside query(); each selects the rows whose name
# is 'kim' or 'park'.
df10 = df.query('name in ["kim", "park"]')  # `in` against a list literal
df11 = df.query('name == ["kim", "park"]')  # `==` against a list, used here as a membership test
df12 = df.query('name.isin(["kim", "park"])', engine='python')  # method call on the column, evaluated with the python engine
disp('df10', 'df11', 'df12')

### [예제2] query() 메서드 이해2

In [None]:
# Sample frame indexed by student name; note that one column is called
# 'class', which is also a Python keyword.
df = (
    pd.DataFrame(
        {
            'names': ['kim', 'lee', 'park', 'song'],
            'class': list('ABAC'),
            'exam': [80, 50, 90, 60],
        }
    )
    .set_index('names')
)

In [None]:
# [1]

# 'names' was moved into the index by set_index(), so it is no longer a
# column: plain column access is *expected* to fail.  The error is caught and
# printed so the query() spellings below still run.
try:
    df1 = df[df['names'] == 'kim']
except KeyError as err:
    print('df1 -> KeyError:', err)

# Inside query() the index can be addressed by its name or as `index`.
df2 = df.query('names=="kim"')
df3 = df.query('index=="kim"')

disp('df', 'df2', 'df3')

In [None]:
# [2]

x, y = df['class'], 'A'

# `class` is a Python keyword, so using the column name bare inside the query
# string is *expected* to fail.  The error is caught and printed so the
# working spellings below still run.
try:
    df4 = df.query('class == "A"')
except SyntaxError as err:
    print('df4 -> SyntaxError:', err)

df5 = df.query('`class` == "A"')  # backticks quote the awkward column name
df6 = df.query('@x == @y')        # `@name` refers to the Python variables x and y

disp('df5', 'df6')

In [None]:
# [3]

# `@name` inside a query string refers to the Python variable of that name.
x = df['exam'].mean()  # mean of the exam column
y = ['A', 'C']  # class labels to keep
df7 = df.query('exam <= @x')  # rows whose exam value is at or below x
v1 = df.query('`class` in @y ')['exam'].max()  # largest exam value among rows whose class is listed in y
disp('df','df7')
v1

### [예제3] filter() 메서드 이해

In [None]:
# Sample frame for the filter() examples: scores indexed by student name.
df = (
    pd.DataFrame(
        {
            'name': ['kim', 'lee', 'park', 'ki'],
            'kor': [80, 50, 90, 60],
            'eng': [70, 100, 80, 50],
        }
    )
    .set_index('name')
)

In [None]:
# [1]

# filter() selects by label, not by cell value.  No axis is given here, so
# for this DataFrame the labels being matched are the column names.
df1 = df.filter(items = ['kor'])  # exact label match
df2 = df.filter(like = 'k')  # labels that contain the substring 'k'
disp('df', 'df1', 'df2')

In [None]:
# [2]

# axis=0 makes filter() match against the index labels (the student names).
df3 = df.filter(items = ['kim', 'ki'], axis=0)  # exact index labels
df4 = df.filter(regex = '^k', axis=0)  # index labels matching the regex '^k'
# The same two selections written as boolean masks built from the index.
df5 = df.loc[df.index.isin(['kim', 'ki'])]
df6 = df[ df.index.str.startswith('k') ]

disp('df', 'df3', 'df4', 'df5', 'df6')

In [None]:
# [3]

# filter() accepts only one of `items`, `like` and `regex` per call, so
# passing two of them is *expected* to fail.  The error is caught and printed
# so the notebook keeps running.
try:
    df6 = df.filter(regex='a$', like='K', axis=0)
except TypeError as err:
    print('df6 -> TypeError:', err)

### [예제4] sr.apply() 이해

In [None]:
import pandas as pd
import numpy as np

# Sample frame for the Series.apply() examples: one int and one float column.
df = pd.DataFrame(
    {
        'A': [2, 5, 9],
        'B': [10.5, 20.5, 15.5],
    }
)
df

In [None]:
# [1]

# Several ways of rounding / squaring a column, to compare what apply() accepts.
r1 = df['B'].round()  # the Series method itself
r2 = df['B'].apply(round)  # Python's built-in round passed as a function
r3 = df['B'].apply('round')  # a name given as a string; presumably resolved to the Series method (see pandas docs)
r4 = df['B'].apply(np.round)  # a NumPy function
r5 = df['A'].apply(np.square)  # a NumPy function on the integer column

r1; r2; r3; r4; r5

In [None]:
# [2]

# Aggregating through apply(): the string name works, the built-in does not.
r6 = df['A'].sum()
r7 = df['A'].apply('sum')

# Passing the built-in sum hands it one element at a time, and summing a
# single number is *expected* to fail.  The error is caught and printed so
# the results above are still displayed.
try:
    r8 = df['A'].apply(sum)
except TypeError as err:
    print('r8 -> TypeError:', err)

r6; r7

In [None]:
# [3]

def func(x):
    """Print the type and value of ``x``, then return ``x`` multiplied by itself."""
    print(type(x), x)
    squared = x * x
    return squared

# Run func through apply(); func's print output shows what it is called with.
r9 = df['A'].apply(func)
r9

### [예제5] sr.apply() 메서드 이해 2

In [None]:
# Sample frame: product codes and their prices.
df = pd.DataFrame(
    {
        'model': ['TV', 'PC', 'HP', 'AD'],
        'price': [400, 200, 100, 300],
    }
)

In [None]:
# [1]

def func(t, x, y, z):
    """Return ``t * x + y - z``.

    ``t`` is the value being transformed; ``x``, ``y`` and ``z`` are the extra
    parameters (multiplier, amount added, amount subtracted).
    """
    scaled = t * x
    return scaled + y - z

# Four ways of supplying func's extra parameters (x, y, z) through apply();
# the value from the column is always passed as the first argument, t.
sr1 = df['price'].apply(func, x=0.8, y=100, z=10)  # all extras as keywords
sr2 = df['price'].apply(func, args=(0.8, 100, 10))  # all extras positionally via args
sr3 = df['price'].apply(func, args=[0.8, 100], z=10)  # args given as a list, plus one keyword
sr4 = df['price'].apply(func, args=(0.8,), y=100, z=10)  # one-element args tuple, plus keywords

df; sr1; sr2; sr3; sr4

In [None]:
# [2]

# Discounted price: 50 off when the price is below 300, otherwise 100 off.
# The result is stored in a new 'sale' column of df.
df['sale'] = df['price'].apply(lambda x: x - (50 if x < 300 else 100))
df

### [예제6] sr.apply() 활용 예

In [None]:
# Sample frame: full product names and prices.
df = pd.DataFrame(
    {
        'model': ['TV', 'COMPUTER', 'PHONE', 'AUDIO'],
        'price': [4000000, 2000000, 1000000, 3000000],
    }
)
df

In [None]:
# [1]

# Shorten model names longer than 4 characters to their first 3 characters
# followed by '~'; shorter names are kept unchanged.
li1 = [ x if len(x)<=4 else (x[:3]+'~') for x in df['model']]  # list comprehension -> list
sr1 = df['model'].apply(lambda x: x if len(x)<=4 else (x[:3]+'~'))  # same rule through apply()

li1; sr1

In [None]:
# [2]

# Translate full model names to two-letter codes with a lookup table.
code = {'TV':'TV','COMPUTER':'PC','PHONE':'HP','AUDIO':'AD'}
li2 = list(map(code.get, df['model']))  # built-in map() -> list
sr2 = df['model'].apply(code.get)  # same lookup through apply(); dict.get is the function applied

li2; sr2

In [None]:
# [3]

# Format prices with thousands separators and the '원' suffix, using the bound
# str.format method as the function.  Both lines add a new column to df.
df['price1'] = list(map("{:,}원".format, df['price']))  # built-in map()
df['price2'] = df['price'].apply('{:,}원'.format)  # Series.apply()
df

### [예제7] df.apply() 이해1

In [None]:
# Two-column frame used to observe what DataFrame.apply() hands to the function.
df = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [4, 5, 6],
    }
)


def f1(x):
    """Print the received object and return it multiplied by itself."""
    print(x)
    return x * x


r1 = df.apply(f1)
r1

### [예제8] df.apply() 이해2

In [None]:
# 3x3 integer frame for the DataFrame.apply() examples.
df = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [4, 5, 6],
        'C': [7, 8, 9],
    }
)

In [None]:
# [1]

# apply() without an axis argument: an aggregating function versus an
# element-wise function.
r1 = df.apply(np.sum)  # sum of each column
r2 = df.apply(np.square)  # every value squared

# r1 is printed separately; r2 is shown next to df through disp().
print(r1)
disp('df', 'r2')

In [None]:
# [2]

# axis=1: np.sum is applied across the columns, giving one sum per row.
r3 = df.apply(np.sum, axis=1)
r3

In [None]:
# [3]

def func(x):
    """Ignore ``x`` and return a new list ``[10, 20, 30]``."""
    return [tens * 10 for tens in (1, 2, 3)]

# Apply a function that returns a list, then show the result next to the input.
r4 = df.apply(func)
disp('df', 'r4')

In [None]:
# [4]

# axis=1: the lambda receives one row at a time and reads its values by
# column label, computing A*B + C for that row.
r5 = df.apply(lambda x: x['A']*x['B']+x['C'], axis=1)
df; r5

### [예제9] df.apply(): result_type 옵션

In [None]:
import pandas as pd
import numpy as np

# 3x3 integer frame with an explicit 1-based index.
df = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [4, 5, 6],
        'C': [7, 8, 9],
    },
    index=[1, 2, 3],
)
df

In [None]:
# [1] 

# Row-wise np.sum (axis=1) under each result_type setting, to compare the
# shape of what comes back.
r1 = df.apply(np.sum, axis=1)  # default result_type
r2 = df.apply(np.sum, axis=1, result_type='reduce')  # 'reduce'
r3 = df.apply(np.sum, axis=1, result_type='expand')  # 'expand'
r4 = df.apply(np.sum, axis=1, result_type='broadcast')  # 'broadcast'

r1; r2; r3; r4

In [None]:
# [2]

# The same comparison with np.unique, which returns an array per row rather
# than a single number, so the result_type settings can differ visibly.
r5 = df.apply(np.unique, axis=1)  # default result_type
r6 = df.apply(np.unique, axis=1, result_type='reduce')  # 'reduce'
r7 = df.apply(np.unique, axis=1, result_type='expand')  # 'expand'
r8 = df.apply(np.unique, axis=1, result_type='broadcast')  # 'broadcast'

r5; r6; r7; r8

### [예제10] df.apply(): result_type='expand' 옵션

In [None]:
# 3x3 integer frame with an explicit 1-based index.
df = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [4, 5, 6],
        'C': [7, 8, 9],
    },
    index=[1, 2, 3],
)
df

In [None]:
# [1]

# result_type='expand' with row functions that return a scalar, a 2-item
# list and a 4-item list, to compare the shape of each result.
r1 = df.apply(lambda x : 0, axis=1, result_type='expand')  # scalar per row
r2 = df.apply(lambda x : [1,2], axis=1, result_type='expand')  # 2-item list per row
r3 = df.apply(lambda x : [1,2,3,4], axis=1, result_type='expand')  # 4-item list per row

r1; r2; r3

In [None]:
# [2]

def f(x):
    """Return ``[total, mean]`` for the values in ``x``."""
    total = sum(x)
    return [total, total / len(x)]

# f returns a two-item list per row; with result_type='expand' the items are
# assigned to the new 'sum' and 'avg' columns (this adds columns to df).
df[['sum', 'avg']] = df.apply(f, axis=1, result_type='expand')

df

### [예제11] applymap()의 이해

In [None]:
# 3x3 integer frame with an explicit 1-based index.
df = pd.DataFrame({'A': [1, 2, 3], 'B':[4, 5, 6], 'C':[7, 8, 9]}, index=[1,2,3])
df

# applymap() calls a function once for every individual cell of the frame.
# Newer pandas provides the same operation as DataFrame.map() and deprecates
# applymap(); use map() when it exists and fall back to applymap() otherwise,
# so this cell runs without warnings or errors on either version.
elementwise = df.map if hasattr(df, 'map') else df.applymap

r1 = elementwise(float)                                   # every cell -> float
r2 = elementwise(lambda x: 'Odd' if x % 2 else 'Even')    # every cell -> parity label

r1; r2

### [예제12] apply() 활용예

In [None]:
# Sample frame: product codes and their prices.
df = pd.DataFrame(
    {
        'model': ['TV', 'PC', 'HP', 'AD'],
        'price': [200, 300, 150, 450],
    }
)
df

In [None]:
# [1]

def func1(x):
    """Classify a price: ``'high'`` above 300, ``'mid'`` from 200 to 300, else ``'low'``."""
    if x > 300:
        return 'high'
    return 'mid' if 200 <= x <= 300 else 'low'

# Label every price with func1 and store the labels in a new 'class' column.
df['class'] = df['price'].apply(func1)
df

In [None]:
# [2]

# Discount rate per model code.
dic = {
    'HP': 0.1,
    'TV': 0.2,
    'PC': 0.3,
    'AD': 0.4,
}


def func2(x):
    """Return ``[discounted price, discount]`` for one row.

    ``x`` is read through the keys ``'price'`` and ``'model'``; the discount
    is the price multiplied by the rate that ``dic`` holds for the model.
    """
    price = x['price']
    rate = dic.get(x['model'])
    val = price * rate
    return [price - val, val]

# func2 returns a two-item list per row; with result_type='expand' the items
# are assigned to the new 'sale' and 'dis' columns of df.
df[['sale','dis']] = df.apply(func2,axis=1,result_type='expand')
df

In [None]:
# [3] 

def func3(x):
    """Multiply ``x`` by 10000 and format it with thousands separators plus '원'."""
    amount = x * 10000
    return '{:,}원'.format(amount)

# Store the formatted price strings produced by func3 in a new 'price2' column.
df['price2'] = df['price'].apply(func3)
df

### [예제13] map() 메서드의 이해

In [None]:
# Sample frame: model, brand letter and price.
df = pd.DataFrame(
    {
        'model': ['TV', 'PC', 'TV', 'PC'],
        'brand': ['A', 'A', 'B', 'C'],
        'price': [200, 300, 150, 450],
    }
)
df

# Lookup table from brand letter to full brand code.
code = {
    'A': 'AAA',
    'B': 'BBB',
    'C': 'CCC',
}

# Series.map() with a function ...
r1 = df['price'].map(lambda x: 'high' if x >= 300 else 'low')
r2 = df['price'].map(float)
# ... and with a lookup table given as a Series or as a dict.
r3 = df['brand'].map(pd.Series(code))
r4 = df['brand'].map(code)

r1; r2; r3; r4

### [예제14] 다양한 mapping 방식 비교

In [None]:
# Sample frame: model, brand letter and price.
df = pd.DataFrame(
    {
        'model': ['TV', 'PC', 'TV', 'PC'],
        'brand': ['A', 'A', 'B', 'C'],
        'price': [200, 300, 150, 450],
    }
)
df

In [None]:
# [1]

# The lookup table deliberately has no entry for brand 'C', so the three
# methods can be compared on a value that is missing from the table.
code = {'A':'AA', 'B':'BB'}
r1 = df['brand'].map(code)  # Series.map with a dict
r2 = df['brand'].replace(code)  # Series.replace with a dict
r3 = df['brand'].apply(code.get)  # dict.get applied per value (returns None for a missing key)

r1; r2; r3

In [None]:
# [2]

# Put a missing value into the first price (this modifies df in place), then
# compare formatting with and without na_action='ignore'.
df.loc[0, 'price'] = np.nan
r4 = df['price'].map('{:,}만원'.format)  # the format function is given to map() as-is
r5 = df['price'].map('{:,}만원'.format, na_action='ignore')  # na_action='ignore': per pandas docs, missing values are passed through without calling the function

r4; r5

### [예제15] where(), mask() 메서드의 이해

In [None]:
# Sample frame: product codes and their prices.
df = pd.DataFrame(
    {
        'model': ['TV', 'PC', 'HP', 'AD'],
        'price': [200, 300, 150, 450],
    }
)
df

In [None]:
# [1]

# Conditional replacement on the price column, all using the test price >= 300.
r1 = df['price'].where(df['price'] >= 300)  # where() with no replacement value given
r2 = df['price'].where(df['price'] >= 300, 'low')  # where() with 'low' as the replacement value
r3 = df['price'].mask(df['price'] >= 300, 'high')  # mask() with 'high' as the replacement value
r4 = np.where(df['price']>=300, 'high', 'low')  # NumPy: 'high' where the test is true, 'low' elsewhere

r1; r2; r3; r4

In [None]:
# [2]

# DataFrame.where() also accepts callables for the condition and for the
# replacement; each callable is given the DataFrame and its result is used
# in place of the callable.
f1 = lambda x : x['price'] >=200                 # row condition: price of at least 200
# One '-' per row, built from the frame's own index so it works for any
# number of rows and any index labels.
f2 = lambda x : pd.Series('-', index=x.index)
r5 = df.where(f1, '-', axis=0)   # scalar replacement
r6 = df.where(f1, f2, axis=0)    # replacement produced by a callable
# Display the two results computed in this cell.
r5; r6