In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Show the value of every top-level expression statement in a cell, not only
# the last one, so lines such as `r2; r3` display both results.
InteractiveShell.ast_node_interactivity = "all"

class disp(object):
    """Render several named objects side by side as floated HTML panels.

    Each positional argument is a string containing a Python expression
    (normally just a variable name). When the object is displayed, every
    expression is evaluated in the global namespace of the module where this
    class is defined (the notebook namespace when defined in a cell) and the
    result is shown next to a bold ``[expression]`` caption.

    Objects that provide no ``_repr_html_`` method (or whose method returns
    ``None``) are shown as their HTML-escaped ``repr()`` inside ``<pre>``
    instead of raising.
    """

    template = '<div style="float: left;padding:10px;"> <b>[{0}]</b> {1}</div>'

    def __init__(self, *args):
        """Store the expression strings to be evaluated at display time."""
        self.args = args

    @staticmethod
    def _as_html(obj):
        """Return an HTML fragment for ``obj``, preferring its rich repr."""
        from html import escape

        render = getattr(obj, '_repr_html_', None)
        markup = render() if callable(render) else None
        if markup is None:
            # No rich representation available: fall back to plain text.
            markup = '<pre>{}</pre>'.format(escape(repr(obj)))
        return markup

    def _repr_html_(self):
        """Return one floated ``<div>`` per argument, joined by newlines."""
        # eval() executes arbitrary code: pass only trusted, author-written names.
        # Supplying globals() explicitly keeps the lookup in the module
        # namespace, so no local variable of this method can shadow a
        # same-named notebook variable.
        namespace = globals()
        return '\n'.join(
            self.template.format(name, self._as_html(eval(name, namespace)))
            for name in self.args
        )

import pandas as pd
import numpy as np

### [예제1] MultiIndex 형식의 DataFrame 구조

In [None]:
# Two-level column labels: outer level 'A'/'B', inner level 'eng'/'kor'.
col = [['A','A','B','B'], ['eng','kor','eng','kor']]
data = [[10,20,30,40],[50,60,70,80]]
df = pd.DataFrame(data, columns=col, index=[1,2])
df

# 'eng' appears only in the inner column level, so a plain label lookup raises
# KeyError. Catch it so the failure is shown without aborting the rest of the
# cell (and without breaking Restart & Run All).
try:
    r1 = df['eng']
except KeyError as err:
    r1 = None
    print('KeyError:', err)

r2 = df[('A', 'eng')]   # full (outer, inner) key -> a single column
r3 = df['A']            # outer key only -> the sub-frame under 'A'

r2; r3
print(df.columns)

### [예제2] MultiIndex 객체 생성 : MultiIndex() 함수

In [None]:
# Three ways of building a two-level index whose levels are named L1 and L2.

# (1) Constructor: `levels` lists the labels of each level, and `codes` gives,
#     per entry, the position of its label within the corresponding level.
idx1 = pd.MultiIndex(
    levels=[['A', 'B'], ['X', 'Y']],
    codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
    names=['L1', 'L2'],
)

# (2) One array of labels per level.
arr1 = [list('AABB'), list('XYXY')]
idx2 = pd.MultiIndex.from_arrays(arr1, names=['L1', 'L2'])

# (3) One tuple of labels per entry.
arr2 = [('A', 'X'), ('A', 'Y'), ('B', 'X'), ('B', 'Y')]
idx3 = pd.MultiIndex.from_tuples(arr2, names=['L1', 'L2'])

idx1
idx2
idx3

### [예제3] xs() 메서드 : MultiIndex 객체 인덱싱

In [None]:
# The same code table is reused for the row index and the column index.
code = [[0, 0, 1, 1], [0, 1, 0, 1]]
col = pd.MultiIndex(
    levels=[list('AB'), list('XY')],
    codes=code,
    names=['C1', 'C2'],
)
idx = pd.MultiIndex(
    levels=[list('ij'), list('st')],
    codes=code,
    names=['I1', 'I2'],
)
df = pd.DataFrame(np.arange(16).reshape(4, 4), columns=col, index=idx)

# Cross-sections: pick the entries whose label at the given level matches the key.
r1 = df.xs('s', level=1)           # row level addressed by position
r2 = df.xs('s', level='I2')        # the same row level addressed by name
r3 = df.xs('X', level=1, axis=1)   # column level addressed by position

disp('df', 'r1', 'r2', 'r3')

### [예제4] MultiIndex DF의 연산

In [None]:
# [1] Means of a frame whose rows carry a two-level index (g, c)
idx = pd.MultiIndex.from_arrays(
    [[1, 1, 2, 2], ['A', 'B', 'A', 'B']],
    names=['g', 'c'],
)
df = pd.DataFrame(
    {'kor': [100, 80, 50, 90], 'eng': [60, 50, 100, 80]},
    index=idx,
)

r1 = df.mean()                 # one mean per column
r2 = df.mean(axis='columns')   # one mean per row
df
r1
r2

In [None]:
# [2-1] Subtract the per-'g' group means from the original frame

idx = pd.MultiIndex.from_arrays(
    [[1, 1, 2, 2], ['A', 'B', 'A', 'B']],
    names=['g', 'c'],
)
df = pd.DataFrame(
    {'kor': [100, 80, 50, 90], 'eng': [60, 50, 100, 80]},
    index=idx,
)
# 'g' is an index level name here, so the grouping is done on that level.
adf = df.groupby(level='g').mean()

# adf is indexed by 'g' only; the subtraction relies on pandas aligning it
# with the 'g' level of df's two-level index.
r3 = df - adf
disp('df', 'adf', 'r3')

In [None]:
# [2-2] Subtract the per-'c' group means from the original frame

idx = pd.MultiIndex.from_arrays(
    [[1, 1, 2, 2], ['A', 'B', 'A', 'B']],
    names=['g', 'c'],
)
df = pd.DataFrame(
    {'kor': [100, 80, 50, 90], 'eng': [60, 50, 100, 80]},
    index=idx,
)
# 'c' is an index level name here, so the grouping is done on that level.
bdf = df.groupby(level='c').mean()

# bdf is indexed by 'c' only; the subtraction relies on pandas aligning it
# with the 'c' level of df's two-level index.
r4 = df - bdf
disp('df', 'bdf', 'r4')

### [예제5] stack(), unstack() 메서드

In [None]:
# Frame with two-level columns (A/B over X/Y) and a default integer row index.
col = pd.MultiIndex.from_arrays([list('AABB'), list('XYXY')])
df = pd.DataFrame(
    [[10, 20, 30, 40],
     [50, 60, 70, 80]],
    columns=col,
)

r1 = df.stack(level=-1)      # innermost column level moves into the row index
r2 = r1.unstack(level=-1)    # innermost row level moves back into the columns

disp('df', 'r1', 'r2')

### [예제6] stack(), unstack() 메서드의 이해

In [None]:
# [1-1] stack(): choosing the level by name or by position, and keeping NaN rows

l = [['A', 'A', 'B', 'B'], ['eng', 'kor', 'eng', 'kor']]
col = pd.MultiIndex.from_arrays(l, names=['class', 'subject'])
idx = pd.Index([1, 2], name='grade')
df = pd.DataFrame(
    [[10, 20, 30, 40],
     [np.nan, 60, np.nan, 80]],
    columns=col,
    index=idx,
)

r1 = df.stack(level='subject')   # column level selected by name
r2 = df.stack(1)                 # the same level selected by position
# NOTE(review): `dropna` is an option of the legacy stack implementation;
# confirm it is still accepted by the pandas version in use.
r3 = df.stack(dropna=False)      # default level, all-NaN rows are kept
disp('df', 'r1'); disp('r2', 'r3')

In [None]:
# [1-2] Stack both column levels at once, then unstack with and without a fill value

l = [['A', 'A', 'B', 'B'], ['eng', 'kor', 'eng', 'kor']]
col = pd.MultiIndex.from_arrays(l, names=['class', 'subject'])
idx = pd.Index([1, 2], name='grade')
df = pd.DataFrame(
    [[10, 20, 30, 40],
     [np.nan, 60, np.nan, 80]],
    columns=col,
    index=idx,
)

r4 = df.stack([0, 1])                      # both column levels go to the row index
r5 = r4.unstack(level=-1)                  # innermost row level back to columns
r6 = r4.unstack(level=-1, fill_value=0)    # same, missing cells filled with 0
r4
disp('r5', 'r6')

### [예제7] pivot() 함수

In [None]:
# Long-format input: one row per (A, B) pair with its value in C.
df = pd.DataFrame({
    'A': ['i', 'j', 'i', 'j'],
    'B': ['x', 'x', 'y', 'y'],
    'C': [10, 20, 30, 40],
})

# Rows come from 'A', columns from 'B'.
r1 = df.pivot(index='A', columns='B', values='C')   # cell values taken from 'C'
r2 = df.pivot(index='A', columns='B')               # `values` left unspecified

disp('df', 'r1', 'r2')

### [예제8] melt() 함수

In [None]:
# Wide-format input: identifier column 'A' plus value columns 'x' and 'y'.
df = pd.DataFrame({'A': ['i', 'j'], 'x': [10, 20], 'y': [30, 40]})

# Unpivot 'x' and 'y': their names go to column 'B', their values to column 'C'.
r = pd.melt(
    df,
    id_vars='A',
    value_vars=['x', 'y'],
    var_name='B',
    value_name='C',
)

disp('df', 'r')

### [예제9] melt() 함수의 이해

In [None]:
# [1] melt() with different combinations of id_vars / value_vars

d = {'class': ['A', 'B'], 'kor': [90, 60], 'eng': [100, 70]}
df1 = pd.DataFrame(d)

# Only id_vars given, with custom names for the variable and value columns.
r1 = pd.melt(df1, id_vars='class', var_name='subject', value_name='score')
# Only 'kor' is unpivoted; output column names are left at their defaults.
r2 = pd.melt(df1, id_vars='class', value_vars=['kor'])
# No id_vars: only the two listed value columns are unpivoted.
r3 = pd.melt(
    df1,
    value_vars=['kor', 'eng'],
    var_name='subject',
    value_name='score',
)

disp('df1')
disp('r1', 'r2', 'r3')

In [None]:
# [2] melt() on a frame with two-level column labels

col = [['class', 'mid', 'mid', 'fin'], ['', 'kor', 'eng', 'kor']]
df2 = pd.DataFrame(
    [['A', 50, 100, 90],
     ['B', 70, 30, 80]],
    columns=col,
)

# Columns are addressed by their full (level 0, level 1) tuples, and one
# variable-column name is supplied per column level.
r4 = pd.melt(
    df2,
    id_vars=[('class', '')],
    value_vars=[('mid', 'kor'), ('fin', 'kor')],
    var_name=['exam', 'subject'],
    value_name='score',
)

# col_level=0 makes melt work with the first column level only.
r5 = pd.melt(
    df2,
    id_vars='class',
    col_level=0,
    var_name='exam',
    value_name='score',
)
disp('df2', 'r4', 'r5')