In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression statement in a cell, not just
# the last one; the cells below rely on this when they write `r1; r2`.
InteractiveShell.ast_node_interactivity = "all"

# Render several DataFrames side by side (horizontally) using HTML
class disp(object):
    """Show several named objects next to each other in a notebook cell.

    Each positional argument is a string: the name of a variable (or any
    expression) that is evaluated in the global namespace of the module
    where this class is defined -- in a notebook, the user namespace.
    The string itself is used as the caption of the rendered object.

    Objects exposing ``_repr_html_`` (e.g. DataFrames) are rendered with it;
    anything else (e.g. a Series, a dict) falls back to its HTML-escaped
    ``repr`` inside a ``<pre>`` element instead of raising AttributeError.

    NOTE: the strings are passed to ``eval``; only pass trusted, hand-written
    names, never text coming from an external source.
    """
    template = '<div style="float: left;padding:10px;"> <b>[{0}]</b> {1}</div>'

    def __init__(self, *args):
        self.args = args

    @staticmethod
    def _to_html(obj):
        """Return an HTML fragment for ``obj`` (rich repr, else escaped repr)."""
        from html import escape  # local import keeps the class self-contained

        render = getattr(obj, '_repr_html_', None)
        fragment = render() if callable(render) else None
        # _repr_html_ may legitimately return None (rich repr disabled)
        if fragment is None:
            fragment = '<pre>{}</pre>'.format(escape(repr(obj)))
        return fragment

    def _repr_html_(self):
        """Build one floating ``<div>`` per requested name, joined by newlines."""
        # Evaluate against the module globals only, so that names such as
        # 'a' or 'self' are not shadowed by this method's local variables.
        namespace = globals()
        return '\n'.join(
            self.template.format(name, self._to_html(eval(name, namespace)))
            for name in self.args
        )

import pandas as pd   
import numpy as np
import seaborn as sns
import pprint as pp

### [예제1] GroupBy 객체의 이해

In [None]:
# Sample data for example 1: exam scores labelled by grade and class
df = pd.DataFrame(
    {
        'grade': [1, 1, 1, 2, 2],
        'class': list('ABCAB'),
        'exam': [60, 90, 50, 100, 80],
    }
)
df

In [None]:
# [1] Group the rows by 'grade'; printing a GroupBy object shows only its
#     repr, not the grouped data.
gb = df.groupby(by='grade')
print(gb)

In [None]:
# [2] Look inside the GroupBy object
r1 = gb.groups        # mapping: group label -> row index labels
r2 = gb.get_group(2)  # rows belonging to the group whose key is 2
r1
disp('df', 'r2')

In [None]:
# [3] Per-grade mean of the numeric columns.
# 'class' holds strings, and pandas >= 2.0 raises TypeError when mean() meets
# a non-numeric column, so restrict the aggregation to numeric columns.
r3 = gb.mean(numeric_only=True)
disp('df', 'r3')

### [예제2] GroupBy 객체 이해 2

In [None]:
# Sample data for example 2: student names labelled by grade and class
df = pd.DataFrame(
    {
        'grade': [1, 1, 1, 2, 2],
        'class': list('ABABB'),
        'name': ['kim', 'lee', 'park', 'song', 'lew'],
    }
)
df

In [None]:
# [1] Column selection on a GroupBy object:
#     a single label is selected with [...], a list of labels with [[...]].
gb1 = df.groupby(by='grade')
gsr = gb1['name']
gdf = gb1[['class', 'name']]
print(gb1, gsr, gdf, sep='\n')

In [None]:
# [2] .groups and .get_group() on the column-selected GroupBy objects
r1, r2 = gsr.groups, gdf.groups
r3, r4 = gsr.get_group(2), gdf.get_group(2)

r1
r2
r3
r4

In [None]:
# [3] Grouping by two columns: every group key is a (grade, class) tuple
gb2 = df.groupby(by=['grade', 'class'])
r5 = gb2.groups
r6 = gb2.get_group((1, 'A'))
r5
r6

In [None]:
# [4] Count the non-null values per (grade, class) group
gb2 = df.groupby(by=['grade', 'class'])
r7 = gb2.count()
disp('df', 'r7')

### [예제3] 그룹화 방식 및 groupby() 옵션 이해

In [None]:
# Sample data for example 3: three subject scores labelled by grade and class
df = pd.DataFrame(
    {
        'grade': [1, 1, 1, 2, 2],
        'class': list('ABABB'),
        'kor': [100, 80, 60, 90, 80],
        'eng': [50, 100, 60, 80, 70],
        'math': [60, 50, 40, 100, 50],
    }
)
df

In [None]:
# [1] Group the frame first, then pick the column(s) to work on
gsr1 = df.groupby(by='grade')['kor']
gsr2 = df.groupby(by='grade')[['kor', 'eng', 'math']]
r1 = gsr1.groups
r2 = gsr2.groups
r1
r2

In [None]:
# [2] Pick the column(s) first, then group.
# The selection df['kor'] no longer contains the 'grade' column, so the
# key cannot be given by *name*: that raises KeyError. The error is caught
# and shown so the rest of the cell (and "Run All") keeps working.
try:
    gsr3 = df['kor'].groupby('grade')
except KeyError as err:
    print('KeyError:', err)

# Passing the key *values* (a Series aligned on the index) works.
gsr4 = df['kor'].groupby(df['grade'])
gsr5 = df.loc[:, 'kor':'math'].groupby(df['grade'])
r4, r5 = gsr4.groups, gsr5.groups
r4; r5

In [None]:
# [3] groupby() options
# as_index=False returns the group key as an ordinary column ...
r6 = df.groupby(by='grade', as_index=False)['kor'].max()
# ... and reset_index() on the default result moves the key out of the index
r7 = df.groupby(by='grade')['kor'].max().reset_index()
# sort=False skips sorting of the group keys
r8 = df.groupby(by='grade', sort=False)['kor'].max()

disp('df', 'r6', 'r7')
r8

### [예제4] 함수에 의한 그룹화

In [None]:
# Sample data for example 4: monthly sales, indexed by month.
# df2 is the transposed numeric part (months become the columns).
df1 = pd.DataFrame(
    {
        'month': [1, 3, 5, 9, 12],
        'model': ['TV', 'PC', 'TV', 'PC', 'HP'],
        'sale': [10, 30, 50, 40, 70],
        'stock': [100, 100, 200, 200, 300],
    }
).set_index('month')
df2 = df1.drop(columns='model').T
df1
df2

In [None]:
# [1-1] A function used as the key is called on each index label (the month);
#       its return value becomes the group label.
gb1 = df1.groupby(lambda month: month < 7)
gb2 = df1.groupby(lambda month: '1~6' if month < 7 else '7~12')
r1, r2 = gb1.groups, gb2.groups
r1
r2

In [None]:
# [1-2] Same function-based grouping as above, now aggregated with sum()
gb1 = df1.groupby(lambda month: month < 7)
gb2 = df1.groupby(lambda month: '1~6' if month < 7 else '7~12')
r3, r4 = gb1.sum(), gb2.sum()
disp('df1', 'r3', 'r4')

In [None]:
# [2] Group the *columns* of df2 (the months) into half-years.
# groupby(..., axis=1) is deprecated since pandas 2.1 and removed in 3.0;
# the portable spelling is: transpose, group the rows, transpose back.
r5 = df2.T.groupby(lambda x: '1~6' if x<7 else '7~12').sum().T
disp('df2', 'r5')

### [예제5] Splitting: dict, Series

In [None]:
# Sample data for example 5: monthly sales, indexed by month
df = pd.DataFrame(
    {
        'month': [1, 3, 5, 9, 12],
        'model': ['TV', 'PC', 'TV', 'PC', 'HP'],
        'sale': [10, 30, 50, 40, 70],
        'stock': [100, 100, 200, 200, 300],
    }
).set_index('month')
df

In [None]:
# [1] A dict or a Series used as the key maps index labels to group labels.
#     Month 5 has no entry in the mapping.
dic = {
    1: 'G1',
    9: 'G1',
    3: 'G2',
    12: 'G2',
}
sr = pd.Series(dic)
r1 = df.groupby(dic).groups
r2 = df.groupby(sr).groups
r1
r2

In [None]:
# [2] A list or an ndarray with one label per row can also serve as the key
l = ['G1', 'G1', 'G2', 'G2', 'G3']
arr = np.where(df['sale'] >= 50, 'high', 'low')  # label each row by its sale value
r3 = df.groupby(l).groups
r4 = df.groupby(arr).groups
arr
r3
r4

In [None]:
# [3] Aggregations on the list / ndarray based groups
l = ['G1', 'G1', 'G2', 'G2', 'G3']
arr = np.where(df['sale'] >= 50, 'high', 'low')
r5 = df.groupby(l)[['sale']].sum()     # list selection  -> DataFrame result
r6 = df.groupby(arr)['sale'].count()   # label selection -> Series result

disp('df', 'r5')
r6

### [예제6] MultiIndex 그룹화

In [None]:
# Sample data for example 6: sales with a two-level (month, brand) index
df = pd.DataFrame(
    {
        'month': [3, 3, 3, 9, 9],
        'brand': list('ABBAB'),
        'model': ['TV', 'PC', 'TV', 'PC', 'PC'],
        'sale': [10, 30, 50, 40, 70],
    }
).set_index(['month', 'brand'])
df

In [None]:
# [1] Group by index level: by position, by name, or by several levels at once
r1 = df.groupby(level=1).groups
r2 = df.groupby(level='brand').groups
r3 = df.groupby(level=[0, 1]).groups
r1
r2
pp.pprint(r3)

In [None]:
# [2] Combine an index level with a column as group keys.
# An index level can be referred to by its *name* ...
r4 = df.groupby(['month', 'model']).groups

# ... but not by its position: 0 is looked up as a label and raises KeyError.
# The error is caught and shown so the rest of the cell still runs.
try:
    r5 = df.groupby([0, 'model']).groups
except KeyError as err:
    print('KeyError:', err)

# pd.Grouper(level=...) is the explicit way to address a level by position.
r6 = df.groupby([pd.Grouper(level=0),'model']).groups
pp.pprint(r4); pp.pprint(r6)