# Group By

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import pandas as pd
import numpy as np
import xarray as xr

def show_component(cmp):
    """Return the text form of ``cmp`` with each line moved onto its own indented row.

    The object is formatted with ``str.format`` and split on newlines; every
    resulting line is prefixed with a newline followed by a tab, so the
    returned string always starts with that newline + tab pair.
    """
    text = '{}'.format(cmp)
    return ''.join('\n\t' + line for line in text.split('\n'))

def show_df(df):
    """Display ``df`` as an HTML table wrapped in a div with a 55px left margin.

    IPython is imported inside the function, so it is only needed when the
    function is actually called.
    """
    from IPython.display import HTML, display

    table_markup = df.to_html()
    wrapped = '<div style="margin-left:55px">{}</div>'.format(table_markup)
    display(HTML(wrapped))

def show_dict(d):
    """Format the mapping ``d`` as brace-wrapped text, one ``key: value`` entry per line.

    The first entry is indented by a single space because it follows the
    opening brace on the same line; every later entry is indented by two
    spaces so the keys line up. Entries are separated by a comma and a line
    break, and the text is closed with a space and a closing brace.
    """
    entries = [
        '{}{}: {}'.format(' ' if position == 0 else '  ', key, value)
        for position, (key, value) in enumerate(d.items())
    ]
    return '{' + ',\n'.join(entries) + ' }'

In [None]:
# Number of rows in the sample frame; every column below is sized from it.
rows = 6

# Sample frame with one column per dtype used in the examples that follow.
df = pd.DataFrame({
    # Random integers drawn from [1, 4).
    'A': np.random.randint(low=1, high=4, size=rows),
    # Consecutive dates starting at 2019-01-01.
    'B': pd.date_range('20190101', periods=rows),
    # 0.1, 0.2, ... labelled 'a', 'b', ...; built from an integer range so the
    # length always equals `rows` (a float-step arange has no such guarantee).
    'C': pd.Series(np.arange(1, rows + 1) / 10.0, index=[chr(0x61 + n) for n in range(rows)], dtype='float32'),
    'D': np.array([3] * rows, dtype='int32'),
    # np.resize cycles the two labels to exactly `rows` entries, so odd row
    # counts work as well.
    'E': pd.Categorical(np.resize(['test', 'train'], rows)),
    # A scalar value is repeated for every row.
    'F': 'foo'
})

print('* when data frame “df” is:')
show_df(df)

## 按列分组

### 按单列进行分组

In [None]:
# Partition the frame by the values of the categorical column "E".
groupby = df.groupby(by='E')
print('* group by column "E", the result is: ' + show_component(groupby))

# `.groups` maps each group key to the row labels that belong to that group.
groups = groupby.groups
print('\n  then the groups is: ' + show_component(show_dict(groups)))

### 按多列同时分组

In [None]:
# A list of column names groups on the combination of their values.
groupby = df.groupby(by=['E', 'A'])
print('* group by column "E" and "A":')

# Show which row labels ended up under each key.
groups = groupby.groups
print('\n  then the groups is: ' + show_component(show_dict(groups)))

## 获取分组数据

### 获取各分组行索引

In [None]:
# Mapping from each value of column "E" to the row labels of that group.
groups = df.groupby(by='E').groups
print('* when group by column "E"')

# Look up the row labels of one group by its key.
data = groups['test']
print('\n  then the row index of group "test" is: ' + show_component(data))

data = groups['train']
print('\n  and the row index group "train" is: ' + show_component(data))

### 获取各分组数据

In [None]:
# Group on column "E", then pull each group out as its own frame.
groupby = df.groupby(by='E')
print('* when group by column "E"')

# get_group selects the rows that belong to the named group.
grouped_df = groupby.get_group('test')
print('\n  then group "test" is:')
show_df(grouped_df)

# Same lookup for the second label.
grouped_df = groupby.get_group('train')
print('  and group "train" is:')
show_df(grouped_df)

### 遍历分组

In [None]:
# Group on the combination of columns "E" and "A".
groupby = df.groupby(by=['E', 'A'])
print('* group by column "E" and "A":')

print('  then the data of each groups are:')
# Iterating a groupby yields (key, sub-frame) pairs.
for group in groupby:
    group_key, group_frame = group
    # Seven spaces of indentation place the bullet under the heading above.
    print(' ' * 7 + '- "{}"'.format(group_key))
    show_df(group_frame)