In [None]:
## 什么是Group？

In [4]:
from clumper import Clumper

# Sample records: 'a' holds a number and 'grp' holds the label used for grouping.
list_dicts = [
    {'a': 6, 'grp': 'a'},
    {'a': 2, 'grp': 'b'},
    {'a': 7, 'grp': 'a'},
    {'a': 9, 'grp': 'b'},
    {'a': 5, 'grp': 'a'},
]

# Grouping alone keeps all five records; it only registers 'grp' as the group
# key, which is visible in the repr shown as this cell's output.
Clumper(list_dicts).group_by('grp')

<Clumper groups=('grp',) len=5 @0x103cb0290>

当前的分组以 grp 字段作为分组键（key）
![](img/add_grp.png)

执行 ``.group_by('grp')`` 之后，表示你关心的是每个 grp 分组，后续的计算都会在各组内分别进行。具体来说，一个组是 ``{'grp': 'a'}``，另一个组是 ``{'grp': 'b'}``。

## Agg
### without groups
![](img/agg-without-group.png)

In [3]:
from clumper import Clumper

# Sample records: 'a' holds a number and 'grp' holds a group label.
list_dicts = [
    {'a': 6, 'grp': 'a'},
    {'a': 2, 'grp': 'b'},
    {'a': 7, 'grp': 'a'},
    {'a': 9, 'grp': 'b'},
    {'a': 5, 'grp': 'a'},
]

# No group is set here, so the sum ('s') and mean ('m') of 'a' are taken over
# every record and a single summary record is returned.
Clumper(list_dicts).agg(
    s=('a', 'sum'),
    m=('a', 'mean'),
).collect()

[{'s': 29, 'm': 5.8}]

### with groups
分别计算组grp=a、组grp=b的sum和mean
![](img/agg-with-group.png)

In [6]:
from clumper import Clumper

# Sample records: 'a' holds a number and 'grp' holds the label used for grouping.
list_dicts = [
    {'a': 6, 'grp': 'a'},
    {'a': 2, 'grp': 'b'},
    {'a': 7, 'grp': 'a'},
    {'a': 9, 'grp': 'b'},
    {'a': 5, 'grp': 'a'},
]

# With 'grp' as the group key, the sum ('s') and mean ('m') of 'a' are computed
# per group, giving one summary record for each distinct 'grp' value.
Clumper(list_dicts).group_by('grp').agg(
    s=('a', 'sum'),
    m=('a', 'mean'),
).collect()

[{'grp': 'a', 's': 18, 'm': 6}, {'grp': 'b', 's': 11, 'm': 5.5}]

### agg内置的统计函数名
内置的统计函数，可直接通过字符串调用

```python
# String names accepted by agg, each mapped to the callable that is applied to
# the collected values (called `d` in the lambdas below).
{
  "mean": mean,
  "count": lambda d: len(d),  # how many values there are
  "unique": lambda d: list(set(d)),  # distinct values; built via a set, so the order is not guaranteed
  "n_unique": lambda d: len(set(d)),  # how many distinct values there are
  "sum": sum,
  "min": min,
  "max": max,
  "median": median,
  "var": variance,  # NOTE(review): presumably statistics.variance; the import is not shown in this excerpt
  "std": stdev,  # NOTE(review): presumably statistics.stdev; the import is not shown in this excerpt
  "values": lambda d: d,  # the values as-is, without reducing them
  "first": lambda d: d[0],  # first value
  "last": lambda d: d[-1],  # last value
}
```

## Transform
``.transform()``与``.agg()``类似。主要的区别是：transform 不会压缩记录，原有的记录和字段都会保留，统计结果会作为新字段添加到每一条记录上。

### without groups
![](img/transform-without-groups.png)

In [10]:
from clumper import Clumper

# Sample records: "a" holds a number and "grp" holds a group label.
data = [
    {"a": 6, "grp": "a"},
    {"a": 2, "grp": "b"},
    {"a": 7, "grp": "a"},
    {"a": 9, "grp": "b"},
    {"a": 5, "grp": "a"},
]

# No group is set, so every record keeps its own fields and gains the same two
# extras: "s" (sum of "a" over all records) and "u" (distinct values of "a").
Clumper(data).transform(
    s=("a", "sum"),
    u=("a", "unique"),
).collect()

[{'a': 6, 'grp': 'a', 's': 29, 'u': [2, 5, 6, 7, 9]},
 {'a': 2, 'grp': 'b', 's': 29, 'u': [2, 5, 6, 7, 9]},
 {'a': 7, 'grp': 'a', 's': 29, 'u': [2, 5, 6, 7, 9]},
 {'a': 9, 'grp': 'b', 's': 29, 'u': [2, 5, 6, 7, 9]},
 {'a': 5, 'grp': 'a', 's': 29, 'u': [2, 5, 6, 7, 9]}]

### with groups
![](img/transform-with-groups.png)

In [11]:
from clumper import Clumper

# Sample records: "a" holds a number and "grp" holds the label used for grouping.
data = [
    {"a": 6, "grp": "a"},
    {"a": 2, "grp": "b"},
    {"a": 7, "grp": "a"},
    {"a": 9, "grp": "b"},
    {"a": 5, "grp": "a"},
]

# With "grp" as the group key, "s" and "u" are computed inside each group, so
# records sharing a "grp" value receive the same sum and unique-value list.
Clumper(data).group_by("grp").transform(
    s=("a", "sum"),
    u=("a", "unique"),
).collect()

[{'a': 6, 'grp': 'a', 's': 18, 'u': [5, 6, 7]},
 {'a': 7, 'grp': 'a', 's': 18, 'u': [5, 6, 7]},
 {'a': 5, 'grp': 'a', 's': 18, 'u': [5, 6, 7]},
 {'a': 2, 'grp': 'b', 's': 11, 'u': [9, 2]},
 {'a': 9, 'grp': 'b', 's': 11, 'u': [9, 2]}]

## Mutate
``.mutate()``用于给每条记录新增字段。clumper库中的``row_number``可以给每条记录标出它的序号（第几条，从1开始）。

### without groups
![](img/mutate-without-group.png)

In [13]:
from clumper import Clumper
from clumper.sequence import row_number

# Sample records: 'a' holds a number and 'grp' holds a group label.
list_dicts = [
    {'a': 6, 'grp': 'a'},
    {'a': 2, 'grp': 'b'},
    {'a': 7, 'grp': 'a'},
    {'a': 4, 'grp': 'b'},
    {'a': 5, 'grp': 'a'},
]

# Add an 'index' field to every record; without groups the numbering runs
# 1, 2, 3, ... across the whole collection in its existing order.
Clumper(list_dicts).mutate(index=row_number()).collect()

[{'a': 6, 'grp': 'a', 'index': 1},
 {'a': 2, 'grp': 'b', 'index': 2},
 {'a': 7, 'grp': 'a', 'index': 3},
 {'a': 4, 'grp': 'b', 'index': 4},
 {'a': 5, 'grp': 'a', 'index': 5}]

### with groups
![](img/mutate-with-group.png)

In [14]:
from clumper import Clumper
from clumper.sequence import row_number

# Sample records: 'a' holds a number and 'grp' holds the label used for grouping.
list_dicts = [
    {'a': 6, 'grp': 'a'},
    {'a': 2, 'grp': 'b'},
    {'a': 7, 'grp': 'a'},
    {'a': 4, 'grp': 'b'},
    {'a': 5, 'grp': 'a'},
]

# With 'grp' as the group key, the 'index' numbering restarts at 1 inside each
# group instead of running across the whole collection.
Clumper(list_dicts).group_by('grp').mutate(index=row_number()).collect()

[{'a': 6, 'grp': 'a', 'index': 1},
 {'a': 7, 'grp': 'a', 'index': 2},
 {'a': 5, 'grp': 'a', 'index': 3},
 {'a': 2, 'grp': 'b', 'index': 1},
 {'a': 4, 'grp': 'b', 'index': 2}]

## Sort
排序, 默认升序
### without groups
![](img/sort-no-group.png)

In [15]:
from clumper import Clumper

# Sample records: 'a' holds a number and 'grp' holds a group label.
list_dicts = [
    {'a': 6, 'grp': 'a'},
    {'a': 2, 'grp': 'b'},
    {'a': 7, 'grp': 'a'},
    {'a': 9, 'grp': 'b'},
    {'a': 5, 'grp': 'a'},
]

# Order the records by field 'a'; without groups the ascending sort covers
# the whole collection.
Clumper(list_dicts).sort(key=lambda record: record['a']).collect()

[{'a': 2, 'grp': 'b'},
 {'a': 5, 'grp': 'a'},
 {'a': 6, 'grp': 'a'},
 {'a': 7, 'grp': 'a'},
 {'a': 9, 'grp': 'b'}]

### with groups
![](img/sort-with-group.png)

In [16]:
from clumper import Clumper

# Sample records: 'a' holds a number and 'grp' holds the label used for grouping.
list_dicts = [
    {'a': 6, 'grp': 'a'},
    {'a': 2, 'grp': 'b'},
    {'a': 7, 'grp': 'a'},
    {'a': 9, 'grp': 'b'},
    {'a': 5, 'grp': 'a'},
]

# With 'grp' as the group key, records are ordered by field 'a' inside each
# group, and the groups themselves stay together in the result.
Clumper(list_dicts).group_by('grp').sort(key=lambda record: record['a']).collect()

[{'a': 5, 'grp': 'a'},
 {'a': 6, 'grp': 'a'},
 {'a': 7, 'grp': 'a'},
 {'a': 2, 'grp': 'b'},
 {'a': 9, 'grp': 'b'}]

## Ungroup
最后，如果你已经完成了分组计算，想取消分组状态、把数据重新作为一个整体来处理，可以使用``.ungroup()``。