In [1]:
import numpy as np
import pandas as pd

# Advanced GroupBy Use

## 1. Group Transforms and “Unwrapped” GroupBys

In [2]:
# Toy frame for the groupby demos: twelve rows whose 'key' cycles through
# a, b, c and whose 'value' runs from 0.0 to 11.0.
dataframe = pd.DataFrame({
    'key': list('abc') * 4,
    'value': np.arange(12, dtype=float),
})

In [3]:
dataframe

Unnamed: 0,key,value
0,a,0.0
1,b,1.0
2,c,2.0
3,a,3.0
4,b,4.0
5,c,5.0
6,a,6.0
7,b,7.0
8,c,8.0
9,a,9.0


In [4]:
# Bucket the rows by their 'key' label; the GroupBy object is reused by the
# cells below. Displaying .mean() gives one averaged 'value' per key.
grouped = dataframe.groupby(by='key')
grouped.mean()

Unnamed: 0_level_0,value
key,Unnamed: 1_level_1
a,4.5
b,5.5
c,6.5


In [5]:
# transform() returns output with the same shape as the input: here every
# row is replaced by the mean of the group it belongs to.
grouped.transform(lambda x: x.mean())

Unnamed: 0,value
0,4.5
1,5.5
2,6.5
3,4.5
4,5.5
5,6.5
6,4.5
7,5.5
8,6.5
9,4.5


In [6]:
# Alternatively, name the aggregation with a string instead of passing a
# lambda; the displayed result matches the lambda version.
grouped.transform('mean')

Unnamed: 0,value
0,4.5
1,5.5
2,6.5
3,4.5
4,5.5
5,6.5
6,4.5
7,5.5
8,6.5
9,4.5


In [7]:
# The function can also return a result the same size as each group:
# here every value is squared and the original row order is kept.
grouped.transform(lambda x: x ** 2)

Unnamed: 0,value
0,0.0
1,1.0
2,4.0
3,9.0
4,16.0
5,25.0
6,36.0
7,49.0
8,64.0
9,81.0


In [8]:
# Rank the values inside each group in descending order, so the largest
# value of a group receives rank 1.
grouped.transform(lambda x: x.rank(ascending=False))

Unnamed: 0,value
0,4.0
1,4.0
2,4.0
3,3.0
4,3.0
5,3.0
6,2.0
7,2.0
8,2.0
9,1.0


In [9]:
# Repeats the earlier string-alias call: each row is assigned the mean of
# its group, so the output is identical to that cell's output.
grouped.transform('mean')

Unnamed: 0,value
0,4.5
1,5.5
2,6.5
3,4.5
4,5.5
5,6.5
6,4.5
7,5.5
8,6.5
9,4.5


## 2. Grouped Time Resampling

In [10]:
# Build a small time series: n consecutive one-minute timestamps, each
# paired with a running integer counter. `n` and `times` are reused below.
n = 15

times = pd.date_range(start='2017-05-20 00:00', periods=n, freq='1min')
frame = pd.DataFrame(dict(times=times, value=np.arange(n)))
frame

Unnamed: 0,times,value
0,2017-05-20 00:00:00,0
1,2017-05-20 00:01:00,1
2,2017-05-20 00:02:00,2
3,2017-05-20 00:03:00,3
4,2017-05-20 00:04:00,4
5,2017-05-20 00:05:00,5
6,2017-05-20 00:06:00,6
7,2017-05-20 00:07:00,7
8,2017-05-20 00:08:00,8
9,2017-05-20 00:09:00,9


In [11]:
# Index by timestamp, bin the rows into 5-minute windows, and count the
# entries that fall into each window.
(
    frame
    .set_index('times')
    .resample('5min')
    .count()
)

Unnamed: 0_level_0,value
times,Unnamed: 1_level_1
2017-05-20 00:00:00,5
2017-05-20 00:05:00,5
2017-05-20 00:10:00,5


In [12]:
# Repeat every timestamp three times so that each minute carries one row
# per key (A, B, C); 'value' is a float counter over all 3 * n rows.
frame_2 = pd.DataFrame(dict(
    times=times.repeat(3),
    key=np.tile(['A', 'B', 'C'], n),
    value=np.arange(3 * n, dtype=float),
))

In [13]:
# Preview only the first seven rows rather than the whole frame.
frame_2.head(7)

Unnamed: 0,times,key,value
0,2017-05-20 00:00:00,A,0.0
1,2017-05-20 00:00:00,B,1.0
2,2017-05-20 00:00:00,C,2.0
3,2017-05-20 00:01:00,A,3.0
4,2017-05-20 00:01:00,B,4.0
5,2017-05-20 00:01:00,C,5.0
6,2017-05-20 00:02:00,A,6.0
