# Iterating by group

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Example frame used throughout: two key columns (k1, k2) and two numeric columns.
# The numeric columns are drawn from a seeded generator so that a fresh
# Restart & Run All always rebuilds the same data. Outputs captured from an
# earlier, unseeded run will differ until the notebook is re-executed.
rng = np.random.default_rng(12345)
df = pd.DataFrame({'k1': ['a', 'b', 'a', 'a', 'b'],
                   'k2': ['one', 'two', 'one', 'two', 'one'],
                   'data1': rng.standard_normal(5),
                   'data2': rng.standard_normal(5)})

In [3]:
# Iterating a GroupBy yields (group key, sub-frame) pairs; show each key
# followed by the rows that belong to it.
for name, group in df.groupby(by='k1'):
    print(name, group, sep='\n')

a
  k1   k2     data1     data2
0  a  one  0.531668 -0.146453
2  a  one -0.523527  0.407377
3  a  two -1.499152  0.539337
b
  k1   k2     data1     data2
1  b  two -1.325462 -0.122490
4  b  one -0.217736  0.164766


In [4]:
# With two grouping keys, each group key is a tuple that can be unpacked
# directly in the loop header.
for (k1, k2), group in df.groupby(by=['k1', 'k2']):
    print((k1, k2), group, sep='\n')

('a', 'one')
  k1   k2     data1     data2
0  a  one  0.531668 -0.146453
2  a  one -0.523527  0.407377
('a', 'two')
  k1   k2     data1     data2
3  a  two -1.499152  0.539337
('b', 'one')
  k1   k2     data1     data2
4  b  one -0.217736  0.164766
('b', 'two')
  k1   k2     data1    data2
1  b  two -1.325462 -0.12249


In [5]:
# Materialise the groups as a plain dict: group key -> sub-frame.
pieces = {key: frame for key, frame in df.groupby('k1')}

In [6]:
# Look up the sub-frame stored under group key 'b'.
pieces['b']

Unnamed: 0,k1,k2,data1,data2
1,b,two,-1.325462,-0.12249
4,b,one,-0.217736,0.164766


In [7]:
# Per-column dtypes; these serve as the grouping key for the column-wise split below.
df.dtypes

k1        object
k2        object
data1    float64
data2    float64
dtype: object

In [8]:
# Split the columns of df by dtype.
# `DataFrame.groupby(..., axis=1)` is deprecated since pandas 2.1 and removed
# in later releases, so the column partition is built explicitly instead.
# `grouped` is a list of (dtype, sub-frame) pairs ordered by dtype name and is
# meant to be consumed as `for dtype, group in grouped`.
grouped = [
    (dtype, df[[col for col, col_dtype in df.dtypes.items() if col_dtype == dtype]])
    for dtype in sorted(set(df.dtypes), key=str)
]

In [9]:
# Show each dtype followed by the columns of df that have that dtype.
for dtype, group in grouped:
    print(dtype, group, sep='\n')

float64
      data1     data2
0  0.531668 -0.146453
1 -1.325462 -0.122490
2 -0.523527  0.407377
3 -1.499152  0.539337
4 -0.217736  0.164766
object
  k1   k2
0  a  one
1  b  two
2  a  one
3  a  two
4  b  one


# Selecting a column or subset of columns

In [12]:
# Index a GroupBy with a single column name or with a list of column names.
# Only the last expression of a cell is rendered, so the result of the first
# line is not shown in the output.
df.groupby('k1')['data1']
df.groupby('k1')[['data2']]

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000002307BCF9240>

In [13]:
# Alternative spelling: select the column(s) first, then group by the k1 Series.
# Only the last expression of a cell is rendered, so the result of the first
# line is not shown in the output.
df['data1'].groupby(df['k1'])
df[['data2']].groupby(df['k1'])

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000002307BCF8BE0>

In [14]:
# Selecting with a list ([['data2']]) keeps the aggregated result a DataFrame,
# indexed by the (k1, k2) group keys.
df.groupby(['k1', 'k2'])[['data2']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,data2
k1,k2,Unnamed: 2_level_1
a,one,0.130462
a,two,0.539337
b,one,0.164766
b,two,-0.12249


In [15]:
# Selecting with a single column name (not a list) gives a SeriesGroupBy.
s_grouped = df.groupby(['k1', 'k2'])['data2']

In [16]:
# Display the repr to show the type of the grouped object.
s_grouped

<pandas.core.groupby.generic.SeriesGroupBy object at 0x000002307BDB6B30>

In [17]:
# Mean of data2 for each (k1, k2) pair, returned as a Series with a two-level index.
s_grouped.mean()

k1  k2 
a   one    0.130462
    two    0.539337
b   one    0.164766
    two   -0.122490
Name: data2, dtype: float64