In [1]:
import numpy as np
import pandas as pd

In [2]:
# Demo frame: two label columns to group on (key1, key2) and two numeric
# columns to aggregate (data1, data2).
# The numbers come from a locally seeded RandomState so every run produces the
# same frame without touching NumPy's global random state.  Outputs recorded
# from an earlier unseeded run will differ until the notebook is re-executed.
rng = np.random.RandomState(0)
df = pd.DataFrame({'key1': ['a', 'a', 'b', 'b', 'a'],
                   'key2': ['one', 'two', 'one', 'two', 'one'],
                   'data1': rng.randn(5),   # first five draws
                   'data2': rng.randn(5)})  # next five draws

In [3]:
# Display the whole frame (it has only five rows) to check columns and values.
df

Unnamed: 0,data1,data2,key1,key2
0,0.053818,0.541159,a,one
1,0.49688,1.186934,a,two
2,-0.324803,0.658899,b,one
3,1.277378,-0.730108,b,two
4,0.72521,-0.955991,a,one


In [4]:
# Split the data1 values into groups, using the key1 column as the labels.
group_labels = df['key1']
grouped = df['data1'].groupby(group_labels)

In [5]:
# Echo the grouped object; the recorded output is only its repr
# (a SeriesGroupBy), not aggregated values.
grouped

<pandas.core.groupby.SeriesGroupBy object at 0x000000000787F550>

In [6]:
# Mean of data1 within each key1 group; the result is indexed by key1.
grouped.mean() 

key1
a    0.425303
b    0.476288
Name: data1, dtype: float64

In [7]:
# Average data1 within every (key1, key2) combination.  Grouping by two key
# Series gives a result indexed by both levels.
group_keys = [df['key1'], df['key2']]
means = df['data1'].groupby(group_keys).mean()

In [8]:
# Show the per-(key1, key2) means: a Series with a two-level index.
means

key1  key2
a     one     0.389514
      two     0.496880
b     one    -0.324803
      two     1.277378
Name: data1, dtype: float64

In [9]:
# Pivot the inner index level (key2) into columns, giving a key1 x key2 table.
means.unstack() 

key2,one,two
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0.389514,0.49688
b,-0.324803,1.277378


In [10]:
# Re-display the frame for reference before grouping the DataFrame itself.
df

Unnamed: 0,data1,data2,key1,key2
0,0.053818,0.541159,a,one
1,0.49688,1.186934,a,two
2,-0.324803,0.658899,b,one
3,1.277378,-0.730108,b,two
4,0.72521,-0.955991,a,one


In [11]:
# Per-key1 mean of the numeric columns.
# The columns are selected explicitly because key2 holds strings: older pandas
# silently dropped such columns from mean(), while pandas >= 2.0 raises
# TypeError instead.  Explicit selection gives the same table on both.
df.groupby('key1')[['data1', 'data2']].mean()

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0.425303,0.257367
b,0.476288,-0.035605


In [12]:
# Number of entries per key1 group for every remaining column
# (data1, data2 and key2 in the recorded output).
df.groupby(by='key1').count()

Unnamed: 0_level_0,data1,data2,key2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,3,3,3
b,2,2,2


In [13]:
# Group on both label columns by name; only data1 and data2 are left to average.
df.groupby(by=['key1', 'key2']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,0.389514,-0.207416
a,two,0.49688,1.186934
b,one,-0.324803,0.658899
b,two,1.277378,-0.730108


In [15]:
# Number of rows falling into each (key1, key2) combination, per data column.
df.groupby(by=['key1', 'key2']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,2,2
a,two,1,1
b,one,1,1
b,two,1,1


In [16]:
# Group the frame by both keys, pick out only data2, then average it.
data2_by_keys = df.groupby(['key1', 'key2'])['data2']
data2_by_keys.mean()

key1  key2
a     one    -0.207416
      two     1.186934
b     one     0.658899
      two    -0.730108
Name: data2, dtype: float64

In [17]:
# Rebind `grouped` to a grouping of the whole frame by key1.
grouped = df.groupby(by='key1')

In [18]:
# Echo the grouped object; the recorded output is only its repr
# (a DataFrameGroupBy), not aggregated values.
grouped

<pandas.core.groupby.DataFrameGroupBy object at 0x000000000A7DE128>

In [19]:
# Iterating the grouping yields (label, sub-frame) pairs; print each label on
# its own line followed by the rows belonging to it.
for name, group in grouped:
    print(name, group, sep='\n')

a
      data1     data2 key1 key2
0  0.053818  0.541159    a  one
1  0.496880  1.186934    a  two
4  0.725210 -0.955991    a  one
b
      data1     data2 key1 key2
2 -0.324803  0.658899    b  one
3  1.277378 -0.730108    b  two


In [20]:
 df2 = pd.DataFrame(np.random.randn(5, 5), 
                   columns=['a', 'b', 'c', 'd', 'e'],
                   index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'])

In [21]:
# Display the second demo frame (five rows, columns a-e).
df2

Unnamed: 0,a,b,c,d,e
Joe,1.236738,0.606495,-1.579581,0.639828,-0.159116
Steve,0.433373,0.16395,-0.438505,1.732935,-2.538654
Wes,-0.49035,0.554422,0.344526,-0.502106,0.747047
Jim,-0.375294,-0.498919,-0.737563,1.346862,-0.583281
Travis,-1.335168,-1.476235,0.947088,-1.293615,-1.075971


In [22]:
# Column label -> colour bucket used to group df2's columns.
# 'f' has no matching column in df2.
map_dict = dict(a='red', b='red', c='blue', d='blue', e='red', f='gold')

In [23]:
# Sum df2's columns per colour bucket, one row per person.
# groupby(..., axis=1) is deprecated since pandas 2.1, so the frame is
# transposed, grouped along the index with the same mapping, summed, and
# transposed back.  The result has the same shape and labels as column-wise
# grouping gave.
df2.T.groupby(map_dict).sum().T

Unnamed: 0,blue,red
Joe,-0.939753,1.684116
Steve,1.294431,-1.941332
Wes,-0.157581,0.811118
Jim,0.609298,-1.457494
Travis,-0.346527,-3.887373


In [24]:
# Group the whole frame by key1 again for the aggregation examples that follow.
grouped = df.groupby('key1') 

In [25]:
# Echo the grouped object; the recorded output is only its repr
# (a DataFrameGroupBy), not aggregated values.
grouped

<pandas.core.groupby.DataFrameGroupBy object at 0x000000000A84BC18>

In [26]:
def peak_to_peak(arr):
    """Return the spread of ``arr``: its largest value minus its smallest.

    ``arr`` can be any object that provides ``max()`` and ``min()`` methods
    whose results can be subtracted (e.g. a NumPy array or pandas Series).
    """
    highest = arr.max()
    lowest = arr.min()
    return highest - lowest

In [27]:
# Apply the custom peak_to_peak aggregation to each key1 group.
# Only the numeric columns are passed: key2 holds strings, for which
# max() - min() is undefined.  Older pandas silently dropped such columns,
# while pandas >= 2.0 raises TypeError.
grouped[['data1', 'data2']].agg(peak_to_peak)

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0.671392,2.142925
b,1.602181,1.389007


In [28]:
# Summary statistics per key1 group; the recorded output lists count, mean,
# std, min, quartiles and max for data1 and data2.
grouped.describe() 

Unnamed: 0_level_0,data1,data1,data1,data1,data1,data1,data1,data1,data2,data2,data2,data2,data2,data2,data2,data2
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
key1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
a,3.0,0.425303,0.341371,0.053818,0.275349,0.49688,0.611045,0.72521,3.0,0.257367,1.099288,-0.955991,-0.207416,0.541159,0.864046,1.186934
b,2.0,0.476288,1.132913,-0.324803,0.075743,0.476288,0.876833,1.277378,2.0,-0.035605,0.982176,-0.730108,-0.382856,-0.035605,0.311647,0.658899
