In [3]:
import pandas as pd 
import numpy as np
# Draw the numeric columns from a generator with a fixed seed so that every
# re-run of the notebook produces the same values. A local generator is used
# so the module-level np.random state is left untouched.
# NOTE: outputs stored from earlier unseeded runs need to be regenerated once.
rng = np.random.default_rng(0)

# Eight-row demo frame: 'A' and 'B' are string keys used for grouping,
# 'C' and 'D' hold standard-normal samples.
df = pd.DataFrame({
    'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
    'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
    'C': rng.standard_normal(8),
    'D': rng.standard_normal(8),
})

In [14]:
# Add up the numeric columns inside every (B, A) combination; the two group
# keys become the levels of the resulting row index.
df.groupby(by=['B', 'A']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
B,A,Unnamed: 2_level_1,Unnamed: 3_level_1
one,bar,-0.384931,0.954748
one,foo,-0.04864,0.542532
three,bar,-1.016718,-0.771237
three,foo,-0.579274,-1.321934
two,bar,0.935631,-0.49131
two,foo,0.161499,1.188072


In [15]:
# Per-column aggregation for each value of A: total of C, maximum of D.
# The aggregations are named by string because passing NumPy callables such
# as np.sum / np.max to agg() emits a FutureWarning in pandas >= 2.1; the
# string aliases select the same group-wise sum and max.
df.groupby('A').agg({'C': 'sum', 'D': 'max'})

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,-0.466018,0.954748
foo,-0.466414,1.345111


In [20]:
# Outer and inner labels of the two-level row index, kept as two plain lists
# so the pairing below is easy to read.
first_level = ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']
second_level = ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']

# Pair the labels position by position: [('bar', 'one'), ('bar', 'two'), ...].
tuples = list(zip(first_level, second_level))
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])

# Fixed-seed local generator so the frame is identical on every re-run and the
# module-level np.random state is not modified.
rng = np.random.default_rng(1)
df = pd.DataFrame(rng.standard_normal((8, 2)), index=index, columns=['A', 'B'])

# First four rows by position (the 'bar' and 'baz' groups). .iloc states the
# positional intent explicitly instead of relying on df[:4] slice semantics.
df2 = df.iloc[:4]

In [23]:
# Display df2, the first four rows of the MultiIndexed frame.
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.025106,0.831306
bar,two,1.0187,-0.641589
baz,one,0.885655,-2.234267
baz,two,-0.622379,-0.152207


In [30]:
# Move the column labels (A, B) into a new innermost index level, turning the
# frame into a Series indexed by (first, second, column label).
df3 = df2.stack(level=-1)

In [38]:
# Pivot the outermost index level ('first') back out into columns.
df3.unstack(level=0)

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.025106,0.885655
one,B,0.831306,-2.234267
two,A,1.0187,-0.622379
two,B,-0.641589,-0.152207


In [41]:
# Fixed-seed local generator so D and E are identical on every re-run; the
# module-level np.random state is not modified.
rng = np.random.default_rng(2)

# Twelve-row frame for the pivot-table example: A, B and C are repeating
# string keys, D and E hold standard-normal samples.
df = pd.DataFrame({
    'A': ['one', 'one', 'two', 'three'] * 3,
    'B': ['A', 'B', 'C'] * 4,
    'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
    'D': rng.standard_normal(12),
    'E': rng.standard_normal(12),
})
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,-0.254128,0.887873
1,one,B,foo,1.069735,0.96941
2,two,C,foo,-0.676289,-0.623757
3,three,A,bar,0.714919,0.433614
4,one,B,bar,0.668877,0.24346
5,one,C,bar,-0.555843,0.792796
6,two,A,foo,0.927285,-0.081488
7,three,B,foo,0.703084,0.308089
8,one,C,foo,-0.017237,-0.686906
9,one,A,bar,-0.619075,0.460963


In [40]:
# Aggregate D (pivot_table's default aggregation is the mean) for each (A, B)
# row pair, with one output column per value of C. Combinations that never
# occur in df come out as NaN.
pd.pivot_table(
    data=df,
    values='D',
    index=['A', 'B'],
    columns=['C'],
)

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.993774,-2.457382
one,B,-0.161507,-0.048285
one,C,-1.104123,1.392186
three,A,-0.653588,
three,B,,0.358853
three,C,2.569611,
two,A,,0.658251
two,B,0.406856,
two,C,,1.28428


In [42]:
# Fixed-seed local generator so the frame is identical on every re-run; the
# module-level np.random state is not modified.
rng = np.random.default_rng(3)

# The three Series deliberately carry different indexes: 'one' has no 'd' and
# 'three' has no 'a'. The DataFrame constructor aligns them on the union of
# the labels and fills the gaps with NaN.
df = pd.DataFrame({
    'one': pd.Series(rng.standard_normal(3), index=['a', 'b', 'c']),
    'two': pd.Series(rng.standard_normal(4), index=['a', 'b', 'c', 'd']),
    'three': pd.Series(rng.standard_normal(3), index=['b', 'c', 'd']),
})

In [43]:
# Mean of each column: axis=0 (the default) hands np.mean one column at a time.
df.apply(np.mean, axis=0)

one     -0.066100
two      0.358364
three   -0.319860
dtype: float64

In [44]:
# Mean of each row: np.mean is handed one row at a time.
df.apply(np.mean, axis='columns')

a   -0.381954
b   -0.301704
c    0.256727
d    0.587207
dtype: float64

In [45]:
# Spread of each column: its largest value minus its smallest.
df.apply(lambda column: column.max() - column.min())

one      1.794064
two      1.085101
three    0.865004
dtype: float64

In [46]:
# Running total down each column; the result has the same shape as df.
df.apply(np.cumsum, axis=0)

Unnamed: 0,one,two,three
a,-0.731921,-0.031986,
b,-1.260442,-0.066448,-0.342127
c,-0.198299,0.382818,-1.083356
d,,1.433456,-0.959581


In [47]:
# Display df again: the apply() call above returned a new frame, so the
# values here are the original ones rather than running totals.
df

Unnamed: 0,one,two,three
a,-0.731921,-0.031986,
b,-0.528521,-0.034462,-0.342127
c,1.062143,0.449266,-0.741229
d,,1.050638,0.123775


In [51]:
def own_function(x):
    """Return ``x`` multiplied by itself.

    Works for anything that supports ``*`` with itself; for a pandas Series
    or NumPy array the product is taken element by element.
    """
    squared = x * x
    return squared
# Square every value column by column, then take the running total of the
# squares down each column.
(
    df
    .apply(own_function)
    .apply(np.cumsum)
)

Unnamed: 0,one,two,three
a,0.535709,0.001023,
b,0.815043,0.002211,0.117051
c,1.94319,0.204051,0.666471
d,,1.307892,0.681791


In [52]:
def subtract_and_divide(x, sub, divide=1):
    """Subtract ``sub`` from ``x`` and divide the difference by ``divide``.

    Parameters
    ----------
    x : value (or Series/array) to transform.
    sub : amount subtracted from ``x`` first.
    divide : divisor applied to the difference; the default of 1 leaves the
        difference unscaled.

    Returns
    -------
    ``(x - sub) / divide``
    """
    shifted = x - sub
    return shifted / divide
# `args` supplies the extra positional arguments that follow the column:
# here sub=5 and divide=3. It has to be an iterable, so even a single extra
# argument must be wrapped, e.g. args=(5,).
df.apply(subtract_and_divide, args=(5, 3))


Unnamed: 0,one,two,three
a,-1.91064,-1.677329,
b,-1.84284,-1.678154,-1.780709
c,-1.312619,-1.516911,-1.913743
d,,-1.316454,-1.625408


In [56]:
def subtract(x, sub):
    """Return ``x`` reduced by ``sub``."""
    difference = x - sub
    return difference

# Subtract 5 from every value. The extra positional argument is passed as a
# tuple, which is the type DataFrame.apply documents for `args`; the trailing
# comma is what makes (5,) a one-element tuple rather than the number 5.
df.apply(subtract, args=(5,))

Unnamed: 0,one,two,three
a,-5.731921,-5.031986,
b,-5.528521,-5.034462,-5.342127
c,-3.937857,-4.550734,-5.741229
d,,-3.949362,-4.876225
