In [1]:
import pandas as pd
import numpy as np

# Transform

In [2]:
df = pd.DataFrame(np.arange(1,10).reshape(3,3),
                 index=['a','b','c'],
                 columns=['A','B','C'])
print(df)

   A  B  C
a  1  2  3
b  4  5  6
c  7  8  9


In [3]:
def double(x):
    return x*2

df2 = df.transform(double)  
print(df2)

    A   B   C
a   2   4   6
b   8  10  12
c  14  16  18


In [4]:
df2 = df.apply(double)
print(df2)

    A   B   C
a   2   4   6
b   8  10  12
c  14  16  18


In [5]:
multidf = df.transform([np.sqrt, double])
print(multidf)

          A                B                C       
       sqrt double      sqrt double      sqrt double
a  1.000000      2  1.414214      4  1.732051      6
b  2.000000      8  2.236068     10  2.449490     12
c  2.645751     14  2.828427     16  3.000000     18


In [6]:
idx = pd.IndexSlice
dfn = multidf.loc[idx[:], idx[:, 'double']]

dfn.columns = dfn.columns.droplevel(1)
print(dfn)

    A   B   C
a   2   4   6
b   8  10  12
c  14  16  18


In [7]:
df2 = df.transform({
    'A': np.sqrt,
    'B': np.double,
})
print(df2)

          A    B
a  1.000000  2.0
b  2.000000  5.0
c  2.645751  8.0


In [8]:
df.transform(np.sum)

ValueError: Function did not transform

In [9]:
df.apply(sum)

A    12
B    15
C    18
dtype: int64

In [10]:
def adding(x):
    return x[0] + x[1]

df.apply(adding, axis=1)

a     3
b     9
c    15
dtype: int32

In [11]:
# Getting error when trying the same with transform
df.transform(adding, axis=1)

ValueError: Function did not transform

# Aggregation

In [12]:
df = pd.DataFrame(np.arange(1,10).reshape(3,3),
                 index=['a','b','c'],
                 columns=['A','B','C'])
print(df)

   A  B  C
a  1  2  3
b  4  5  6
c  7  8  9


In [13]:
df.agg(np.sum)

A    12
B    15
C    18
dtype: int64

In [14]:
multidf = df.agg([np.sqrt, double])
print(multidf)

          A                B                C       
       sqrt double      sqrt double      sqrt double
a  1.000000      2  1.414214      4  1.732051      6
b  2.000000      8  2.236068     10  2.449490     12
c  2.645751     14  2.828427     16  3.000000     18


In [15]:
df2 = df.agg({
    'A': np.sqrt,
    'B': np.double,
})
print(df2)

          A    B
a  1.000000  2.0
b  2.000000  5.0
c  2.645751  8.0


In [16]:
df.agg(np.sum)

A    12
B    15
C    18
dtype: int64

In [17]:
df = df.agg([np.mean, np.sum])
print(multidf)

          A                B                C       
       sqrt double      sqrt double      sqrt double
a  1.000000      2  1.414214      4  1.732051      6
b  2.000000      8  2.236068     10  2.449490     12
c  2.645751     14  2.828427     16  3.000000     18


In [18]:
df2 = df.agg({
    'A': np.mean,
    'B': np.sum,
})
print(df2)

A     8.0
B    20.0
dtype: float64


In [19]:
df = df.agg([np.mean, np.sum, np.sqrt])
print(multidf)

ValueError: cannot combine transform and aggregation operations