Chapter 5 - Apply Functions

In [1]:
import pandas as pd

df = pd.DataFrame({"a": [10, 20, 30], "b": [20, 30, 40]})
print(df)

    a   b
0  10  20
1  20  30
2  30  40


In [3]:
def my_sq(x):
    """square the value"""
    return x ** 2

def avg_2(x, y):
    """average of two values"""
    return (x + y) / 2

In [2]:
#square a column
print(df["a"]**2)

0    100
1    400
2    900
Name: a, dtype: int64


In [4]:
#square a column by using apply
sq = df["a"].apply(my_sq)
print(sq)

0    100
1    400
2    900
Name: a, dtype: int64


In [5]:
def my_exp(x, e):
    """raise x to the power of e"""
    return x ** e

cubed = my_exp(2, 3)
print(cubed)

8


In [6]:
#the exponent, e, to 2
ex = df["a"].apply(my_exp, e=2)
print(ex)

0    100
1    400
2    900
Name: a, dtype: int64


In [7]:
#apply over a dataframe
df = pd.DataFrame({"a": [10, 20, 30], "b": [20, 30, 40]})
print(df)

    a   b
0  10  20
1  20  30
2  30  40


In [8]:
def print_me(x):
    """print the value"""
    print(x)

In [10]:
df.apply(print_me, axis=0)

0    10
1    20
2    30
Name: a, dtype: int64
0    20
1    30
2    40
Name: b, dtype: int64


a    None
b    None
dtype: object

In [None]:
#column wise
def avg_3_apply(col):
    x = col[0]
    y = col[1]
    z = col[2]
    return (x + y + z) / 3

print(df.apply(avg_3_apply))

a    20.0
b    30.0
dtype: float64


In [18]:
#row wise
def avg_2_apply(row):
    x = row[0]
    y = row[1]
    return (x + y) / 2

print(df.apply(avg_2_apply, axis = 0))

a    15.0
b    25.0
dtype: float64


In [19]:
def avg_2(x, y):
    return (x + y) / 2

print(avg_2(df["a"], df["b"]))

0    15.0
1    25.0
2    35.0
dtype: float64


In [20]:
import numpy as np

def avg_2_mod(x, y):
    if (x == 20):
        return (np.NaN)
    else:
        return (x + y) / 2

In [21]:
print(avg_2_mod(10, 20))

15.0


In [22]:
avg_2_mod_vec = np.vectorize(avg_2_mod)
print(avg_2_mod_vec(df["a"], df["b"]))

[15. nan 35.]


In [23]:
@np.vectorize
def avg_2_mod(x, y):
    if (x == 20):
        return (np.NaN)
    else:
        return (x + y) / 2

print(avg_2_mod(df["a"], df["b"]))

[15. nan 35.]


In [25]:
import numba

@numba.vectorize
def avg_2_numba(x, y):
    if (int(x) == 20):
        return (np.NaN)
    else:
        return (x + y) / 2

print(avg_2_numba(df["a"].values, df["b"].values))

[15. nan 35.]


In [27]:
#lambda function
df['a_sq_lambda'] = df['a'].apply(lambda x: x ** 2)
print(df)

    a   b  a_sq_lambda
0  10  20          100
1  20  30          400
2  30  40          900
