In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a small 4x3 demo frame of standard-normal samples rounded to 2 decimals.
# NOTE(review): no random seed is set in the visible cells, so a re-run will
# not reproduce the values shown in the recorded outputs.
data = np.round(np.random.normal(size=(4, 3)), 2)
df = pd.DataFrame(data, columns=['A', 'B', 'C'])
df.head()

Unnamed: 0,A,B,C
0,0.0,-0.62,-0.6
1,0.19,0.45,-1.66
2,0.82,-0.28,0.94
3,0.13,1.19,-0.69


# Apply

In [3]:
# DataFrame.apply (default axis=0) calls the lambda once per column, passing
# that column as a Series, and assembles the results into a new frame.
df.apply(lambda x: 1 + np.abs(x))

Unnamed: 0,A,B,C
0,1.0,1.62,1.6
1,1.19,1.45,2.66
2,1.82,1.28,1.94
3,1.13,2.19,1.69


In [4]:
# Series.apply on a single column: np.abs is applied to the values of column A.
df.A.apply(np.abs)

0    0.00
1    0.19
2    0.82
3    0.13
Name: A, dtype: float64

In [5]:
def double_if_positive(x):
    """Double every strictly positive entry of ``x`` and return ``x`` itself.

    WARNING: the update is written back into ``x``; no copy is made, so the
    caller's object is modified and the returned value is that same object.
    Zero and negative entries are left as they are.
    """
    positive = x > 0
    # Write the doubled values back into the caller's object (in-place update).
    x[positive] = x[positive] * 2
    return x

# NOTE(review): double_if_positive writes into its argument. The recorded output
# of the next cell shows df itself holding the doubled values after this call;
# whether that happens may depend on the pandas version / copy-on-write mode.
df.apply(double_if_positive)

Unnamed: 0,A,B,C
0,0.0,-0.62,-0.6
1,0.38,0.9,-1.66
2,1.64,-0.28,1.88
3,0.26,2.38,-0.69


In [6]:
# Re-display df: in the recorded output it matches the apply result above
# rather than the original random values, i.e. the previous cell changed df.
df

Unnamed: 0,A,B,C
0,0.0,-0.62,-0.6
1,0.38,0.9,-1.66
2,1.64,-0.28,1.88
3,0.26,2.38,-0.69


In [7]:
def double_if_positive(x):
    """Return a copy of ``x`` in which every strictly positive entry is doubled.

    The input is copied first via ``x.copy()``, so the caller's object is left
    unchanged. Zero and negative entries are carried over as they are.
    """
    doubled = x.copy()
    positive = doubled > 0
    doubled[positive] = doubled[positive] * 2
    return doubled

# raw=True hands each column to the function as a NumPy ndarray instead of a
# Series. The function works on a copy of its input, so df is left untouched
# (the df displayed later in the notebook still shows the pre-call values).
df.apply(double_if_positive, raw=True)

Unnamed: 0,A,B,C
0,0.0,-0.62,-0.6
1,0.76,1.8,-1.66
2,3.28,-0.28,3.76
3,0.52,4.76,-0.69


# Map

In [8]:
# Sample names used to demonstrate Series.map.
series = pd.Series(['Steve', 'Alex', 'Jess', 'Mark'])

In [9]:
# Mapping with a dict: values that are not keys of the dict become NaN.
series.map({'Steve': 'Stephen'})

0    Stephen
1        NaN
2        NaN
3        NaN
dtype: object

In [10]:
# Mapping with a callable: the function is applied to each element in turn.
series.map(lambda d: f'I am {d}')

0    I am Steve
1     I am Alex
2     I am Jess
3     I am Mark
dtype: object

# Vectorised functions

In [11]:
# Show df next to its element-wise absolute value. DataFrame.abs is a built-in
# vectorised method, so no apply is needed here.
display(df, df.abs())

Unnamed: 0,A,B,C
0,0.0,-0.62,-0.6
1,0.38,0.9,-1.66
2,1.64,-0.28,1.88
3,0.26,2.38,-0.69


Unnamed: 0,A,B,C
0,0.0,0.62,0.6
1,0.38,0.9,1.66
2,1.64,0.28,1.88
3,0.26,2.38,0.69


In [12]:
# Rebinds `series` (previously the Series of first names) to two-word titles
# used by the .str accessor examples that follow.
series = pd.Series(['Iron Man', 'Captain Marvel', 'Black Panther', 'Thor Thunder'])

In [13]:
# Plain Python str.split on whitespace, for comparison with Series.str.split.
'Iron Man'.split()

['Iron', 'Man']

In [14]:
# Vectorised whitespace split; expand=True spreads the pieces into separate
# columns of a DataFrame instead of returning lists.
series.str.split(expand=True)

Unnamed: 0,0,1
0,Iron,Man
1,Captain,Marvel
2,Black,Panther
3,Thor,Thunder


In [15]:
# Element-wise containment test returning a boolean Series
# (the pattern is treated as a regular expression by default).
series.str.contains('Man')

0     True
1    False
2    False
3    False
dtype: bool

In [16]:
# .str methods return a Series, so they chain: upper-case each string first,
# then split each one into a list of words.
series.str.upper().str.split()

0          [IRON, MAN]
1    [CAPTAIN, MARVEL]
2     [BLACK, PANTHER]
3      [THOR, THUNDER]
dtype: object

# User-defined functions

In [17]:
# 100,000 (x, y) pairs drawn from a normal distribution with mean 10 and
# standard deviation 2, used to compare ways of computing a per-row value.
# NOTE(review): unseeded, so the values printed in the following cells will
# differ on every re-run.
data2 = np.random.normal(10, 2, size=(100000, 2))
df2 = pd.DataFrame(data2, columns=['x', 'y'])

In [18]:
# Baseline: hypotenuse computed directly with vectorised column arithmetic.
# hypot[0] is a label lookup; with df2's default index that is the first row.
hypot = (df2.x**2 + df2.y**2)**0.5
print(hypot[0])

16.84221461997407


In [19]:
def hypot1(x, y):
    """Return the hypotenuse for legs ``x`` and ``y``: sqrt(x**2 + y**2)."""
    sum_of_squares = x**2 + y**2
    return np.sqrt(sum_of_squares)

# Row-by-row Python loop: iterrows yields (index, row Series) pairs and each
# row is unpacked into its x and y values. `index` is not used in the body.
h1 = []
for index, (x, y) in df2.iterrows():
    h1.append(hypot1(x, y))
print(h1[0])

16.84221461997407


In [20]:
def hypot2(row):
    """Return sqrt(row.x**2 + row.y**2) for an object exposing ``x`` and ``y``.

    ``row`` only needs ``.x`` and ``.y`` attributes; in this notebook it is
    passed via ``df2.apply(..., axis=1)``.
    """
    x, y = row.x, row.y
    return np.sqrt(x**2 + y**2)

# axis=1 makes apply call hypot2 once per row, passing the row as a Series.
h2 = df2.apply(hypot2, axis=1)
print(h2[0])

16.84221461997407


In [22]:
def hypot3(xs, ys):
    """Return sqrt(xs**2 + ys**2), computed element-wise over the inputs.

    In this notebook it is called with two whole DataFrame columns, so the
    arithmetic runs over all rows in a single call.
    """
    squared_lengths = xs**2 + ys**2
    return np.sqrt(squared_lengths)

# Pass the whole columns at once so the arithmetic runs vectorised over all rows.
h3 = hypot3(df2.x, df2.y)
print(h3[0])

16.84221461997407
