In [2]:
import pandas as pd
import numpy as np

In [3]:
def extract_city_name(df):
    """
    Derive city_name from city_and_code, e.g. "Chicago, IL" -> "Chicago".

    Each city_and_code value is split on commas and the first piece is kept.
    The result is written to a city_name column on df itself (df is modified
    in place) and the same df is returned, so the function can be chained.
    """
    comma_pieces = df["city_and_code"].str.split(",")
    df["city_name"] = comma_pieces.str[0]
    return df

In [4]:
def add_country_name(df, country_name=None, sep=""):
    """
    Chicago -> ChicagoUS, stored in a new city_and_country column.

    Appends ``sep`` and ``country_name`` to every value of the ``city_name``
    column and writes the result to ``city_and_country``. ``df`` is modified
    in place and returned, so the function works with ``DataFrame.pipe``.

    Parameters
    ----------
    df : DataFrame
        Must contain a ``city_name`` column of strings.
    country_name : str, optional
        Text appended to each city name. With the default ``None`` nothing
        is appended and ``city_and_country`` is a copy of ``city_name``.
    sep : str, default ""
        Separator placed between city and country; ``sep="-"`` yields
        ``Chicago-US``. Ignored when ``country_name`` is ``None``.

    Returns
    -------
    DataFrame
        The same ``df`` object, now with the ``city_and_country`` column.
    """
    col = "city_name"
    if country_name is None:
        # None cannot be concatenated onto the city strings, so treat the
        # default as "no country given" rather than failing or producing
        # missing values.
        df["city_and_country"] = df[col]
    else:
        suffix = sep + country_name
        df["city_and_country"] = df[col] + suffix
    return df

In [5]:
# One-row frame with a "City, STATE" string, used as input for the two helpers.
df_p = pd.DataFrame(
    data={"city_and_code": ["Chicago, IL"]},
)

In [6]:
# Nested form: read inside-out -- the city is extracted first, then the country is appended.
add_country_name(df=extract_city_name(df=df_p), country_name="US")

Unnamed: 0,city_and_code,city_name,city_and_country
0,"Chicago, IL",Chicago,ChicagoUS


Nested calls like the one above have to be read inside-out, which gets harder with every extra step. Pandas encourages us to use `pipe()` instead, a style known as 'method chaining'. `pipe()` makes it easy to use your own or another library’s functions in method chains, alongside Pandas’ own methods. Compare the first approach with the following:

In [7]:
# Same two steps as the nested call, written as a chain that reads top to bottom.
(
    df_p
    .pipe(extract_city_name)
    .pipe(add_country_name, country_name="US")
)

Unnamed: 0,city_and_code,city_name,city_and_country
0,"Chicago, IL",Chicago,ChicagoUS


## Row- or column-wise function application

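Arbitrary functions can be applied along the axes of a DataFrame using the `apply()` method, which, like the descriptive statistics methods, takes an optional `axis` argument. With `axis=0` (the default) the function receives each column in turn; with `axis=1` it receives each row.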

In [12]:
# Seeded generator so the random values (and every result derived from them)
# are identical on each Restart & Run All.
rng = np.random.default_rng(0)

# The three Series carry different index labels; the DataFrame constructor
# aligns them on the union of labels and fills the missing cells with NaN.
df = pd.DataFrame({
        'one': pd.Series(rng.standard_normal(3), index=['a', 'b', 'c']),
        'two': pd.Series(rng.standard_normal(4), index=['a', 'b', 'c', 'd']),
        'three': pd.Series(rng.standard_normal(3), index=['b', 'c', 'd'])})
df

Unnamed: 0,one,two,three
a,-0.498715,-1.086124,
b,0.076683,-1.654754,0.894579
c,1.271734,1.186488,1.063489
d,,-0.289534,-1.153575


In [15]:
df.apply(np.mean, axis="index")  # default axis: each column is reduced to its mean

one      0.283234
two     -0.460981
three    0.268164
dtype: float64

In [17]:
df.apply(np.mean, axis="columns")  # same as axis=1: one mean per row, taken across the columns

a   -0.792420
b   -0.227831
c    1.173904
d   -0.721554
dtype: float64

In [18]:
df.apply(lambda column: column.max() - column.min())  # spread (max minus min) of each column

one      1.770449
two      2.841243
three    2.217063
dtype: float64

In [19]:
df.apply(np.cumsum, axis="index")  # running total down each column; result keeps the frame's shape

Unnamed: 0,one,two,three
a,-0.498715,-1.086124,
b,-0.422032,-2.740879,0.894579
c,0.849702,-1.554391,1.958067
d,,-1.843925,0.804493


In [20]:
df.apply(func=np.exp)  # element-wise exponential; result keeps the frame's shape

Unnamed: 0,one,two,three
a,0.607311,0.337522,
b,1.0797,0.191139,2.446305
c,3.567033,3.275558,2.896458
d,,0.748612,0.315507


In [22]:
def subtract_and_divide(x, sub, divide=1):
    """
    Subtract sub from x, then divide the difference by divide.

    x can be any object that supports - and / (a number, a Series, ...).
    With the default divide=1 the difference is only divided by one.
    """
    difference = x - sub
    return difference / divide

# apply() forwards extra arguments to the function: sub=5 via args, divide=3 by keyword.
df.apply(subtract_and_divide, args=(5,), divide=3)

Unnamed: 0,one,two,three
a,-1.832905,-2.028708,
b,-1.641106,-2.218251,-1.368474
c,-1.242755,-1.271171,-1.31217
d,,-1.763178,-2.051192
