* Operating on the DataFrame itself
* Using `apply` — applies a function to each column (`axis=0`, vertical Series) or to each row (`axis=1`, horizontal Series)


In [1]:
import numpy as np
import pandas as pd

# A locally seeded generator keeps the demo table identical on every
# "Restart Kernel -> Run All" without touching NumPy's global random state.
rng = np.random.RandomState(0)

# 4 cities (rows) x 3 columns of standard-normal samples.
df = pd.DataFrame(
    rng.randn(4, 3),
    columns=list('bde'),
    index=['台北', '台中', '台南', '高雄'],
)
df

Unnamed: 0,b,d,e
台北,-1.663553,-0.79109,-0.51656
台中,0.963342,0.496731,0.028431
台南,-0.686563,-0.501218,-1.306348
高雄,0.717511,0.101329,1.018368


In [2]:
# A NumPy ufunc applied to a DataFrame is evaluated on every element; row and column labels are kept.
np.absolute(df)

Unnamed: 0,b,d,e
台北,1.663553,0.79109,0.51656
台中,0.963342,0.496731,0.028431
台南,0.686563,0.501218,1.306348
高雄,0.717511,0.101329,1.018368


## df.apply(function)

The function is applied to each column of `df`, one at a time. Each column is passed to the function as a Series.

In [3]:
def f(series):
    """Print the Series that ``apply`` passes in; implicitly returns ``None``."""
    print(series)

# f runs once per column; the None it returns each time is collected into the resulting Series.
df.apply(f)

台北   -1.663553
台中    0.963342
台南   -0.686563
高雄    0.717511
Name: b, dtype: float64
台北   -0.791090
台中    0.496731
台南   -0.501218
高雄    0.101329
Name: d, dtype: float64
台北   -0.516560
台中    0.028431
台南   -1.306348
高雄    1.018368
Name: e, dtype: float64


b    None
d    None
e    None
dtype: object

In [4]:
def f(series):
    """Return the spread of ``series``: its largest value minus its smallest."""
    highest = series.max()
    lowest = series.min()
    return highest - lowest

# axis=0 is the default: f receives one column at a time.
df.apply(f, axis=0)

b    2.626895
d    1.287820
e    2.324716
dtype: float64

## using apply on axis = 1 (horizontally)

In [5]:
# axis="columns" is the label form of axis=1: f receives one row at a time.
df.apply(f, axis="columns")

台北    1.146994
台中    0.934911
台南    0.805130
高雄    0.917039
dtype: float64

## Apply a function to each element of a Series

In [7]:
def g(ele):
    """Scale a single value by a factor of 100."""
    scaled = ele * 100
    return scaled

# Selecting one row gives a Series; Series.apply then calls g once per value.
taipei_row = df.loc['台北']
taipei_row.apply(g)

b   -166.355337
d    -79.108957
e    -51.655960
Name: 台北, dtype: float64

## `apply` with an anonymous function (`lambda`)

In [11]:
# Row-wise spread (max - min), computed with an inline anonymous function.
df.apply(
    lambda series: series.max() - series.min(),
    axis="columns",  # same as axis=1
)

台北    1.146994
台中    0.934911
台南    0.805130
高雄    0.917039
dtype: float64

In [12]:
# A lambda is an ordinary function object, so it can be bound to a name and reused.
d = lambda series: series.max() - series.min()
df.apply(d, axis=0)  # axis=0 is the default: one column at a time

b    2.626895
d    1.287820
e    2.324716
dtype: float64

## When the function passed to `df.apply` returns a scalar, the results are combined into a Series
## When the function passed to `df.apply` returns a Series, the results are combined into a DataFrame

In [13]:
def minMax(s):
    """Summarise ``s`` as a two-entry Series labelled 'max' and 'min'."""
    labels = ['max', 'min']
    extremes = [s.max(), s.min()]
    return pd.Series(extremes, index=labels)

# Every column yields a Series with the same index, so the results line up as the columns of a DataFrame.
df.apply(minMax, axis=0)

Unnamed: 0,b,d,e
max,0.963342,0.496731,1.018368
min,-1.663553,-0.79109,-1.306348


In [2]:
import numpy as np
import pandas as pd

# A locally seeded generator makes the sample score table reproducible across kernel restarts.
rng = np.random.RandomState(0)

# 10 students x 5 subjects; randint's upper bound is exclusive, so scores fall in 50..100.
scores = rng.randint(50, 101, [10, 5])
scores_pd = pd.DataFrame(
    scores,
    index=range(1, 11),
    columns=['國文', '英文', '數學', '地理', '自然'],
)
scores_pd.columns.name = '科目'
# Only the last expression of a cell is rendered automatically, so this table is shown explicitly.
display(scores_pd)

def cal1(s):
    """Return the minimum, maximum and median of ``s`` as a labelled Series."""
    labels = ['最低分', '最高分', '中間值']
    stats = [s.min(), s.max(), s.median()]
    return pd.Series(stats, index=labels)

# One summary Series per subject column, assembled into a single table.
scores_pd.apply(cal1, axis=0)

科目,國文,英文,數學,地理,自然
1,73,51,56,59,68
2,52,84,89,74,70
3,95,50,68,64,91
4,72,64,99,52,82
5,52,78,97,61,98
6,62,83,64,57,54
7,77,56,85,85,97
8,75,80,55,83,52
9,73,68,70,96,62
10,76,96,70,76,91


科目,國文,英文,數學,地理,自然
最低分,52.0,50.0,55.0,52.0,52.0
最高分,95.0,96.0,99.0,96.0,98.0
中間值,73.0,73.0,70.0,69.0,76.0


## `applymap` — apply a function to every element

In [3]:
# The built-in float can be passed directly; wrapping it in a lambda adds nothing.
# NOTE(review): pandas 2.1+ deprecates applymap in favour of DataFrame.map — switch once the minimum pandas version allows.
scores_pd.applymap(float)

科目,國文,英文,數學,地理,自然
1,73.0,51.0,56.0,59.0,68.0
2,52.0,84.0,89.0,74.0,70.0
3,95.0,50.0,68.0,64.0,91.0
4,72.0,64.0,99.0,52.0,82.0
5,52.0,78.0,97.0,61.0,98.0
6,62.0,83.0,64.0,57.0,54.0
7,77.0,56.0,85.0,85.0,97.0
8,75.0,80.0,55.0,83.0,52.0
9,73.0,68.0,70.0,96.0,62.0
10,76.0,96.0,70.0,76.0,91.0
