# Mod11 Ufuncs in Pandas

## Ufuncs: Index Preservation

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Display the version string of the imported pandas package.
pd.__version__

'1.1.4'

In [2]:
# Display the version string of the imported NumPy package.
np.__version__

'1.19.4'

In [4]:
# Fixed seed so the random draws below are the same on every run.
rng = np.random.RandomState(42)
# Four integers drawn from [0, 10); no index is given, so pandas labels
# the elements 0..3.
ser = pd.Series(rng.randint(low=0, high=10, size=4))
ser

0    6
1    3
2    7
3    4
dtype: int64

In [5]:
ser.values # the underlying data as a plain NumPy array; the index is not part of it

array([6, 3, 7, 4])

In [8]:
# A separate seed for the frame, so its values are reproducible on their own.
rng = np.random.RandomState(777)
# 3 rows x 4 columns of integers from [0, 10), with columns labelled A-D.
df = pd.DataFrame(
    data=rng.randint(low=0, high=10, size=(3, 4)),
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
0,7,6,7,1
1,7,4,7,9
2,8,7,2,0


In [9]:
# The frame's data as a 2-D NumPy array; row and column labels are not included.
df.values

array([[7, 6, 7, 1],
       [7, 4, 7, 9],
       [8, 7, 2, 0]])

In [10]:
# A NumPy ufunc applied to a Series returns a Series that keeps the same index.
np.exp(ser)

0     403.428793
1      20.085537
2    1096.633158
3      54.598150
dtype: float64

In [11]:
# On a DataFrame the ufunc is applied element-wise; the row and column labels
# carry over to the result.
np.exp(df)

Unnamed: 0,A,B,C,D
0,1096.633158,403.428793,1096.633158,2.718282
1,1096.633158,54.59815,1096.633158,8103.083928
2,2980.957987,1096.633158,7.389056,1.0


In [12]:
# A single column is a Series; the result keeps both its index and its name ('A').
np.exp(df['A'])

0    1096.633158
1    1096.633158
2    2980.957987
Name: A, dtype: float64

In [13]:
# Selecting with a list of column labels yields a DataFrame; the ufunc result
# keeps exactly those columns.
np.exp(df[['B','D']])

Unnamed: 0,B,D
0,403.428793,2.718282
1,54.59815,8103.083928
2,1096.633158,1.0


In [20]:
%%timeit
np.exp(df[0:2])  # plain integer slice selects rows by position; the end (2) is excluded

70 µs ± 308 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [21]:
%%timeit
np.exp(df.loc[0:2]) # .loc slices by index label; the end label (2) is included

75.9 µs ± 272 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [19]:
%%timeit
np.exp(df.iloc[0:2]) # .iloc slices by integer position; the end (2) is excluded

68.5 µs ± 966 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [15]:
# A single row comes back as a Series indexed by the column labels and named
# after the row label (1); the ufunc result keeps both.
np.exp(df.iloc[1])

A    1096.633158
B      54.598150
C    1096.633158
D    8103.083928
Name: 1, dtype: float64

In [16]:
# Arithmetic and ufuncs compose: every element is scaled by pi/4 and then passed
# to sin, with labels preserved. Values such as 1.22e-16 in the output are
# floating-point round-off where the exact result is 0.
np.sin(df * np.pi / 4)

Unnamed: 0,A,B,C,D
0,-0.7071068,-1.0,-0.707107,0.707107
1,-0.7071068,1.224647e-16,-0.707107,0.707107
2,-2.449294e-16,-0.7071068,1.0,0.0


## Ufuncs: Index Alignment

### Index alignment in Series

In [None]:
# Two Series keyed by state name. Their indices overlap only partly:
# 'Alaska' appears only in `area`, 'New York' only in `population`.
area = pd.Series(
    data=[1723337, 695662, 423967],
    index=['Alaska', 'Texas', 'California'],
    name='area',
)
population = pd.Series(
    data=[38332521, 26448193, 19651127],
    index=['California', 'Texas', 'New York'],
    name='population',
)

In [None]:
# Display the `area` Series (indexed by state name).
area

In [None]:
# Display the `population` Series (indexed by state name).
population

In [None]:
# Division aligns the two Series on their index labels. The result is indexed
# by the union of both indices, and any label present in only one of them
# ('Alaska', 'New York') is filled with NaN by default.
population / area

In [None]:
# Two short Series whose integer indices are offset by one: labels 1 and 2 are
# shared, label 0 exists only in A and label 3 only in B.
A = pd.Series(data=[2, 4, 6], index=[0, 1, 2])
B = pd.Series(data=[1, 3, 5], index=[1, 2, 3])

In [None]:
# Display Series A (index labels 0, 1, 2).
A

In [None]:
# Display Series B (index labels 1, 2, 3).
B

In [None]:
# Labels 1 and 2 exist in both Series and are summed; labels 0 and 3 exist in
# only one of them, so those positions become NaN.
A + B

Calling ``A.add(B)`` is equivalent to ``A + B``, but it accepts an explicit ``fill_value`` to use for any elements of ``A`` or ``B`` that are missing:

In [None]:
# Same alignment as A + B, but a label missing from one Series is treated as 0
# there instead of producing NaN.
A.add(B, fill_value=0)

### Index alignment in DataFrame

In [None]:
# Fresh generator with a fixed seed so this frame is reproducible by itself.
rng = np.random.RandomState(42)
# 2 x 2 frame of integers from [0, 20) with columns 'A' and 'B'.
A = pd.DataFrame(
    data=rng.randint(low=0, high=20, size=(2, 2)),
    columns=['A', 'B'],
)
A

In [None]:
# Re-seeding restarts the random stream, so B's values do not depend on how
# many numbers were drawn earlier.
rng = np.random.RandomState(42)
# 3 x 3 frame of integers from [0, 10). The column order is deliberately
# 'B', 'A', 'C', which differs from frame A's 'A', 'B'.
B = pd.DataFrame(
    data=rng.randint(low=0, high=10, size=(3, 3)),
    columns=['B', 'A', 'C'],
)
B

In [None]:
# DataFrames align on both row and column labels, regardless of column order.
# Row 2 and column 'C' exist only in B, so every cell there becomes NaN.
A + B

In [None]:
# Cells that exist in only one of the two frames are computed with 0 standing
# in for the missing value, instead of yielding NaN.
A.add(B,fill_value=0)

Alternatively, fill with the mean of all values in ``A`` (computed by first stacking the rows of ``A`` into a single Series):

In [None]:
# DataFrame.mean() averages each column separately, so this gives one value per
# column rather than a single overall mean.
A.mean()

In [None]:
# stack() moves the column labels into an inner index level, producing one
# Series that holds every value of A.
A.stack()

In [None]:
# Mean over every element of A: stack into a single Series, then average it.
fill = A.stack().mean()
# Add with alignment; a cell missing from one frame uses `fill` in its place.
A.add(B, fill_value=fill)