Ufuncs: Index Preservation

In [1]:
import pandas as pd
import numpy as np

In [12]:
# Seeded legacy generator: every random draw in this notebook comes from `rng`,
# so the outputs are reproducible as long as the cells run in order.
rng = np.random.RandomState(42)

# Four integers drawn from [0, 10), stored in a Series with the default integer index.
ser = pd.Series(rng.randint(0, 10, size=4))
ser

0    6
1    3
2    7
3    4
dtype: int32

In [13]:
# 3x4 frame of integers drawn from [0, 10), with columns labelled A through D.
df = pd.DataFrame(
    rng.randint(0, 10, size=(3, 4)),
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
0,6,9,2,6
1,7,4,3,7
2,7,2,5,4


In [14]:
# A NumPy ufunc applied to a Series returns a new Series that keeps the same index.
np.exp(ser)

0     403.428793
1      20.085537
2    1096.633158
3      54.598150
dtype: float64

In [15]:
# Arithmetic and ufuncs on a DataFrame work element-wise; the result keeps
# both the row index and the column labels of `df`.
np.sin(df*np.pi/4)

Unnamed: 0,A,B,C,D
0,-1.0,0.7071068,1.0,-1.0
1,-0.707107,1.224647e-16,0.707107,-0.7071068
2,-0.707107,1.0,-0.707107,1.224647e-16


Ufuncs: Index Alignment

Index alignment in Series

In [16]:
# Two Series keyed by state name. They deliberately share only some labels
# ('Texas' and 'California'), which is what the alignment examples below rely on.
area = pd.Series(
    {'Alaska': 1723337, 'Texas': 695662, 'California': 423967},
    name='area',
)
population = pd.Series(
    {'California': 38332521, 'Texas': 26448193, 'New York': 19651127},
    name='population',
)

In [17]:
# Binary operations align the operands on their index labels first.
# Labels present in only one Series (Alaska, New York) produce NaN.
population/area

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

In [18]:
# Union of the two indices: every label that appears in either Series.
# The explicit method is used because `|` between Index objects was deprecated
# as a set operation and, from pandas 2.0, is an element-wise logical OR instead.
area.index.union(population.index)

Index(['Alaska', 'California', 'New York', 'Texas'], dtype='object')

In [19]:
# Intersection of the two indices: only the labels present in both Series.
# The explicit method is used because `&` between Index objects was deprecated
# as a set operation and, from pandas 2.0, is an element-wise logical AND instead.
area.index.intersection(population.index)

Index(['Texas', 'California'], dtype='object')

In [20]:
# Two Series whose integer indices only partly overlap (labels 1 and 2 are shared).
A = pd.Series(data=[2, 4, 6], index=[0, 1, 2])
B = pd.Series(data=[1, 3, 5], index=[1, 2, 3])

# The sum is computed over the union of the indices; unmatched labels give NaN.
A + B

0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [21]:
# The method form of `+` accepts fill_value: a label missing from one operand
# is treated as 0 there, so the result contains no NaN.
A.add(B, fill_value=0)

0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64

Index alignment in DataFrame

In [24]:
# 2x2 frame of integers drawn from [0, 20), columns A and B.
A = pd.DataFrame(
    rng.randint(0, 20, size=(2, 2)),
    columns=['A', 'B'],
)
A

Unnamed: 0,A,B
0,9,15
1,14,14


In [25]:
# 3x3 frame of integers drawn from [0, 10); note the columns are in the order B, A, C.
B = pd.DataFrame(
    rng.randint(0, 10, size=(3, 3)),
    columns=['B', 'A', 'C'],
)
B

Unnamed: 0,B,A,C
0,2,6,3
1,8,2,4
2,2,6,4


In [26]:
# DataFrames align on both the row index and the column labels (column order
# does not matter); positions missing from either frame become NaN.
A+B

Unnamed: 0,A,B,C
0,15.0,17.0,
1,16.0,22.0,
2,,,


In [27]:
# Mean of every value in A: stack() flattens the frame into one Series first.
fill = A.stack().mean()

# Positions missing from one frame are treated as `fill` before the addition.
A.add(B, fill_value=fill)

Unnamed: 0,A,B,C
0,15.0,17.0,16.0
1,16.0,22.0,17.0
2,19.0,15.0,17.0


In [28]:
# Show the fill value computed from A.
fill

13.0

In [None]:
# Reference: Python operators and their equivalent pandas methods.
# (Kept as comments so this code cell stays valid Python under "Run All".)
#
# Python Operator    Pandas Method(s)
# +                  add()
# -                  sub(), subtract()
# *                  mul(), multiply()
# /                  truediv(), div(), divide()
# //                 floordiv()
# %                  mod()
# **                 pow()

Ufuncs: Operations Between DataFrame and Series


In [29]:
# Plain NumPy array, 3x4, of integers drawn from [0, 10).
A = rng.randint(0, 10, size=(3, 4))
A

array([[8, 6, 1, 3],
       [8, 1, 9, 8],
       [9, 4, 1, 3]])

In [30]:
# NumPy broadcasting: the first row is subtracted from every row of the 2-D array.
A-A[0]

array([[ 0,  0,  0,  0],
       [ 0, -5,  8,  5],
       [ 1, -2,  0,  0]])

In [31]:
# The first row of the array, i.e. the operand subtracted from each row.
A[0]

array([8, 6, 1, 3])

In [32]:
# Wrap the array in a DataFrame with columns labelled Q, R, S, T.
df = pd.DataFrame(A, columns=['Q', 'R', 'S', 'T'])
df

Unnamed: 0,Q,R,S,T
0,8,6,1,3
1,8,1,9,8
2,9,4,1,3


In [33]:
# The first row as a Series; its index is the frame's column labels.
df.iloc[0]

Q    8
R    6
S    1
T    3
Name: 0, dtype: int32

In [34]:
# DataFrame minus Series: the Series' labels are matched against the frame's
# columns and the subtraction is applied to every row.
df-df.iloc[0]

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,0,-5,8,5
2,1,-2,0,0


In [35]:
# To operate column-wise instead, use the method form with axis=0: column R is
# matched against the row index and subtracted from every column.
df.subtract(df['R'], axis=0)

Unnamed: 0,Q,R,S,T
0,2,0,-5,-3
1,7,0,8,7
2,5,0,-3,-1


In [36]:
# Redisplay df for comparison with the result above.
df

Unnamed: 0,Q,R,S,T
0,8,6,1,3
1,8,1,9,8
2,9,4,1,3


In [37]:
# Column R as a Series, indexed by the frame's row labels.
df['R']

0    6
1    1
2    4
Name: R, dtype: int32

In [39]:
# Every second column of the first row (step 2 over the columns): here Q and S.
halfrow = df.iloc[0, ::2]
halfrow

Q    8
S    1
Name: 0, dtype: int32

In [40]:
# halfrow only has labels Q and S; columns R and T have no matching label,
# so they come out as NaN after alignment.
df-halfrow

Unnamed: 0,Q,R,S,T
0,0.0,,0.0,
1,0.0,,8.0,
2,1.0,,0.0,
