In [1]:
# Operating on data in pandas: behaviour inherited from NumPy
# Ufuncs: index preservation
import pandas as pd
import numpy as np

In [3]:
# Series of four random integers in [0, 10); no seed is set in this cell,
# so the values differ from run to run.
ser = pd.Series(np.random.randint(low=0, high=10, size=4))
ser

0    1
1    9
2    6
3    2
dtype: int32

In [4]:
# 3x4 frame of random integers in [0, 10) with columns A-D.
df = pd.DataFrame(
    np.random.randint(low=0, high=10, size=(3, 4)),
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
0,0,6,3,5
1,5,9,1,3
2,8,3,9,5


In [5]:
# Applying a NumPy ufunc to a pandas object preserves its index,
# and the result is another pandas object (here a float64 Series).
np.exp(ser)

0       2.718282
1    8103.083928
2     403.428793
3       7.389056
dtype: float64

In [6]:
# Ufuncs also apply element-wise to a DataFrame; row and column labels are kept.
np.sin(df * np.pi / 4)

Unnamed: 0,A,B,C,D
0,0.0,-1.0,0.707107,-0.707107
1,-0.7071068,0.707107,0.707107,0.707107
2,-2.449294e-16,0.707107,0.707107,-0.707107


In [14]:
# Binary operations on Series and DataFrames align their indices first.
# Index alignment in Series: the two Series below share only some labels.
area = pd.Series(
    [1723337, 695662, 423967],
    index=['Alaska', 'Texas', 'California'],
    name='area',
)
population = pd.Series(
    [38332521, 26448193, 19651127],
    index=['California', 'Texas', 'New York'],
    name='population',
)

In [17]:
# Labels present in only one of the two Series ('Alaska', 'New York') come out as NaN.
population / area

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

In [18]:
# The result index is the union of the indices of the two inputs.
# Use Index.union() rather than `|`: the operator form was deprecated as a set
# operation and, from pandas 2.0, performs an element-wise logical OR instead,
# which fails on string labels.
area.index.union(population.index)

  


Index(['Alaska', 'California', 'New York', 'Texas'], dtype='object')

In [20]:
# Labels that are missing from either operand are filled with NaN in the result.
A = pd.Series({0: 2, 1: 4, 2: 6})
B = pd.Series({1: 1, 2: 3, 3: 5})

In [21]:
# Show A: labels 0 to 2.
A

0    2
1    4
2    6
dtype: int64

In [22]:
# Show B: labels 1 to 3, so it shares only labels 1 and 2 with A.
B

1    1
2    3
3    5
dtype: int64

In [23]:
# Only labels 1 and 2 exist in both Series; labels 0 and 3 become NaN,
# and the result is float64 so that it can hold NaN.
A + B

0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [25]:
# If NaN is not wanted, use the method form with fill_value: an element that is
# missing from A or from B is treated as that value before the addition.
A.add(B, fill_value=0)

0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64

In [27]:
# Index alignment in DataFrames: alignment happens on both the columns and the row index.
A = pd.DataFrame(
    np.random.randint(low=0, high=20, size=(2, 2)),
    columns=["A", "B"],
)
A

Unnamed: 0,A,B
0,2,8
1,8,7


In [28]:
# 3x3 frame of random integers in [0, 10); its columns are ordered B, A, C.
B = pd.DataFrame(
    np.random.randint(low=0, high=10, size=(3, 3)),
    columns=["B", "A", "C"],
)
B

Unnamed: 0,B,A,C
0,8,9,9
1,5,2,0
2,2,2,7


In [29]:
# Columns are matched by label regardless of their order in each frame;
# cells that exist in only one of the frames are NaN in the result.
A + B

Unnamed: 0,A,B,C
0,11.0,16.0,
1,10.0,12.0,
2,,,


In [32]:
# Fill value: the mean of all values in A. stack() first reshapes the frame
# into a single Series so that mean() averages every cell at once.
fill = A.stack().mean()
fill

6.25

In [33]:
# A cell that is missing from exactly one of the frames is replaced by `fill` before adding.
A.add(B, fill_value=fill)

Unnamed: 0,A,B,C
0,11.0,16.0,15.25
1,10.0,12.0,6.25
2,8.25,8.25,13.25


In [34]:
# Python operators and their equivalent pandas methods:
#   +   add()
#   -   sub(), subtract()
#   *   mul(), multiply()
#   /   truediv(), div(), divide()
#   //  floordiv()  (the quotient)
#   %   mod()       (the remainder)
#   **  pow()

In [42]:
# Operations between a DataFrame and a Series.
# A is a 3x4 NumPy array of random integers in [0, 10).
A = np.random.randint(0, 10, size=(3, 4))
A

array([[0, 1, 9, 6],
       [4, 4, 6, 5],
       [8, 0, 1, 7]])

In [43]:
# First row of A, as a 1-D array of length 4.
A[0]

array([0, 1, 9, 6])

In [44]:
# NumPy broadcasting subtracts the first row from every row of A.
A - A[0]

array([[ 0,  0,  0,  0],
       [ 4,  3, -3, -1],
       [ 8, -1, -8,  1]])

In [45]:
# Wrap the array in a DataFrame whose columns are labelled Q, R, S and T.
df = pd.DataFrame(A, columns=["Q", "R", "S", "T"])
df

Unnamed: 0,Q,R,S,T
0,0,1,9,6
1,4,4,6,5
2,8,0,1,7


In [48]:
# First row of df as a Series indexed by the column labels.
df.iloc[0]

Q    0
R    1
S    9
T    6
Name: 0, dtype: int32

In [49]:
# By default the Series is aligned with the columns of df, so the first row
# is subtracted from each row (the same result as the NumPy version).
df - df.iloc[0]

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,4,3,-3,-1
2,8,-1,-8,1


In [50]:
# To operate column-wise instead, use the method form and pass the axis keyword.
# Column R is the Series that will be subtracted.
df['R']

0    1
1    4
2    0
Name: R, dtype: int32

In [52]:
# axis=0 aligns the Series with the row index, so column R is subtracted from every column.
df.subtract(df["R"], axis=0)

Unnamed: 0,Q,R,S,T
0,-1,0,8,5
1,0,0,2,1
2,8,0,1,7


In [54]:
# Every second entry of the first row, i.e. only columns Q and S.
halfrow = df.iloc[0, ::2]
halfrow

Q    0
S    9
Name: 0, dtype: int32

In [55]:
# Columns that are absent from halfrow (R and T) become NaN after alignment.
df - halfrow

Unnamed: 0,Q,R,S,T
0,0.0,,0.0,
1,4.0,,-3.0,
2,8.0,,-8.0,
