# Ufuncs: Index Preservation

In [1]:
import numpy as np
import pandas as pd

In [3]:
# Seeded generator so every run of the notebook draws the same numbers.
rng = np.random.RandomState(42)
# Series of 4 random integers drawn from [0, 10): the upper bound is exclusive.
ser = pd.Series(rng.randint(low=0, high=10, size=4))
ser

0    6
1    3
2    7
3    4
dtype: int64

In [6]:
# 3x4 frame of random integers in [0, 10) with labelled columns A-D.
df = pd.DataFrame(
    data=rng.randint(low=0, high=10, size=(3, 4)),
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
0,6,3,8,2
1,4,2,6,4
2,8,6,1,3


In [7]:
# Apply a NumPy ufunc directly to the Series: the result is again a Series
# that carries the same index labels as `ser`.
np.exp(ser)  # e**x for every element

0     403.428793
1      20.085537
2    1096.633158
3      54.598150
dtype: float64

In [9]:
# Ufuncs work on a DataFrame too: the row index and column labels of df are kept.
np.sin(df * np.pi / 4)

Unnamed: 0,A,B,C,D
0,-1.0,0.707107,-2.449294e-16,1.0
1,1.224647e-16,1.0,-1.0,1.224647e-16
2,-2.449294e-16,-1.0,0.7071068,0.7071068


# Index Alignment
For binary operations on two Series or DataFrame objects, Pandas will align indices in the process of performing the operation. This is very convenient when working with incomplete data, as we'll see in some of the examples that follow.

In [10]:
# Two Series keyed by state name. Their label sets only partly overlap:
# Alaska appears only in `area`, New York only in `population`.
area = pd.Series(
    {
        'Alaska': 1723337,
        'Texas': 695662,
        'California': 423967,
    },
    name='area',
)
population = pd.Series(
    {
        'California': 38332521,
        'Texas': 26448193,
        'New York': 19651127,
    },
    name='population',
)

In [11]:
# Division aligns the two Series on their index labels; a state present in
# only one of them has no partner value and comes out as NaN.
population / area

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

In [12]:
# Union of the two indices: the set of labels the aligned result contains.
# Index.union() is the explicit set operation. The `|` operator on an Index
# no longer means set union in pandas >= 2.0 (it is an element-wise logical
# OR, which fails on string labels).
area.index.union(population.index)

Index(['Alaska', 'California', 'New York', 'Texas'], dtype='object')

In [13]:
# Two integer Series whose indices overlap only on labels 1 and 2.
A = pd.Series(data=[2, 4, 6], index=[0, 1, 2])
B = pd.Series(data=[1, 3, 5], index=[1, 2, 3])
# The sum is computed label by label; labels 0 and 3 exist in only one operand.
A + B

0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [16]:
# Method form of A + B; fill_value=0 stands in for an entry that is missing
# from one operand, so labels 0 and 3 produce numbers instead of NaN.
A.add(B, fill_value=0)

0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64

In [19]:
# 2x2 frame of random integers in [0, 20) with columns A and B.
A = pd.DataFrame(
    data=rng.randint(low=0, high=20, size=(2, 2)),
    columns=list('AB'),
)
A

Unnamed: 0,A,B
0,7,14
1,2,13


In [21]:
# 3x3 frame of random integers in [0, 10); its columns are listed as B, A, C,
# i.e. in a different order than in A and with one extra column and row.
B = pd.DataFrame(
    data=rng.randint(low=0, high=10, size=(3, 3)),
    columns=list('BAC'),
)
B

Unnamed: 0,B,A,C
0,3,5,1
1,9,1,9
2,3,7,6


In [22]:
# Mean over every entry of A: stack() first collapses the columns into a single Series.
fill = A.stack().mean()
# Positions missing from A (column C, row 2) are filled with that mean before adding.
A.add(B, fill_value=fill)

Unnamed: 0,A,B,C
0,12.0,17.0,10.0
1,3.0,22.0,18.0
2,16.0,12.0,15.0


# Operations Between DataFrame and Series

In [23]:
# 3x4 NumPy array of random integers in [0, 10).
A = rng.randint(low=0, high=10, size=(3, 4))
A

array([[8, 7, 4, 1],
       [4, 7, 9, 8],
       [8, 0, 8, 6]])

In [24]:
# Subtract the first row from every row: NumPy broadcasts the 1-D row A[0]
# across the rows of the 2-D array.
A - A[0]

array([[ 0,  0,  0,  0],
       [-4,  0,  5,  7],
       [ 0, -7,  4,  5]])

In [25]:
# Wrap the array in a DataFrame, labelling its four columns Q, R, S and T.
df = pd.DataFrame(data=A, columns=list('QRST'))

In [26]:
# Same row-wise subtraction with pandas: the Series df.iloc[0] is matched to
# df's columns and subtracted from each row.
df - df.iloc[0]

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,-4,0,5,7
2,0,-7,4,5
