# Chapter 15. Operating on Data in Pandas

In [1]:
import pandas as pd
import numpy as np

## Ufuncs: Index Preservation
Because Pandas is designed to work with NumPy, any NumPy *ufunc* will
work on Pandas *Series* and *DataFrame* objects.

In [6]:
# Seeded generator so this cell yields the same Series every time it runs.
rng = np.random.default_rng(seed=42)
# Four random integers drawn from [0, 10), with the default integer index.
ser = pd.Series(rng.integers(low=0, high=10, size=4))
ser

0    0
1    7
2    6
3    4
dtype: int64

In [8]:
# 3x4 frame of random integers in [0, 10) with columns A-D.
# NOTE: this draws from the shared `rng`, so the values depend on how many
# numbers were drawn before this cell ran; re-running it gives a new frame.
df = pd.DataFrame(
    rng.integers(low=0, high=10, size=(3, 4)),
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
0,5,1,8,4
1,5,3,1,9
2,7,6,4,8


In [9]:
# If we apply a NumPy ufunc to either of these objects, the result is
# another Pandas object with the indices preserved:
np.exp(ser)

0       1.000000
1    1096.633158
2     403.428793
3      54.598150
dtype: float64

In [10]:
# Scale every entry by pi/4 and take the sine element-wise; the result is a
# DataFrame that keeps df's row index and column labels.
np.sin(df*np.pi/4)

Unnamed: 0,A,B,C,D
0,-0.707107,0.707107,-2.449294e-16,1.224647e-16
1,-0.707107,0.707107,0.7071068,0.7071068
2,-0.707107,-1.0,1.224647e-16,-2.449294e-16


## Ufuncs: Index Alignment
For binary operations on two Series or DataFrame objects, Pandas will
align indices in the process of performing the operation.

In [11]:
# Area values keyed by state name; the Series itself is named 'area'.
area = pd.Series(
    {'Alaska': 1723337, 'Texas': 695662, 'California': 423967},
    name='area',
)
area

Alaska        1723337
Texas          695662
California     423967
Name: area, dtype: int64

In [12]:
# Population values keyed by state name; note the keys only partly overlap
# with those of `area` (Florida here, Alaska there).
population = pd.Series(
    {'California': 39538223, 'Texas': 29145505, 'Florida': 21538187},
    name='population',
)
population

California    39538223
Texas         29145505
Florida       21538187
Name: population, dtype: int64

In [13]:
# Divide population by area; the two Series are aligned on their index
# labels before the division. Any item for which one or the other does not
# have an entry is marked with NaN, or "Not a Number," which is how Pandas
# marks missing data.
population/area

Alaska              NaN
California    93.257784
Florida             NaN
Texas         41.896072
dtype: float64

In [15]:
# The labels of the division result above are the union of the two operands'
# index labels; compute that union explicitly for comparison.
area.index.union(population.index)

Index(['Alaska', 'California', 'Florida', 'Texas'], dtype='object')

### Index Alignment in Series

In [16]:
# Two Series whose integer indices overlap only on labels 1 and 2.
A = pd.Series(data=[1, 2, 4], index=[0, 1, 2])
B = pd.Series(data=[0, 1, 1], index=[1, 2, 3])

In [17]:
# Labels 0 and 3 exist in only one of the two Series, so their sums are NaN;
# the result is float64 because NaN cannot be stored in an integer Series.
A+B

0    NaN
1    2.0
2    5.0
3    NaN
dtype: float64

### Index Alignment in DataFrames

In [18]:
# 2x2 frame of random integers in [0, 20) with columns a, b.
# This rebinds `A` (previously a Series) and draws from the shared `rng`,
# so the values depend on how many numbers were drawn beforehand.
A = pd.DataFrame(
    rng.integers(low=0, high=20, size=(2, 2)),
    columns=list('ab'),
)
A

Unnamed: 0,a,b
0,10,8
1,9,4


In [19]:
# 3x3 frame of random integers in [0, 10); the columns are deliberately in a
# different order (b, a, c) and include one, c, that `A` does not have.
# This rebinds `B` (previously a Series) and draws from the shared `rng`.
B = pd.DataFrame(
    rng.integers(low=0, high=10, size=(3, 3)),
    columns=list('bac'),
)
B

Unnamed: 0,b,a,c
0,0,5,8
1,0,8,8
2,2,6,1


In [20]:
# Rows and columns are matched by label, not by position, so the differing
# column order does not matter; cells present in only one frame become NaN.
A+B

Unnamed: 0,a,b,c
0,15.0,8.0,
1,17.0,4.0,
2,,,


In [22]:
# Instead of leaving NaN where one frame has no entry, substitute the mean of
# all values in A for the missing operand before adding.
A.add(B, fill_value=A.to_numpy().mean())

Unnamed: 0,a,b,c
0,15.0,8.0,15.75
1,17.0,4.0,15.75
2,13.75,9.75,8.75
