In [1]:
# 导入包
import numpy as np
import pandas as pd

## 小结
1. 算术运算和数据对齐
    1. 先根据索引做数据对齐，再进行运算
    2. 某个索引只存在于其中一方时，结果中该位置填充NaN
2. 使用填充值的算术方法
    1. 数据对齐时，以设定的填充值替代nan
    2. 如果某个位置在两个对象中都不存在（例如行标签只来自一方、列标签只来自另一方），结果仍然是NaN
3. DataFrame和Series的混合运算
    1. 类似NumPy不同维度数组的运算
    2. 默认匹配DataFrame列索引，可手动指定匹配行索引

## 一、算术运算和数据对齐

### 1.1 Series

In [2]:
# Build two integer Series whose index labels only partly overlap
# (shared labels: "a", "c", "e"), then show both.
s1 = pd.Series(np.arange(5), index=list("abcde"))
s2 = pd.Series(np.arange(6), index=list("acefgh"))

print(s1, s2, sep="\n")

a    0
b    1
c    2
d    3
e    4
dtype: int64
a    0
c    1
e    2
f    3
g    4
h    5
dtype: int64


In [3]:
# Add with the + operator: pandas first aligns the two Series on their index
# labels. A label present in only one Series comes out as NaN, and the
# result dtype is float64.
s1 + s2

a    0.0
b    NaN
c    3.0
d    NaN
e    6.0
f    NaN
g    NaN
h    NaN
dtype: float64

### 1.2 DataFrame

In [4]:
# Build two small integer DataFrames whose row labels and column labels
# only partly overlap, then show both.
df1 = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=list("abcd"),
    columns=list("ABC"),
)
df2 = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list("ace"),
    columns=list("ABH"),
)

print(df1, df2, sep="\n")

   A   B   C
a  0   1   2
b  3   4   5
c  6   7   8
d  9  10  11
   A  B  H
a  0  1  2
c  3  4  5
e  6  7  8


In [5]:
# Add with the + operator: rows and columns are both aligned by label.
# Any cell that is not present in both frames comes out as NaN.
print(df1 + df2)

     A     B   C   H
a  0.0   2.0 NaN NaN
b  NaN   NaN NaN NaN
c  9.0  11.0 NaN NaN
d  NaN   NaN NaN NaN
e  NaN   NaN NaN NaN


## 二、使用填充值的算术方法

### 2.1 Series

In [6]:
# Method form of addition: s1.add(s2) can be used in place of s1 + s2.
# No fill_value is passed here, so labels found in only one Series are
# still NaN, exactly as with the + operator.
s1.add(s2)

a    0.0
b    NaN
c    3.0
d    NaN
e    6.0
f    NaN
g    NaN
h    NaN
dtype: float64

In [7]:
# Addition with a fill value: same call as s1.add(s2), but a label missing
# from one Series is treated as 0 instead of producing NaN.
s1.add(s2, fill_value=0)

a    0.0
b    1.0
c    3.0
d    3.0
e    6.0
f    3.0
g    4.0
h    5.0
dtype: float64

### 2.2 DataFrame

In [9]:
# Addition with a fill value: df1.add(df2, fill_value=0) in place of df1 + df2.
# A cell missing from only one frame is treated as 0; a cell missing from
# both frames (e.g. row "b", column "H") is still NaN.
print(df1.add(df2, fill_value=0))

     A     B     C    H
a  0.0   2.0   2.0  2.0
b  3.0   4.0   5.0  NaN
c  9.0  11.0   8.0  5.0
d  9.0  10.0  11.0  NaN
e  6.0   7.0   NaN  8.0


### 2.3 reindex 

In [10]:
# reindex without a fill value: the new column "D" is filled with NaN.
print(df1.reindex(columns=["A", "B", "C", "D"]))

   A   B   C   D
a  0   1   2 NaN
b  3   4   5 NaN
c  6   7   8 NaN
d  9  10  11 NaN


In [11]:
# reindex with a fill value: the new column "D" is filled with 99 instead of NaN.
print(df1.reindex(columns=["A", "B", "C", "D"], fill_value=99))

   A   B   C   D
a  0   1   2  99
b  3   4   5  99
c  6   7   8  99
d  9  10  11  99


## 三、DataFrame和Series的混合运算

### 启发小例

In [17]:
# Build a 4x3 NumPy array holding 0..11 in row-major order and show it.
arr = np.reshape(np.arange(12), (4, 3))
print(arr)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [18]:
# Take row 0 of arr as a one-dimensional array.
arr1 = arr[0]
print(arr1)

[0 1 2]


In [19]:
# Array arithmetic: subtract arr1 from arr. The 1-D row is subtracted from
# every row of the 2-D array.
print(arr - arr1)

[[0 0 0]
 [3 3 3]
 [6 6 6]
 [9 9 9]]


In [15]:
# Take row 0 of df1 as a Series; its index is df1's column labels.
s3 = df1.iloc[0]
print(s3)

A    0
B    1
C    2
Name: a, dtype: int64


In [16]:
# Mixed DataFrame/Series arithmetic: df1 - s3. The Series index is matched
# against df1's column labels and the subtraction is applied to every row.
print(df1 - s3)

   A  B  C
a  0  0  0
b  3  3  3
c  6  6  6
d  9  9  9


In [21]:
# Take the first column of df1 ("A") as a Series; its index is df1's row labels.
s4 = df1["A"]
print(s4)

a    0
b    3
c    6
d    9
Name: A, dtype: int64


In [26]:
# Mixed DataFrame/Series arithmetic: subtract s4 from df1, matching on the
# row index so the Series is applied to every column.
# axis="index" can also be written as axis=0: df1.sub(s4, axis=0)
print(df1.sub(s4, axis="index"))

   A  B  C
a  0  1  2
b  0  1  2
c  0  1  2
d  0  1  2
