In [32]:
import pandas as pd
import numpy as np

# Seeded generator so the random draws below are repeatable.
rng = np.random.default_rng(42)

# A Series holding four random integers drawn from [0, 10).
ser = pd.Series(rng.integers(low=0, high=10, size=4))
print(ser)

# A 3x4 DataFrame of random integers drawn from [0, 10), columns A-D.
df = pd.DataFrame(rng.integers(low=0, high=10, size=(3, 4)),
                  columns=list('ABCD'))
print(df)

# Applying a NumPy ufunc to a pandas object yields another pandas object
# that keeps the original index.
print(ser.pipe(np.exp))

# The same holds for a more involved expression: sin(df * pi / 4)
# comes back as a DataFrame with the same row and column labels.
print(df.mul(np.pi).div(4).pipe(np.sin))

0    0
1    7
2    6
3    4
dtype: int64
   A  B  C  D
0  4  8  0  6
1  2  0  5  9
2  7  7  7  7
0       1.000000
1    1096.633158
2     403.428793
3      54.598150
dtype: float64
              A             B         C         D
0  1.224647e-16 -2.449294e-16  0.000000 -1.000000
1  1.000000e+00  0.000000e+00 -0.707107  0.707107
2 -7.071068e-01 -7.071068e-01 -0.707107 -0.707107


In [9]:
# Two Series keyed by state name; their labels only partly overlap
# (Alaska appears only in `area`, Florida only in `population`).
area = pd.Series(
    {'Alaska': 1723337, 'Texas': 695662, 'California': 423967},
    name='area',
)
population = pd.Series(
    {'California': 39538223, 'Texas': 29145505, 'Florida': 21438187},
    name='population',
)

# Series arithmetic example: population density, matched up by label.
# Labels present on only one side come out as NaN.
print(population.div(area))

# The result's index is the union of the two input indexes; show that union.
print(area.index.union(population.index))

# Wherever one operand has no entry for a label, the result is NaN.
A = pd.Series(data=[2, 4, 6], index=[0, 1, 2])
B = pd.Series(data=[1, 3, 5], index=[1, 2, 3])
print(A.add(B))
# The method form accepts a default for missing entries; here they count as 0.
print(A.add(B, fill_value=0))

Alaska              NaN
California    93.257784
Florida             NaN
Texas         41.896072
dtype: float64
Index(['Alaska', 'California', 'Florida', 'Texas'], dtype='object')
0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64
0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64


In [12]:
# NOTE: `rng` is not created in this cell; it must already exist in the
# kernel (a NumPy random Generator built in another cell).

# A: 2x2 frame of random integers drawn from [0, 20), columns a and b.
A = pd.DataFrame(
    rng.integers(low=0, high=20, size=(2, 2)),
    columns=list('ab'),
)
print(A)

# B: 3x3 frame of random integers drawn from [0, 10); its column order
# (b, a, c) deliberately differs from A's.
B = pd.DataFrame(
    rng.integers(low=0, high=10, size=(3, 3)),
    columns=list('bac'),
)
print(B)

# Addition lines the operands up by row and column label regardless of
# their order; the result's columns come out sorted (a, b, c), and any
# position missing from either frame is NaN.
print(A.add(B))

# Use the mean of all of A's elements as the stand-in for missing entries.
print(A.add(B, fill_value=A.values.mean()))

    a   b
0  14   7
1   1  19
   b  a  c
0  4  8  6
1  7  7  1
2  3  4  4
      a     b   c
0  22.0  11.0 NaN
1   8.0  26.0 NaN
2   NaN   NaN NaN
       a      b      c
0  22.00  11.00  16.25
1   8.00  26.00  11.25
2  14.25  13.25  14.25


In [37]:
# NumPy broadcasting: subtracting the first row from a 2-D array applies
# the subtraction to every row.
rng = np.random.default_rng(42)
A = rng.integers(0, 10, size=(3, 4))
print(A)
print(np.subtract(A, A[0]))

# pandas follows the same rule: by default a Series operand is matched
# against the column labels and applied row by row.
df = pd.DataFrame(A, columns=list('QRST'))
print(df.sub(df.iloc[0], axis='columns'))

# To operate column-wise instead, use the method form and match on the
# row index.
print(df.sub(df['R'], axis='index'))

# Labels are still aligned automatically: `halfrow` holds only every
# second column of row 0 (Q and S), so the columns it lacks become NaN.
halfrow = df.iloc[0, ::2]
print(halfrow)
print(df.sub(halfrow))


[[0 7 6 4]
 [4 8 0 6]
 [2 0 5 9]]
[[ 0  0  0  0]
 [ 4  1 -6  2]
 [ 2 -7 -1  5]]
   Q  R  S  T
0  0  0  0  0
1  4  1 -6  2
2  2 -7 -1  5
   Q  R  S  T
0 -7  0 -1 -3
1 -4  0 -8 -2
2  2  0  5  9
Q    0
S    6
Name: 0, dtype: int64
     Q   R    S   T
0  0.0 NaN  0.0 NaN
1  4.0 NaN -6.0 NaN
2  2.0 NaN -1.0 NaN
