In [1]:
import numpy as np
import pandas as pd

In [2]:
def make_df(cols, ind):
    """Build a small demonstration DataFrame with self-describing cells.

    Every cell holds the string form of its column label followed by the
    string form of its row label (column 'A', row 0 -> 'A0').

    Parameters
    ----------
    cols : iterable
        Column labels. Iterating a string yields one column per character.
    ind : iterable
        Row labels. Iterated once per column to build the cell values and
        also passed to pandas as the index, so it must be re-iterable.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``cols``, indexed by ``ind``.
    """
    columns = {}
    for label in cols:
        columns[label] = ["".join((str(label), str(row))) for row in ind]
    return pd.DataFrame(columns, index=ind)

# Example DataFrame: three columns (one per character of 'ABC') and rows 0-2.
make_df('ABC', range(3))

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


### 3.7.1 知识回顾：NumPy 数组的合并

In [4]:
# Three plain Python lists used as array-like inputs.
x, y, z = [1, 2, 3], [4, 5, 6], [7, 8, 9]

# With the default axis the inputs are joined end to end into one 1-D array.
np.concatenate((x, y, z))

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [6]:
# A 2x2 nested list standing in for a two-dimensional array.
x = [[1, 2], [3, 4]]
# axis=1 joins along the second axis, so the two copies sit side by side.
np.concatenate((x, x), axis=1)

array([[1, 2, 1, 2],
       [3, 4, 3, 4]])

### 3.7.2 通过 pd.concat 实现简易合并

In [7]:
# Two Series with disjoint integer labels.
ser1 = pd.Series(list('ABC'), index=[1, 2, 3])
ser2 = pd.Series(list('DEF'), index=[4, 5, 6])

# pd.concat stacks them in order and keeps each Series' own index labels.
pd.concat((ser1, ser2))

1    A
2    B
3    C
4    D
5    E
6    F
dtype: object

In [17]:
# Two frames with the same columns and disjoint row labels.
df1, df2 = make_df('AB', [1, 2]), make_df('AB', [3, 4])

# Print each input, then their row-wise concatenation, one per line block.
print(df1, df2, pd.concat((df1, df2)), sep="\n")

    A   B
1  A1  B1
2  A2  B2
    A   B
3  A3  B3
4  A4  B4
    A   B
1  A1  B1
2  A2  B2
3  A3  B3
4  A4  B4


In [22]:
# Two frames with the same row labels and different columns.
df3, df4 = make_df('AB', [0, 1]), make_df('CD', [0, 1])

# axis=1 places the frames side by side, aligned on their row labels.
print(df3, df4, pd.concat((df3, df4), axis=1), sep="\n")

    A   B
0  A0  B0
1  A1  B1
    C   D
0  C0  D0
1  C1  D1
    A   B   C   D
0  A0  B0  C0  D0
1  A1  B1  C1  D1


#### 1. 索引重复

In [24]:
x, y = make_df('AB', [0, 1]), make_df('AB', [2, 3])
# Give y the same row labels as x so the two indices collide.
y.index = x.index
# pd.concat keeps the labels as they are, so the result repeats 0 and 1.
print(x, y, pd.concat((x, y)), sep="\n")

    A   B
0  A0  B0
1  A1  B1
    A   B
0  A2  B2
1  A3  B3
    A   B
0  A0  B0
1  A1  B1
0  A2  B2
1  A3  B3


(1) 捕捉索引重复的错误

In [25]:
# verify_integrity=True makes pd.concat raise instead of returning a result
# whose index contains overlapping labels.
try:
    pd.concat((x, y), verify_integrity=True)
except ValueError as err:
    # Show the message rather than letting the traceback stop the notebook.
    print("ValueError: ", err)

ValueError:  Indexes have overlapping values: Int64Index([0, 1], dtype='int64')


(2) 忽略索引

In [26]:
# ignore_index=True drops the inputs' row labels and numbers the result 0..n-1.
pd.concat([x, y], ignore_index=True)

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


(3) 增加多层索引

In [27]:
# keys labels each input, producing a two-level index: (key, original label).
pd.concat([x, y], keys=['x', 'y'])

Unnamed: 0,Unnamed: 1,A,B
x,0,A0,B0
x,1,A1,B1
y,0,A2,B2
y,1,A3,B3


#### 2. 类似 join 的合并

In [33]:
# df5 and df6 share only columns B and C; A and E each exist in one frame.
df5, df6 = make_df('ABC', [1, 2]), make_df('BCE', [3, 4])

# The default outer join keeps the union of the columns and fills the gaps
# with NaN; sort=True orders the resulting column labels.
print(df5, df6, pd.concat((df5, df6), sort=True), sep="\n")

    A   B   C
1  A1  B1  C1
2  A2  B2  C2
    B   C   E
3  B3  C3  E3
4  B4  C4  E4
     A   B   C    E
1   A1  B1  C1  NaN
2   A2  B2  C2  NaN
3  NaN  B3  C3   E3
4  NaN  B4  C4   E4


In [34]:
# join='inner' keeps only the columns present in every input (here B and C).
pd.concat([df5, df6], join='inner')

Unnamed: 0,B,C
1,B1,C1
2,B2,C2
3,B3,C3
4,B4,C4


In [35]:
# The `join_axes` argument was deprecated in pandas 0.25 and removed in 1.0,
# so passing it raises TypeError on current pandas. Reindexing the result on
# df5's columns gives the same frame: columns A, B, C are kept, E is dropped,
# and rows that came from df6 hold NaN in A.
# sort=False keeps the column order stable and avoids the sort FutureWarning
# that 0.23-0.25 emit for non-aligned columns.
pd.concat([df5, df6], sort=False).reindex(columns=df5.columns)

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2
3,,B3,C3
4,,B4,C4


#### 3. append() 方法

In [37]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so
# `df1.append(df2)` raises AttributeError on current pandas.
# pd.concat([df1, df2]) is the replacement: like append did, it returns a new
# frame with df2's rows after df1's and leaves both inputs unchanged.
print(df1)
print(df2)
print(pd.concat([df1, df2]))

    A   B
1  A1  B1
2  A2  B2
    A   B
3  A3  B3
4  A4  B4
    A   B
1  A1  B1
2  A2  B2
3  A3  B3
4  A4  B4
