In [3]:
import html

import numpy as np
import pandas as pd

# Helper that quickly builds a small, self-describing DataFrame
def make_df(cols, ind):
    """Build a DataFrame whose cells spell out their own column and row label.

    Every cell holds ``str(column) + str(row_label)``; for example column
    ``'A'`` and row label ``0`` produce the value ``'A0'``.

    Parameters
    ----------
    cols : iterable
        Column labels. A string such as ``'ABC'`` yields one column per
        character.
    ind : iterable
        Row labels, used both as the index and inside every cell value.
        One-shot iterators (e.g. generators) are accepted.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``ind`` with one column per entry of ``cols``.
    """
    # `ind` is walked once per column and once more for the index, so a
    # one-shot iterator has to be materialised first. Sized containers
    # (range, list, Index, ...) are passed through untouched.
    if not hasattr(ind, '__len__'):
        ind = list(ind)
    data = {c: [str(c) + str(i) for i in ind] for c in cols}
    return pd.DataFrame(data, ind)


# Try out make_df
print(make_df('ABC', range(3)))

    A   B   C
0  A0  B0  C0
1  A1  B1  C1
2  A2  B2  C2


In [37]:
# Helper class that shows several DataFrames side by side
class display(object):
    """Render the results of several expressions next to each other.

    Each positional argument is a *string* holding a Python expression
    (e.g. ``'df1'`` or ``'pd.concat([df1, df2])'``). The expression text is
    used as the caption and its evaluated value as the content.

    The expressions are run through ``eval`` against this module's globals,
    so only pass trusted, author-written strings.

    Note: inside a notebook this name shadows IPython's own ``display``
    helper.
    """

    template = """<div style="float: left; padding: 10px;">
    <p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""

    def __init__(self, *args):
        self.args = args

    @staticmethod
    def _evaluate(expr):
        """Evaluate ``expr`` against the module globals only."""
        # Passing globals() explicitly keeps this helper's own locals out of
        # the lookup, so a user variable can never be shadowed by an internal
        # name (the loop variable used to hide a variable called `a`).
        return eval(expr, globals())

    def _render_html(self, expr):
        """Return the HTML fragment (caption + content) for one expression."""
        value = self._evaluate(expr)
        renderer = getattr(value, '_repr_html_', None)
        body = renderer() if callable(renderer) else None
        if body is None:
            # No rich representation available: fall back to the escaped
            # plain-text repr instead of raising or printing "None".
            body = '<pre>{}</pre>'.format(html.escape(repr(value)))
        # Escape the caption so '<', '>' and '&' in an expression are shown
        # literally; quotes are harmless in element text and left as-is.
        return self.template.format(html.escape(expr, quote=False), body)

    def _repr_html_(self):
        return '\n'.join(self._render_html(expr) for expr in self.args)

    def __repr__(self):
        return '\n\n'.join(expr + '\n' + repr(self._evaluate(expr))
                           for expr in self.args)

In [6]:
# np.concatenate: NumPy's routine for joining arrays
x, y, z = [1, 2, 3], [4, 5, 6], [7, 8, 9]
# Chain the three lists into a single 1-D array
print(np.concatenate((x, y, z)))

# Choose the axis to join along (axis 0 is the default)
x = [[1, 2], [3, 4]]
print(np.concatenate((x, x), axis=1))

[1 2 3 4 5 6 7 8 9]
[[1 2 1 2]
 [3 4 3 4]]


In [None]:
# pd.concat signature in pandas 2.2.3. Kept as a comment for reference only:
# the parameter names are placeholders, so the call cannot be executed as-is.
#
# pd.concat(objs, *, axis=0, join='outer', ignore_index=False, keys=None,
#           levels=None, names=None, verify_integrity=False,
#           sort=False, copy=None)

In [22]:
# pd.concat joins Series objects as well as DataFrame objects
ser1, ser2 = (pd.Series(list('ABC'), index=[1, 2, 3]),
              pd.Series(list('DEF'), index=[4, 5, 6]))
print(pd.concat([ser1, ser2]))

df1, df2 = make_df('AB', [1, 2]), make_df('AB', [3, 4])
display('df1', 'df2', 'pd.concat([df1, df2])')



1    A
2    B
3    C
4    D
5    E
6    F
dtype: object


Unnamed: 0,A,B
1,A1,B1
2,A2,B2

Unnamed: 0,A,B
3,A3,B3
4,A4,B4

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4


In [35]:
# Concatenation runs along axis 0 by default; axis 1 (columns) can be requested instead
df3, df4 = make_df('AB', [0, 1]), make_df('CD', [0, 1])
display('df3', 'df4', "pd.concat([df3, df4], axis='columns')")

Unnamed: 0,A,B
0,A0,B0
1,A1,B1

Unnamed: 0,C,D
0,C0,D0
1,C1,D1

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1


In [38]:
# pd.concat keeps the original index labels, duplicates included
x = make_df('AB', [0, 1])
# Give y the very same index as x so the two frames share their labels
y = make_df('AB', [2, 3]).set_axis(x.index, axis=0)
display('x','y', 'pd.concat([x, y])')

Unnamed: 0,A,B
0,A0,B0
1,A1,B1

Unnamed: 0,A,B
0,A2,B2
1,A3,B3

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
0,A2,B2
1,A3,B3


In [40]:
# With verify_integrity=True, duplicate index labels raise an exception
try:
    pd.concat([x, y], verify_integrity=True)
except ValueError as err:
    print(f"ValueError {err}")

ValueError Indexes have overlapping values: Index([0, 1], dtype='int64')


In [42]:
# ignore_index=True drops the original labels and renumbers the rows
# with a fresh integer index
display('x','y', 'pd.concat([x, y],ignore_index=True)')

Unnamed: 0,A,B
0,A0,B0
1,A1,B1

Unnamed: 0,A,B
0,A2,B2
1,A3,B3

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


In [43]:
# Pass keys to tag each source frame; the keys become an extra outer
# level of the resulting index
display('x','y', 'pd.concat([x, y], keys=["x", "y"])')

Unnamed: 0,A,B
0,A0,B0
1,A1,B1

Unnamed: 0,A,B
0,A2,B2
1,A3,B3

Unnamed: 0,Unnamed: 1,A,B
x,0,A0,B0
x,1,A1,B1
y,0,A2,B2
y,1,A3,B3


In [44]:
# By default the columns of the inputs are combined as a union;
# cells with no source data are left missing
df5, df6 = make_df('ABC', [1, 2]), make_df('BCD', [3, 4])
display('df5', 'df6', 'pd.concat([df5, df6])')

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2

Unnamed: 0,B,C,D
3,B3,C3,D3
4,B4,C4,D4

Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
4,,B4,C4,D4


In [45]:
# join="inner" switches to the intersection: only columns present in
# every input are kept
display('df5', 'df6', 'pd.concat([df5, df6], join="inner")')

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2

Unnamed: 0,B,C,D
3,B3,C3,D3
4,B4,C4,D4

Unnamed: 0,B,C
1,B1,C1
2,B2,C2
3,B3,C3
4,B4,C4


In [48]:
# Alternatively, reindex df6 to df5's columns before concatenating to
# choose explicitly which columns are kept and which are dropped
display('df5', 'df6', 'pd.concat([df5, df6.reindex(df5.columns,axis=1)])')


Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2

Unnamed: 0,B,C,D
3,B3,C3,D3
4,B4,C4,D4

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2
3,,B3,C3
4,,B4,C4
