## concat(沿轴向连接)

In [78]:
import pandas as pd
import numpy as np

In [79]:
# Two small integer frames that overlap on column 'two' and on index labels
# 'a' and 'c'; they are the inputs for the concat examples below.
df1 = pd.DataFrame(
    np.arange(6).reshape(3, 2),
    index=['a', 'b', 'c'],
    columns=['one', 'two'],
)
df2 = pd.DataFrame(
    np.arange(5, 9).reshape(2, 2),
    index=['a', 'c'],
    columns=['two', 'four'],
)

In [80]:
# Show both input frames with one blank line between them.
print(df1, df2, sep='\n\n')

   one  two
a    0    1
b    2    3
c    4    5

   two  four
a    5     6
c    7     8


In [81]:
# The defaults written out: stack along the rows (axis=0) and keep the union
# of the columns (join='outer'); cells with no source value become NaN.
pd.concat([df1, df2], axis=0, join='outer')

Unnamed: 0,one,two,four
a,0.0,1,
b,2.0,3,
c,4.0,5,
a,,5,6.0
c,,7,8.0


In [82]:
# Place the frames side by side; rows are aligned on the union of the index
# labels, so label 'b' (absent from df2) gets NaN in df2's columns.
pd.concat([df1, df2], axis='columns')

Unnamed: 0,one,two,two.1,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [83]:
# Side by side again, but keep only the index labels found in both frames.
pd.concat([df1, df2], join='inner', axis='columns')

Unnamed: 0,one,two,two.1,four
a,0,1,5,6
c,4,5,7,8


In [84]:
# Build a hierarchical index along the concatenation axis (here the rows):
# each key becomes the outer level for the rows of the matching frame.
pd.concat([df1, df2], axis=0, keys=['leval1', 'leval2'])

Unnamed: 0,Unnamed: 1,one,two,four
leval1,a,0.0,1,
leval1,b,2.0,3,
leval1,c,4.0,5,
leval2,a,,5,6.0
leval2,c,,7,8.0


In [85]:
# Build a hierarchical index along the concatenation axis (here the columns):
# each key becomes the outer level for the columns of the matching frame.
pd.concat([df1, df2], keys=['leval1', 'leval2'], axis='columns')

Unnamed: 0_level_0,leval1,leval1,leval2,leval2
Unnamed: 0_level_1,one,two,two,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [86]:
# Equivalent to passing keys=['leval1', 'leval2'] with a list of frames:
# when a dict is given, its keys are used as the outer level.
pd.concat({'leval1': df1, 'leval2': df2}, axis='columns')

Unnamed: 0_level_0,leval1,leval1,leval2,leval2
Unnamed: 0_level_1,one,two,two,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [87]:
# Row-wise with join='inner': only the columns shared by both frames are kept.
pd.concat([df1, df2], join='inner', keys=['leval1', 'leval2'], axis=0)

Unnamed: 0,Unnamed: 1,two
leval1,a,1
leval1,b,3
leval1,c,5
leval2,a,5
leval2,c,7


In [88]:
# Column-wise with join='inner': only the index labels shared by both frames
# are kept.
pd.concat([df1, df2], join='inner', keys=['leval1', 'leval2'], axis='columns')

Unnamed: 0_level_0,leval1,leval1,leval2,leval2
Unnamed: 0_level_1,one,two,two,four
a,0,1,5,6
c,4,5,7,8


In [89]:
# `names` labels the levels of the resulting hierarchical column index:
# 'upper' for the keys, 'lower' for the original column names.
pd.concat(
    [df1, df2],
    axis=1,
    join='inner',
    keys=['level1', 'level2'],
    names=['upper', 'lower'],
)

upper,level1,level1,level2,level2
lower,one,two,two,four
a,0,1,5,6
c,4,5,7,8


In [90]:
# Two frames with overlapping but differently ordered columns and the default
# integer row labels. The values are random, so they differ on every run.
df3 = pd.DataFrame(np.random.randn(3, 4), columns=list('abcd'))
df4 = pd.DataFrame(np.random.randn(2, 3), columns=list('bda'))

# Use `sep` for the blank line between the frames: passing '\n\n' as a
# positional argument makes print() surround it with spaces, which leaves a
# trailing space after df3 and shifts df4's header row by one column.
print(df3, df4, sep='\n\n')

          a         b         c         d
0  0.430875 -0.529950  0.949861 -0.367723
1 -1.001886 -0.511487 -1.374737 -0.420403
2  0.100413  0.246785 -0.205342 -1.024649 

           b         d         a
0 -0.184968 -1.629370 -0.278562
1  0.742702 -0.965977  0.543584


In [91]:
# ignore_index defaults to False: the original row labels are kept, so the
# labels 0 and 1 appear twice in the result.
pd.concat([df3, df4], ignore_index=False)

Unnamed: 0,a,b,c,d
0,0.430875,-0.52995,0.949861,-0.367723
1,-1.001886,-0.511487,-1.374737,-0.420403
2,0.100413,0.246785,-0.205342,-1.024649
0,-0.278562,-0.184968,,-1.62937
1,0.543584,0.742702,,-0.965977


In [92]:
# Discard the original row labels and number the result rows afresh (0..4).
pd.concat([df3, df4], axis=0, ignore_index=True)


Unnamed: 0,a,b,c,d
0,0.430875,-0.52995,0.949861,-0.367723
1,-1.001886,-0.511487,-1.374737,-0.420403
2,0.100413,0.246785,-0.205342,-1.024649
3,-0.278562,-0.184968,,-1.62937
4,0.543584,0.742702,,-0.965977
