In [1]:
import pandas as pd
import numpy as np

In [2]:
def make_df(cols,ind):
    data = {c:[str(c)+str(i) for i in ind] for c in cols}
    return pd.DataFrame(data,ind)

make_df('ABC',range(3))

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


# Numpy Concatenate

In [3]:
x = [1,2,3]
y = [4,5,6]
z = [7,8,9] 
A1 = np.concatenate([x,y,z])
print(A1)

a = [[1,2],[3,4]]
A2 = np.concatenate([a,a],axis=1)
print(A2)

[1 2 3 4 5 6 7 8 9]
[[1 2 1 2]
 [3 4 3 4]]


# pd.concat

In [4]:
# series concat
ser1 = pd.Series(['A','B','C'], index = [1,2,3])
ser2 = pd.Series(['D','E','F'], index = [4,5,6])
pd.concat([ser1,ser2])

1    A
2    B
3    C
4    D
5    E
6    F
dtype: object

In [5]:
# concat by row
df1 = make_df('AB',[1,2])
df2 = make_df('AB',[3,4])
print(df1)
print(df2)
pd.concat([df1,df2])

    A   B
1  A1  B1
2  A2  B2
    A   B
3  A3  B3
4  A4  B4


Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4


In [6]:
# concat by column
df3 = make_df('CD',[1,2])
print(df1)
print(df3)
pd.concat([df1,df3],axis=1)

    A   B
1  A1  B1
2  A2  B2
    C   D
1  C1  D1
2  C2  D2


Unnamed: 0,A,B,C,D
1,A1,B1,C1,D1
2,A2,B2,C2,D2


In [7]:
# duplicate index
df2.index = df1.index
print(df1)
print(df2)
pd.concat([df1,df2])

    A   B
1  A1  B1
2  A2  B2
    A   B
1  A3  B3
2  A4  B4


Unnamed: 0,A,B
1,A1,B1
2,A2,B2
1,A3,B3
2,A4,B4


In [8]:
# ignore index
pd.concat([df1,df2],ignore_index=True)

Unnamed: 0,A,B
0,A1,B1
1,A2,B2
2,A3,B3
3,A4,B4


In [9]:
pd.concat([df1,df2], keys=['x','y'])

Unnamed: 0,Unnamed: 1,A,B
x,1,A1,B1
x,2,A2,B2
y,1,A3,B3
y,2,A4,B4


# pd.concat.join

In [10]:
# default (join='outer')
df4 = make_df('ABC',[1,2])
df5 = make_df('BCD',[3,4])
print(df4)
print(df5)
pd.concat([df4,df5])

    A   B   C
1  A1  B1  C1
2  A2  B2  C2
    B   C   D
3  B3  C3  D3
4  B4  C4  D4


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
4,,B4,C4,D4


In [11]:
# inner join
pd.concat([df4,df5],join='inner')

Unnamed: 0,B,C
1,B1,C1
2,B2,C2
3,B3,C3
4,B4,C4


In [12]:
# join_axes is similar to left/right join
print(pd.concat([df4,df5],join_axes=[df4.columns]))    #left join
print(pd.concat([df4,df5],join_axes=[df5.columns]))    #right join

     A   B   C
1   A1  B1  C1
2   A2  B2  C2
3  NaN  B3  C3
4  NaN  B4  C4
    B   C    D
1  B1  C1  NaN
2  B2  C2  NaN
3  B3  C3   D3
4  B4  C4   D4


# pd.append

In [13]:
df1.append(df2)
#  it's not like list's append. everytime as u use append on df, it'll create new obj.

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
1,A3,B3
2,A4,B4
