<img src = 'Concat_syntax.JPG'/>

In [85]:
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [86]:
# First sample frame: columns A-E, rows 0-3, each cell is "<column><row>".
df1 = pd.DataFrame(
    {col: [col + str(row) for row in range(4)] for col in 'ABCDE'}
)
df1

Unnamed: 0,A,B,C,D,E
0,A0,B0,C0,D0,E0
1,A1,B1,C1,D1,E1
2,A2,B2,C2,D2,E2
3,A3,B3,C3,D3,E3


In [87]:
# Second sample frame: shares columns A-D with df1 but has F instead of E;
# cell labels continue at 4-7 while the row index is again the default 0-3.
df2 = pd.DataFrame(
    {col: [col + str(row) for row in range(4, 8)] for col in 'ABCDF'}
)
df2

Unnamed: 0,A,B,C,D,F
0,A4,B4,C4,D4,F4
1,A5,B5,C5,D5,F5
2,A6,B6,C6,D6,F6
3,A7,B7,C7,D7,F7


Default `concat()` parameters: `axis=0`, `join='outer'`, `ignore_index=False`.

In [88]:
# Defaults written out explicitly: stack along the rows, keep the union of
# the columns, and leave each frame's original index labels untouched.
pd.concat(objs=[df1, df2], axis=0, join='outer', ignore_index=False)

Unnamed: 0,A,B,C,D,E,F
0,A0,B0,C0,D0,E0,
1,A1,B1,C1,D1,E1,
2,A2,B2,C2,D2,E2,
3,A3,B3,C3,D3,E3,
0,A4,B4,C4,D4,,F4
1,A5,B5,C5,D5,,F5
2,A6,B6,C6,D6,,F6
3,A7,B7,C7,D7,,F7


`ignore_index=True`: discard the original indexes and assign a new consecutive index (0..n-1) to all rows.

In [89]:
# Stack the rows and renumber them consecutively instead of repeating 0-3.
pd.concat(
    objs=[df1, df2],
    ignore_index=True,
)

Unnamed: 0,A,B,C,D,E,F
0,A0,B0,C0,D0,E0,
1,A1,B1,C1,D1,E1,
2,A2,B2,C2,D2,E2,
3,A3,B3,C3,D3,E3,
4,A4,B4,C4,D4,,F4
5,A5,B5,C5,D5,,F5
6,A6,B6,C6,D6,,F6
7,A7,B7,C7,D7,,F7


`join='inner'`: keep only the columns that both DataFrames share (A–D here) and drop every other column.

In [90]:
# Inner join on the column labels: E and F exist in only one frame each,
# so they are left out of the result.
pd.concat(
    objs=[df1, df2],
    join='inner',
    ignore_index=True,
)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


Concatenate a DataFrame with a Series column-wise (`axis=1`); the Series name becomes the new column label.

In [91]:
# A Series named 'F' holding 0-3; its name is used as the column label
# when it is placed next to df1.
s1 = pd.Series(data=[0, 1, 2, 3], name='F')
pd.concat([df1, s1], axis='columns')

Unnamed: 0,A,B,C,D,E,F
0,A0,B0,C0,D0,E0,0
1,A1,B1,C1,D1,E1,1
2,A2,B2,C2,D2,E2,2
3,A3,B3,C3,D3,E3,3


In [92]:
# Derive a Series named 'G' from column A by adding the '_GG' suffix.
# Vectorized string concatenation replaces the row-wise apply/lambda:
# same values and index, without calling a Python function per row.
s2 = (df1['A'] + '_GG').rename('G')

A DataFrame can be extended with several Series in a single `concat` call.

In [93]:
# Place both Series to the right of df1 as new columns F and G.
pd.concat([df1, s1, s2], axis='columns')

Unnamed: 0,A,B,C,D,E,F,G
0,A0,B0,C0,D0,E0,0,A0_GG
1,A1,B1,C1,D1,E1,1,A1_GG
2,A2,B2,C2,D2,E2,2,A2_GG
3,A3,B3,C3,D3,E3,3,A3_GG


Two Series can be concatenated column-wise into a DataFrame.

In [94]:
# Side-by-side concatenation of two Series yields a two-column DataFrame
# whose column labels are the Series names.
pd.concat([s1, s2], axis='columns')

Unnamed: 0,F,G
0,0,A0_GG
1,1,A1_GG
2,2,A2_GG
3,3,A3_GG


DataFrames and Series can be concatenated in any order; the columns appear in the order the objects are listed.

In [95]:
# Column order follows the list order: F first, then df1's columns, then G.
pd.concat([s1, df1, s2], axis='columns')

Unnamed: 0,F,A,B,C,D,E,G
0,0,A0,B0,C0,D0,E0,A0_GG
1,1,A1,B1,C1,D1,E1,A1_GG
2,2,A2,B2,C2,D2,E2,A2_GG
3,3,A3,B3,C3,D3,E3,A3_GG


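Append — note: `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0; `pd.concat` is the supported way to stack rows.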

In [96]:
# Small numeric frame for the row-stacking examples: rows (1, 2) and (3, 4).
df1 = pd.DataFrame({'A': [1, 3], 'B': [2, 4]})
df1

Unnamed: 0,A,B
0,1,2
1,3,4


In [97]:
# Second numeric frame with the same columns: rows (5, 6) and (7, 8).
df2 = pd.DataFrame({'A': [5, 7], 'B': [6, 8]})
df2

Unnamed: 0,A,B
0,5,6
1,7,8


In [98]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat stacks the rows the same way and keeps each frame's own index
# labels (0, 1, 0, 1).
pd.concat([df1, df2])

Unnamed: 0,A,B
0,1,2
1,3,4
0,5,6
1,7,8


In [99]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat with ignore_index=True stacks the rows and renumbers them 0-3.
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,A,B
0,1,2
1,3,4
2,5,6
3,7,8


add data row by row

In [100]:
# Start from a frame that has the column 'A' but no rows yet.
df1 = pd.DataFrame(data=None, columns=['A'])
df1

Unnamed: 0,A


Low-performance version: the DataFrame is rebuilt on every iteration, so all previously added rows are copied each time.

In [101]:
# Grow df1 one row per iteration. DataFrame.append no longer exists in
# pandas >= 2.0, so each one-row frame is attached with pd.concat instead.
# Every pass still copies the rows collected so far, which is what makes
# this the slow variant.
for i in range(5):
    df1 = pd.concat([df1, pd.DataFrame([{'A': i}])], ignore_index=True)
df1

Unnamed: 0,A
0,0
1,1
2,2
3,3
4,4


High-performance version: uses more memory but less time, because all the pieces are built first and concatenated once.

In [102]:
# Build all single-row frames up front, then join them with one concat call
# rather than growing a frame inside a loop.
pd.concat(
    [pd.DataFrame({'A': [value]}) for value in range(5)],
    ignore_index=True,
)

Unnamed: 0,A
0,0
1,1
2,2
3,3
4,4
