## Pandas Concat Data Frame

In [1]:
import pandas as pd
import numpy as np

### Prepare data

In [2]:
# Three 3x4 float64 frames with identical columns a-d and the default 0..2
# index. Each frame is filled with one constant (0, 1, 2) so that the origin
# of every row/column stays obvious after concatenation.
df1 = pd.DataFrame(np.zeros((3, 4)), columns=list("abcd"))
df2 = pd.DataFrame(np.ones((3, 4)), columns=list("abcd"))
df3 = pd.DataFrame(np.full((3, 4), 2.0), columns=list("abcd"))

In [3]:
# Display df1: 3 rows x 4 columns (a-d), every value 0.0.
df1

Unnamed: 0,a,b,c,d
0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0


In [4]:
# Display df2: 3 rows x 4 columns (a-d), every value 1.0.
df2

Unnamed: 0,a,b,c,d
0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0


In [5]:
# Display df3: 3 rows x 4 columns (a-d), every value 2.0.
df3

Unnamed: 0,a,b,c,d
0,2.0,2.0,2.0,2.0
1,2.0,2.0,2.0,2.0
2,2.0,2.0,2.0,2.0


### Concatenating Data Frame

In [6]:
# Stack the three frames vertically (row-wise). Every frame keeps its own
# 0..2 index labels, so the labels repeat in the result.
res = pd.concat(objs=[df1, df2, df3], axis="index")
res

Unnamed: 0,a,b,c,d
0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0
0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0
0,2.0,2.0,2.0,2.0
1,2.0,2.0,2.0,2.0
2,2.0,2.0,2.0,2.0


#### Merge rows ignoring index

In [7]:
# Row-wise stack again, but drop the original labels and renumber the
# result with a fresh 0..n-1 index. (axis defaults to 0, i.e. rows.)
res = pd.concat([df1, df2, df3], ignore_index=True)
res

Unnamed: 0,a,b,c,d
0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0
5,1.0,1.0,1.0,1.0
6,2.0,2.0,2.0,2.0
7,2.0,2.0,2.0,2.0
8,2.0,2.0,2.0,2.0


In [8]:
# Place the frames side by side (column-wise); rows are aligned on the index.
# The column labels a-d are repeated once per input frame.
res = pd.concat(objs=[df1, df2, df3], axis="columns")
res

Unnamed: 0,a,b,c,d,a.1,b.1,c.1,d.1,a.2,b.2,c.2,d.2
0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0
1,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0
2,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0


## Join Data Frame
- inner
- outer

### Prepare data

In [9]:
# Two 3x4 float64 frames that only partially overlap:
#   df1 -> columns a-d, index 1..3, filled with 0.0
#   df2 -> columns b-e, index 2..4, filled with 1.0
# The overlap (columns b-d, index 2-3) is what the join examples rely on.
df1 = pd.DataFrame(np.zeros((3, 4)), index=[1, 2, 3], columns=list("abcd"))
df2 = pd.DataFrame(np.ones((3, 4)), index=[2, 3, 4], columns=list("bcde"))

In [10]:
# Display df1: columns a-d, index labels 1-3, every value 0.0.
df1

Unnamed: 0,a,b,c,d
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0


In [11]:
# Display df2: columns b-e, index labels 2-4, every value 1.0.
df2

Unnamed: 0,b,c,d,e
2,1.0,1.0,1.0,1.0
3,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0


### If we concat directly, the default is an outer join: all columns are kept and missing values become NaN

In [12]:
# Outer join on the columns: the result has the union a-e, and cells a frame
# does not provide are filled with NaN. Spelled out even though "outer" is
# the default, to contrast with the inner join.
pd.concat(objs=[df1, df2], axis="index", join="outer")

Unnamed: 0,a,b,c,d,e
1,0.0,0.0,0.0,0.0,
2,0.0,0.0,0.0,0.0,
3,0.0,0.0,0.0,0.0,
2,,1.0,1.0,1.0,1.0
3,,1.0,1.0,1.0,1.0
4,,1.0,1.0,1.0,1.0


### With an inner join, only the columns shared by every frame are kept, so the columns that would contain NaN are dropped

In [13]:
# Inner join on the columns: only b, c, d (present in both frames) survive.
# Rows are still stacked, so the index labels 2 and 3 appear twice.
pd.concat(objs=[df1, df2], axis="index", join="inner")

Unnamed: 0,b,c,d
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
2,1.0,1.0,1.0
3,1.0,1.0,1.0
4,1.0,1.0,1.0


### We can also ignore the original row index when joining, so the result is renumbered from 0

In [14]:
# Same inner join on the columns, but the original row labels are discarded
# and replaced by a fresh 0..5 index.
pd.concat(objs=[df1, df2], ignore_index=True, join="inner")

Unnamed: 0,b,c,d
0,0.0,0.0,0.0
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,1.0,1.0,1.0
4,1.0,1.0,1.0
5,1.0,1.0,1.0


### `join_axes` keeps the index of a chosen data frame, even where the other frame's values are NaN (removed in pandas 1.0; `.reindex` gives the same result)

In [15]:
# Column-wise concat that keeps exactly df1's row labels (1, 2, 3).
#
# The original call used `join_axes=[df1.index]`. That keyword was deprecated
# in pandas 0.25 and removed in pandas 1.0, so it now raises TypeError.
# Equivalent form: do the default outer concat along the columns (index
# 1..4), then reindex to df1's index. Row 4 (only in df2) is dropped, and
# row 1 (only in df1) keeps NaN in df2's columns.
pd.concat([df1, df2], axis=1).reindex(df1.index)

Unnamed: 0,a,b,c,d,b.1,c.1,d.1,e
1,0.0,0.0,0.0,0.0,,,,
2,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
3,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0


## Append data frame

### Prepare the data

In [16]:
# Frames for the append examples (all 3x4, float64):
#   df1 -> columns a-d, default index 0..2, filled with 0.0
#   df2 -> columns a-d, default index 0..2, filled with 1.0
#   df3 -> columns b-e, index 2..4,         filled with 0.0
# df3 deliberately differs in both columns and index from df1/df2.
df1 = pd.DataFrame(np.zeros((3, 4)), columns=list("abcd"))
df2 = pd.DataFrame(np.ones((3, 4)), columns=list("abcd"))
df3 = pd.DataFrame(np.zeros((3, 4)), index=[2, 3, 4], columns=list("bcde"))

In [17]:
# Display df2: columns a-d, default index 0-2, every value 1.0.
df2

Unnamed: 0,a,b,c,d
0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0


In [18]:
# Display df3: columns b-e, index labels 2-4, every value 0.0.
df3

Unnamed: 0,b,c,d,e
2,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0


### Append data frame (`DataFrame.append` was removed in pandas 2.0; use `pd.concat` instead)

In [19]:
# Append df2's rows below df1's rows; original index labels are kept.
#
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0
# (AttributeError on current versions). `pd.concat` is the replacement and
# returns the same frame here.
pd.concat([df1, df2])

Unnamed: 0,a,b,c,d
0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0
0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0


In [20]:
# Append df3 (columns b-e) below df1 (columns a-d). The result has the union
# of the columns a-e, with NaN where a frame has no value for a column.
#
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the
# replacement. Its default outer join with sort=False gives the same frame.
pd.concat([df1, df3])

Unnamed: 0,a,b,c,d,e
0,0.0,0.0,0.0,0.0,
1,0.0,0.0,0.0,0.0,
2,0.0,0.0,0.0,0.0,
2,,0.0,0.0,0.0,0.0
3,,0.0,0.0,0.0,0.0
4,,0.0,0.0,0.0,0.0


In [21]:
# Append several frames at once and renumber the rows 0..8.
#
# `DataFrame.append` was removed in pandas 2.0. Passing all frames to one
# `pd.concat` call is the replacement and yields the same result as
# `df1.append([df2, df3], ignore_index=True)` did.
pd.concat([df1, df2, df3], ignore_index=True)

Unnamed: 0,a,b,c,d,e
0,0.0,0.0,0.0,0.0,
1,0.0,0.0,0.0,0.0,
2,0.0,0.0,0.0,0.0,
3,1.0,1.0,1.0,1.0,
4,1.0,1.0,1.0,1.0,
5,1.0,1.0,1.0,1.0,
6,,0.0,0.0,0.0,0.0
7,,0.0,0.0,0.0,0.0
8,,0.0,0.0,0.0,0.0


In [22]:
# A Series whose index labels (a-d) match df1's column names, so it can be
# added to df1 as one extra row.
s1 = pd.Series({"a": 1, "b": 2, "c": 3, "d": 4})
s1

a    1
b    2
c    3
d    4
dtype: int64

In [23]:
# Add the Series s1 to df1 as a new last row and renumber the rows 0..3.
#
# `DataFrame.append` was removed in pandas 2.0. To get the same result with
# `pd.concat`, first turn the Series into a one-row frame: `to_frame()` makes
# a single column, and `.T` transposes it so the labels a-d become columns.
# Concatenating a Series directly would not align its labels with the columns.
pd.concat([df1, s1.to_frame().T], ignore_index=True)

Unnamed: 0,a,b,c,d
0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0
3,1.0,2.0,3.0,4.0
