### Concatenating Datasets using pd.concat()

In [1]:
import pandas as pd

In [2]:
# Two small example frames: both have columns A, B and C; only the second has D.
data = {
    "A": [1, 2, 3],
    "B": [4, 5, 6],
    "C": [7, 8, 9],
}
data2 = {
    "A": [10, 11, 12],
    "B": [13, 14, 15],
    "C": [16, 17, 18],
    "D": [19, 20, 21],
}

# No index is passed, so each frame gets the default row labels 0, 1, 2.
df1 = pd.DataFrame(data=data)
df2 = pd.DataFrame(data=data2)

In [3]:
# Default (axis=0): df2's rows are stacked under df1's, each frame keeping its own 0-2 labels.
# Column D exists only in df2, so df1's rows get NaN there and D is shown as float.
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D
0,1,4,7,
1,2,5,8,
2,3,6,9,
0,10,13,16,19.0
1,11,14,17,20.0
2,12,15,18,21.0


In [4]:
# axis=1 concatenates horizontally: the frames are placed side by side,
# with rows matched up by their index labels (0-2 in both frames here).
pd.concat([df1, df2], axis=1)

Unnamed: 0,A,B,C,A.1,B.1,C.1,D
0,1,4,7,10,13,16,19
1,2,5,8,11,14,17,20
2,3,6,9,12,15,18,21


#### Giving keys to our datasets

In [5]:
# keys labels each input frame: the result gets an outer index level
# ('Data 1' / 'Data 2') in front of each frame's original row labels.
pd.concat([df1, df2], keys=['Data 1', 'Data 2'])

Unnamed: 0,Unnamed: 1,A,B,C,D
Data 1,0,1,4,7,
Data 1,1,2,5,8,
Data 1,2,3,6,9,
Data 2,0,10,13,16,19.0
Data 2,1,11,14,17,20.0
Data 2,2,12,15,18,21.0


In [6]:
# With axis=1 the keys become the outer level of the column labels instead,
# so each column is identified by (key, original column name).
pd.concat([df1, df2], keys=['Data 1', 'Data 2'], axis=1)

Unnamed: 0_level_0,Data 1,Data 1,Data 1,Data 2,Data 2,Data 2,Data 2
Unnamed: 0_level_1,A,B,C,A,B,C,D
0,1,4,7,10,13,16,19
1,2,5,8,11,14,17,20
2,3,6,9,12,15,18,21


#### Ignoring the index

In [7]:
# ignore_index=True discards the original row labels and renumbers the result from 0 (here 0-5).
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,A,B,C,D
0,1,4,7,
1,2,5,8,
2,3,6,9,
3,10,13,16,19.0
4,11,14,17,20.0
5,12,15,18,21.0


#### What happens when we have different indices?

In [13]:
# Example frames with columns A-C (plus D in the second one).
data = {
    "A": [1, 2, 3],
    "B": [4, 5, 6],
    "C": [7, 8, 9],
}
data2 = {
    "A": [10, 11, 12],
    "B": [13, 14, 15],
    "C": [16, 17, 18],
    "D": [19, 20, 21],
}

# Explicit row labels: df1 uses 1-3 and df2 uses 4-6, so the frames share no index labels.
df1 = pd.DataFrame(data=data, index=[1, 2, 3])
df2 = pd.DataFrame(data=data2, index=[4, 5, 6])

In [14]:
# Rows are stacked exactly as before; each row keeps its own label (1-3, then 4-6),
# so this time the resulting index contains no duplicates.
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D
1,1,4,7,
2,2,5,8,
3,3,6,9,
4,10,13,16,19.0
5,11,14,17,20.0
6,12,15,18,21.0


In [15]:
# On axis=1 rows are matched by index label. The frames share no labels, so every row
# is NaN in the other frame's columns (and the integer values are shown as floats).
pd.concat([df1, df2], axis=1)

Unnamed: 0,A,B,C,A.1,B.1,C.1,D
1,1.0,4.0,7.0,,,,
2,2.0,5.0,8.0,,,,
3,3.0,6.0,9.0,,,,
4,,,,10.0,13.0,16.0,19.0
5,,,,11.0,14.0,17.0,20.0
6,,,,12.0,15.0,18.0,21.0


#### What happens if we have different column names?

In [18]:
# Example frames with no column names in common: A-C in the first, D-G in the second.
data = {
    "A": [1, 2, 3],
    "B": [4, 5, 6],
    "C": [7, 8, 9],
}
data2 = {
    "D": [10, 11, 12],
    "E": [13, 14, 15],
    "F": [16, 17, 18],
    "G": [19, 20, 21],
}

# No index is passed, so both frames get the default row labels 0, 1, 2.
df1 = pd.DataFrame(data=data)
df2 = pd.DataFrame(data=data2)

In [19]:
# No column names are shared: the result has all seven columns, and each row is NaN
# in the columns that belong to the other frame.
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D,E,F,G
0,1.0,4.0,7.0,,,,
1,2.0,5.0,8.0,,,,
2,3.0,6.0,9.0,,,,
0,,,,10.0,13.0,16.0,19.0
1,,,,11.0,14.0,17.0,20.0
2,,,,12.0,15.0,18.0,21.0


In [21]:
# Both frames share the row labels 0-2, so side by side they line up with no missing values.
pd.concat([df1, df2], axis=1)

Unnamed: 0,A,B,C,D,E,F,G
0,1,4,7,10,13,16,19
1,2,5,8,11,14,17,20
2,3,6,9,12,15,18,21


#### What happens if we have both different column names and different index labels?

In [23]:
# Example frames with no column names in common: A-C in the first, D-G in the second.
data = {
    "A": [1, 2, 3],
    "B": [4, 5, 6],
    "C": [7, 8, 9],
}
data2 = {
    "D": [10, 11, 12],
    "E": [13, 14, 15],
    "F": [16, 17, 18],
    "G": [19, 20, 21],
}

# Explicit row labels as well: 1-3 for df1 and 4-6 for df2, so the index labels do not overlap either.
df1 = pd.DataFrame(data=data, index=[1, 2, 3])
df2 = pd.DataFrame(data=data2, index=[4, 5, 6])

In [24]:
# Neither column names nor index labels overlap: rows are stacked, and each row is NaN
# in the columns that belong to the other frame.
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D,E,F,G
1,1.0,4.0,7.0,,,,
2,2.0,5.0,8.0,,,,
3,3.0,6.0,9.0,,,,
4,,,,10.0,13.0,16.0,19.0
5,,,,11.0,14.0,17.0,20.0
6,,,,12.0,15.0,18.0,21.0


In [25]:
# With no overlap in either direction, axis=1 produces the same block layout as the
# row-wise concat: all six row labels, all seven columns, NaN wherever a frame has no data.
pd.concat([df1, df2], axis=1)

Unnamed: 0,A,B,C,D,E,F,G
1,1.0,4.0,7.0,,,,
2,2.0,5.0,8.0,,,,
3,3.0,6.0,9.0,,,,
4,,,,10.0,13.0,16.0,19.0
5,,,,11.0,14.0,17.0,20.0
6,,,,12.0,15.0,18.0,21.0


#### Using join with the original dataset

In [26]:
# Example frames that share columns A and B; C appears only in the first, D only in the second.
data = {
    "A": [1, 1, 1],
    "B": [2, 2, 2],
    "C": [3, 3, 3],
}
data2 = {
    "A": [2, 2, 2],
    "B": [3, 3, 3],
    "D": [5, 5, 5],
}

# No index is passed, so both frames get the default row labels 0, 1, 2.
df1 = pd.DataFrame(data=data)
df2 = pd.DataFrame(data=data2)

In [27]:
# Default join='outer' keeps the union of the columns: C is NaN for df2's rows
# and D is NaN for df1's rows.
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D
0,1,2,3.0,
1,1,2,3.0,
2,1,2,3.0,
0,2,3,,5.0
1,2,3,,5.0
2,2,3,,5.0


In [28]:
# join='inner' keeps only the columns that are present in both frames (A and B here).
pd.concat([df1, df2], join='inner')

Unnamed: 0,A,B
0,1,2
1,1,2
2,1,2
0,2,3
1,2,3
2,2,3
