In [1]:
import pandas as pd

In [4]:
# Two small demo frames that share columns A-C; only the second one has D.
data = {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}
df1 = pd.DataFrame(data)

data2 = {
    "A": [10, 11, 12],
    "B": [13, 14, 15],
    "C": [16, 17, 18],
    "D": [19, 20, 21],
}
df2 = pd.DataFrame(data2)

In [5]:
# Show df1: columns A-C on the default 0-2 index.
df1

Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8
2,3,6,9


In [6]:
# Show df2: the same A-C columns as df1 plus an extra column D.
df2

Unnamed: 0,A,B,C,D
0,10,13,16,19
1,11,14,17,20
2,12,15,18,21


Above are two different datasets

Concatenating datasets

In [8]:
# Stack df2's rows below df1's (the default, axis=0). Column D exists only in
# df2, so it is NaN for the df1 rows, and the original index labels 0-2 repeat.
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D
0,1,4,7,
1,2,5,8,
2,3,6,9,
0,10,13,16,19.0
1,11,14,17,20.0
2,12,15,18,21.0


We can also choose the axis when concatenating.
If we specify `axis=1`, the DataFrames are joined horizontally (side by side).

But by default `axis` is 0

In [9]:
# axis=1 places the frames side by side, aligning rows on the index.
# Both frames share index 0-2, so nothing is missing; note that the result
# carries the labels A, B and C twice (once from each frame).
pd.concat([df1,df2], axis=1)

Unnamed: 0,A,B,C,A.1,B.1,C.1,D
0,1,4,7,10,13,16,19
1,2,5,8,11,14,17,20
2,3,6,9,12,15,18,21


Giving keys to our dataset

In [11]:
# keys= labels each input frame, giving the rows a MultiIndex: the outer
# level is "Data1"/"Data2" and the inner level is each frame's original index.
pd.concat([df1,df2], keys=["Data1", "Data2"])

Unnamed: 0,Unnamed: 1,A,B,C,D
Data1,0,1,4,7,
Data1,1,2,5,8,
Data1,2,3,6,9,
Data2,0,10,13,16,19.0
Data2,1,11,14,17,20.0
Data2,2,12,15,18,21.0


In [13]:
# With axis=1 the keys label the columns instead: "Data1"/"Data2" become the
# outer level of a column MultiIndex.
pd.concat([df1,df2], keys=["Data1", "Data2"], axis=1)

Unnamed: 0_level_0,Data1,Data1,Data1,Data2,Data2,Data2,Data2
Unnamed: 0_level_1,A,B,C,A,B,C,D
0,1,4,7,10,13,16,19
1,2,5,8,11,14,17,20
2,3,6,9,12,15,18,21


What if the datasets have different indices?

In [14]:
# Same column layout as before, but the two frames now get different index
# labels: df1 uses 1-3, df2 uses 3-5 (only label 3 is shared).
data = {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}
df1 = pd.DataFrame(data, index=[1, 2, 3])

data2 = {
    "A": [10, 11, 12],
    "B": [13, 14, 15],
    "C": [16, 17, 18],
    "D": [19, 20, 21],
}
df2 = pd.DataFrame(data2, index=[3, 4, 5])

In [15]:
# Note the index labels: df1 uses 1-3 while df2 uses 3-5,
# so only label 3 is shared between them.
df1

Unnamed: 0,A,B,C
1,1,4,7
2,2,5,8
3,3,6,9


In [16]:
# df2 is indexed 3-5; only label 3 overlaps with df1's index.
df2

Unnamed: 0,A,B,C,D
3,10,13,16,19
4,11,14,17,20
5,12,15,18,21


In [18]:
# df1 and df2 now have different index labels, so let's concatenate them.
# With the default axis=0 the rows are simply stacked; the index is not used
# for alignment, so the shared label 3 appears twice in the result.
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D
1,1,4,7,
2,2,5,8,
3,3,6,9,
3,10,13,16,19.0
4,11,14,17,20.0
5,12,15,18,21.0


In [19]:
# But what happens if we set axis to 1?
# Rows are aligned on the index labels: df1 has no rows 4 and 5, and df2 has
# no rows 1 and 2, so those cells are NaN after concatenating.
# (This explanation is kept as `#` comments on purpose: a trailing string
# literal would become the cell's last expression and be displayed instead
# of the DataFrame when the notebook is re-run.)
pd.concat([df1, df2], axis=1)

Unnamed: 0,A,B,C,A.1,B.1,C.1,D
1,1.0,4.0,7.0,,,,
2,2.0,5.0,8.0,,,,
3,3.0,6.0,9.0,10.0,13.0,16.0,19.0
4,,,,11.0,14.0,17.0,20.0
5,,,,12.0,15.0,18.0,21.0


So, when the DataFrames have different indexes, concatenating them with `axis=1`
leaves missing values (NaN) wherever an index label exists in only one of them, as shown above.

But what if the DataFrames have different column names?

In [20]:
# Two frames with no column names in common: A-C versus D-G.
# Both keep the default 0-2 index.
data = {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}
df1 = pd.DataFrame(data)

data2 = {
    "D": [10, 11, 12],
    "E": [13, 14, 15],
    "F": [16, 17, 18],
    "G": [19, 20, 21],
}
df2 = pd.DataFrame(data2)

In [21]:
# df1 holds columns A-C.
df1

Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8
2,3,6,9


In [22]:
# df2 holds columns D-G; it shares no column names with df1.
df2

Unnamed: 0,D,E,F,G
0,10,13,16,19
1,11,14,17,20
2,12,15,18,21


In [24]:
# With axis=0 the result has the union of all columns: any column a frame
# does not have is filled with NaN for that frame's rows.
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D,E,F,G
0,1.0,4.0,7.0,,,,
1,2.0,5.0,8.0,,,,
2,3.0,6.0,9.0,,,,
0,,,,10.0,13.0,16.0,19.0
1,,,,11.0,14.0,17.0,20.0
2,,,,12.0,15.0,18.0,21.0


In [25]:
# Now with axis=1: the frames share index 0-2, so their columns are placed
# side by side and no NaN values appear.
pd.concat([df1, df2], axis=1)

Unnamed: 0,A,B,C,D,E,F,G
0,1,4,7,10,13,16,19
1,2,5,8,11,14,17,20
2,3,6,9,12,15,18,21


What happens if we have different column names and different indices?

In [26]:
# Nothing overlaps this time: columns are A-C versus D-G, and the index
# labels are 1-3 versus 4-6.
data = {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}
df1 = pd.DataFrame(data, index=[1, 2, 3])

data2 = {
    "D": [10, 11, 12],
    "E": [13, 14, 15],
    "F": [16, 17, 18],
    "G": [19, 20, 21],
}
df2 = pd.DataFrame(data2, index=[4, 5, 6])

In [27]:
# Neither column names nor index labels overlap, so the result is the union
# of both: rows 1-6 and columns A-G, with NaN wherever a frame had no data.
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D,E,F,G
1,1.0,4.0,7.0,,,,
2,2.0,5.0,8.0,,,,
3,3.0,6.0,9.0,,,,
4,,,,10.0,13.0,16.0,19.0
5,,,,11.0,14.0,17.0,20.0
6,,,,12.0,15.0,18.0,21.0


join

In [28]:
# Back to the original pair for the join demo: both frames have A-C,
# and only df2 has the extra column D.
data = {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}
df1 = pd.DataFrame(data)

data2 = {
    "A": [10, 11, 12],
    "B": [13, 14, 15],
    "C": [16, 17, 18],
    "D": [19, 20, 21],
}
df2 = pd.DataFrame(data2)

In [29]:
# join='inner' keeps only the columns present in every frame (A, B and C),
# so df2's extra column D is dropped. The default is join='outer', which
# keeps all columns and fills the gaps with NaN.
pd.concat([df1, df2], join='inner')

Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8
2,3,6,9
0,10,13,16
1,11,14,17
2,12,15,18
