In [1]:
import numpy as np
import pandas as pd

In [None]:
# Concatenate DataFrames that have the same columns (pd.concat)

In [2]:
# Two small frames with an identical column layout: 'name' and 'age'.
df_1 = pd.DataFrame({
    'name': ['Tom', 'Alice', 'Bill'],
    'age': [23, 25, 26],
})
df_2 = pd.DataFrame({
    'name': ['Jack', 'Mike'],
    'age': [21, 23],
})

In [3]:
# Preview the first frame: three rows with 'name' and 'age'.
df_1

Unnamed: 0,name,age
0,Tom,23
1,Alice,25
2,Bill,26


In [4]:
# Preview the second frame: two rows with the same columns as df_1.
df_2

Unnamed: 0,name,age
0,Jack,21
1,Mike,23


In [5]:
# Stack df_2 underneath df_1. Each frame keeps its own index labels,
# so the labels 0 and 1 appear twice in the result.
pd.concat(objs=[df_1, df_2], axis=0)

Unnamed: 0,name,age
0,Tom,23
1,Alice,25
2,Bill,26
0,Jack,21
1,Mike,23


In [6]:
# Row-wise concatenation (axis=0 is the default); ignore_index=True
# discards the original labels and renumbers the rows from 0.
df = pd.concat(
    objs=[df_1, df_2],
    axis=0,
    ignore_index=True,
)
df

Unnamed: 0,name,age
0,Tom,23
1,Alice,25
2,Bill,26
3,Jack,21
4,Mike,23


In [None]:
# Concatenate DataFrames row-wise when their columns only partly overlap

In [9]:
# Both frames have 'name' and 'age'; 'salary' exists only in df_1 and
# 'sex' only in df_2. The NaN makes df_1's 'age' column float.
df_1 = pd.DataFrame({
    'name': ['Tom', 'Alice', 'Bill'],
    'age': [np.nan, 25, 26],
    'salary': [1000, 1200, 1500],
})
df_2 = pd.DataFrame({
    'name': ['Jack', 'Mike'],
    'age': [21, 23],
    'sex': ['male', 'male'],
})

In [11]:
# Preview df_1: Tom's age is missing, and 'salary' exists only in this frame.
df_1

Unnamed: 0,name,age,salary
0,Tom,,1000
1,Alice,25.0,1200
2,Bill,26.0,1500


In [10]:
# Preview df_2: it has a 'sex' column but no 'salary'.
df_2

Unnamed: 0,name,age,sex
0,Jack,21,male
1,Mike,23,male


In [12]:
# join='outer' keeps the union of the columns; cells a frame has no
# value for ('sex' for df_1 rows, 'salary' for df_2 rows) become NaN.
df = pd.concat(
    objs=[df_1, df_2],
    join='outer',
    ignore_index=True,
)
df

Unnamed: 0,name,age,salary,sex
0,Tom,,1000.0,
1,Alice,25.0,1200.0,
2,Bill,26.0,1500.0,
3,Jack,21.0,,male
4,Mike,23.0,,male


In [13]:
# join='inner' keeps only the columns present in both frames
# ('name' and 'age'); 'salary' and 'sex' are dropped.
df = pd.concat(
    objs=[df_1, df_2],
    join='inner',
    ignore_index=True,
)
df

Unnamed: 0,name,age
0,Tom,
1,Alice,25.0
2,Bill,26.0
3,Jack,21.0
4,Mike,23.0


In [None]:
# Concatenate DataFrames column-wise (axis=1), aligning rows on the index

In [14]:
# Frames with no columns in common and a different number of rows
# (three in df_1, two in df_2), to be placed side by side.
df_1 = pd.DataFrame({
    'name': ['Tom', 'Alice', 'Bill'],
    'age': [23, np.nan, 26],
})
df_2 = pd.DataFrame({
    'sex': ['male', 'female'],
    'salary': [1000, 1200],
})

In [15]:
# Preview df_1: three rows, with Alice's age missing.
df_1

Unnamed: 0,name,age
0,Tom,23.0
1,Alice,
2,Bill,26.0


In [16]:
# Preview df_2: only two rows (index 0 and 1), one fewer than df_1.
df_2

Unnamed: 0,sex,salary
0,male,1000
1,female,1200


In [17]:
# Side-by-side concatenation: rows are matched on their index label.
# join='outer' keeps every label, so row 2 (absent from df_2) gets NaN
# in the 'sex' and 'salary' columns.
df = pd.concat(
    objs=[df_1, df_2],
    axis=1,
    join='outer',
)
df

Unnamed: 0,name,age,sex,salary
0,Tom,23.0,male,1000.0
1,Alice,,female,1200.0
2,Bill,26.0,,


In [18]:
# Side-by-side concatenation with join='inner': only index labels found
# in both frames (0 and 1) survive, so Bill's row is dropped.
df = pd.concat(
    objs=[df_1, df_2],
    axis=1,
    join='inner',
)
df

Unnamed: 0,name,age,sex,salary
0,Tom,23.0,male,1000
1,Alice,,female,1200


In [19]:
# pandas.merge()
# Two frames that have exactly one column in common ('name'), which
# serves as the join key. df_2 has no row for Bill.
df_1 = pd.DataFrame({
    'name': ['Tom', 'Alice', 'Bill'],
    'age': [23, np.nan, 26],
})
df_2 = pd.DataFrame({
    'name': ['Alice', 'Tom'],
    'sex': ['male', 'female'],
    'salary': [1000, 1200],
})

In [20]:
# Preview the left frame: three names, with Alice's age missing.
df_1

Unnamed: 0,name,age
0,Tom,23.0
1,Alice,
2,Bill,26.0


In [21]:
# Preview the right frame: it lists Alice and Tom only (no Bill), in a different row order than df_1.
df_2

Unnamed: 0,name,sex,salary
0,Alice,male,1000
1,Tom,female,1200


In [22]:
# Inner join: keep only the names that occur in both frames.
# The key is passed explicitly. When `on` is omitted, pd.merge joins on
# every column name the two frames share; here that is only 'name', so
# the result is unchanged, but the join no longer depends on which
# columns happen to overlap.
df = pd.merge(df_1, df_2, on='name', how='inner')
df

Unnamed: 0,name,age,sex,salary
0,Tom,23.0,female,1200
1,Alice,,male,1000


In [23]:
# Outer join: keep every name from either frame; a row with no match on
# the other side (Bill) is padded with NaN.
# The key is passed explicitly. When `on` is omitted, pd.merge joins on
# every column name the two frames share; here that is only 'name', so
# the result is unchanged, but the join no longer depends on which
# columns happen to overlap.
df = pd.merge(df_1, df_2, on='name', how='outer')
df

Unnamed: 0,name,age,sex,salary
0,Tom,23.0,female,1200.0
1,Alice,,male,1000.0
2,Bill,26.0,,


In [24]:
# Frames whose join keys have different names: employees refer to a
# department through 'department_id', departments are keyed by 'depid'.
# Jack has no department (NaN), which makes 'department_id' float.
df_dep = pd.DataFrame({
    'depid': [1, 2, 3],
    'depname': ['IT', 'Sales', 'HR'],
})
df_emp = pd.DataFrame({
    'id': [100, 101, 102, 103],
    'name': ['Mike', 'King', 'Billy', 'Jack'],
    'department_id': [1, 2, 1, np.nan],
})

In [25]:
# Preview the employees: Jack has no department_id (NaN).
df_emp

Unnamed: 0,id,name,department_id
0,100,Mike,1.0
1,101,King,2.0
2,102,Billy,1.0
3,103,Jack,


In [26]:
# Preview the departments: depid 3 (HR) is not referenced by any employee.
df_dep

Unnamed: 0,depid,depname
0,1,IT
1,2,Sales
2,3,HR


In [27]:
# Inner join on differently named keys: employees.department_id against
# departments.depid. Both key columns are kept in the result; Jack (no
# department) and HR (no employees) have no match and are left out.
df = pd.merge(
    left=df_emp,
    right=df_dep,
    how='inner',
    left_on='department_id',
    right_on='depid',
)
df

Unnamed: 0,id,name,department_id,depid,depname
0,100,Mike,1.0,1,IT
1,102,Billy,1.0,1,IT
2,101,King,2.0,2,Sales


In [28]:
# Outer join on differently named keys: unmatched rows from both sides
# are kept (Jack without a department, HR without employees) and padded
# with NaN, which is why 'id' and 'depid' are shown as floats.
df = pd.merge(
    left=df_emp,
    right=df_dep,
    how='outer',
    left_on='department_id',
    right_on='depid',
)
df

Unnamed: 0,id,name,department_id,depid,depname
0,100.0,Mike,1.0,1.0,IT
1,102.0,Billy,1.0,1.0,IT
2,101.0,King,2.0,2.0,Sales
3,103.0,Jack,,,
4,,,,3.0,HR


In [30]:
# Frames for the left/right join examples: the key column is named
# 'depid' in both. Jack has no department (NaN) and no employee
# belongs to department 3 (HR).
df_dep = pd.DataFrame({
    'depid': [1, 2, 3],
    'depname': ['IT', 'Sales', 'HR'],
})
df_emp = pd.DataFrame({
    'id': [100, 101, 102, 103],
    'name': ['Mike', 'King', 'Billy', 'Jack'],
    'depid': [1, 2, 1, np.nan],
})

In [31]:
# Preview the employees: the department key is named 'depid' here, the same name df_dep uses.
df_emp

Unnamed: 0,id,name,depid
0,100,Mike,1.0
1,101,King,2.0
2,102,Billy,1.0
3,103,Jack,


In [32]:
# Preview the departments: HR (depid 3) has no employees.
df_dep

Unnamed: 0,depid,depname
0,1,IT
1,2,Sales
2,3,HR


In [33]:
# Left join: keep every employee; Jack, who has no department, gets NaN
# for 'depname'. HR, which has no employees, does not appear.
# The key is passed explicitly. When `on` is omitted, pd.merge joins on
# every column name the two frames share; here that is only 'depid', so
# the result is unchanged, but the join no longer depends on which
# columns happen to overlap.
df = pd.merge(df_emp, df_dep, on='depid', how='left')
df

Unnamed: 0,id,name,depid,depname
0,100,Mike,1.0,IT
1,101,King,2.0,Sales
2,102,Billy,1.0,IT
3,103,Jack,,


In [34]:
# Right join: keep every department; HR, which has no employees, gets NaN
# for 'id' and 'name'. Jack, who has no department, does not appear.
# The key is passed explicitly. When `on` is omitted, pd.merge joins on
# every column name the two frames share; here that is only 'depid', so
# the result is unchanged, but the join no longer depends on which
# columns happen to overlap.
df = pd.merge(df_emp, df_dep, on='depid', how='right')
df

Unnamed: 0,id,name,depid,depname
0,100.0,Mike,1.0,IT
1,102.0,Billy,1.0,IT
2,101.0,King,2.0,Sales
3,,,3.0,HR
