In [1]:
import pandas as pd
import numpy as np

### Merging DataFrames

In [5]:
# Raw, column-oriented records for the merge examples below.
# Both tables share the key column 'employee_id'; ids 1-3 appear in both,
# ids 4-5 only in `employees`, and ids 6-7 only in `salaries`.
employees = dict(
    employee_id=[1, 2, 3, 4, 5],
    name=['Peter', 'Strange', 'Michael', 'Joe', 'Tom Cruise'],
    department=['HR', 'IT', 'Finance', 'Sales', 'Electrical'],
)

salaries = dict(
    employee_id=[1, 2, 3, 6, 7],
    salary=[60000, 80000, 50000, 65000, 35000],
    bonus=[5000, 10000, 7000, 6000, 12000],
)

In [6]:
# Turn the raw dicts into DataFrames (one dict key per column).
# Note: `employees` is rebound from a dict to a DataFrame here, while the
# salary frame gets its own name (`salary`) and `salaries` stays a dict.
salary = pd.DataFrame(data=salaries)
employees = pd.DataFrame(data=employees)

In [7]:
# Display the employees frame (one row per employee_id 1-5).
employees

Unnamed: 0,employee_id,name,department
0,1,Peter,HR
1,2,Strange,IT
2,3,Michael,Finance
3,4,Joe,Sales
4,5,Tom Cruise,Electrical


In [8]:
# Display the salary frame (employee_ids 1, 2, 3, 6, 7).
salary

Unnamed: 0,employee_id,salary,bonus
0,1,60000,5000
1,2,80000,10000
2,3,50000,7000
3,6,65000,6000
4,7,35000,12000


In [11]:
# Inner join: keep only the employee_ids present in BOTH frames (1, 2, 3).
employees.merge(salary, how='inner', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Peter,HR,60000,5000
1,2,Strange,IT,80000,10000
2,3,Michael,Finance,50000,7000


In [12]:
# Full (outer) join: keep every employee_id from either frame.
# Cells with no match are filled with NaN, which is why salary/bonus
# are shown as floats in the result.
employees.merge(salary, how='outer', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Peter,HR,60000.0,5000.0
1,2,Strange,IT,80000.0,10000.0
2,3,Michael,Finance,50000.0,7000.0
3,4,Joe,Sales,,
4,5,Tom Cruise,Electrical,,
5,6,,,65000.0,6000.0
6,7,,,35000.0,12000.0


In [13]:
# Left join: keep every row of `employees`; ids 4 and 5 have no salary
# record, so their salary/bonus come back as NaN.
employees.merge(salary, how='left', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Peter,HR,60000.0,5000.0
1,2,Strange,IT,80000.0,10000.0
2,3,Michael,Finance,50000.0,7000.0
3,4,Joe,Sales,,
4,5,Tom Cruise,Electrical,,


In [14]:
# Right join: keep every row of `salary`; ids 6 and 7 have no employee
# record, so their name/department come back as NaN.
employees.merge(salary, how='right', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Peter,HR,60000,5000
1,2,Strange,IT,80000,10000
2,3,Michael,Finance,50000,7000
3,6,,,65000,6000
4,7,,,35000,12000


### Concatenation of DataFrames

In [15]:
# Two frames with the same columns (A, B, C) and the default 0..2 index.
# Every value is "<column><n>" (n = 0-2 for df1, 3-5 for df2), so the
# origin of each cell stays visible after concatenation.
df1 = pd.DataFrame({col: [f'{col}{n}' for n in range(0, 3)] for col in 'ABC'})

df2 = pd.DataFrame({col: [f'{col}{n}' for n in range(3, 6)] for col in 'ABC'})

In [17]:
# Display df1, the first input to the concatenation examples.
df1

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [18]:
# Stack the frames vertically (axis=0, the default): rows of df2 are appended
# below df1 and columns are matched by label. The original index labels are
# kept, so 0, 1, 2 appear twice in the result.
pd.concat([df1, df2], axis=0)

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5


In [19]:
# Same vertical stacking, but with the inputs swapped: the result follows
# the order of the list, so df2's rows now come first.
pd.concat([df2, df1], axis=0)

Unnamed: 0,A,B,C
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [22]:
# Place the frames side by side (axis=1 / 'columns'): df2's columns are added
# to the right of df1's, with rows matched on the index labels 0, 1, 2.
pd.concat([df1, df2], axis='columns')

Unnamed: 0,A,B,C,A.1,B.1,C.1
0,A0,B0,C0,A3,B3,C3
1,A1,B1,C1,A4,B4,C4
2,A2,B2,C2,A5,B5,C5


### Joining of DataFrames

In [23]:
# Inputs for the index-based join example.
# NOTE: this rebinds df1/df2, which held the concatenation inputs earlier in
# the notebook; re-running the concat cells after this one gives different output.
# The two indexes overlap only on labels 2 and 3.
df1 = pd.Series(['Alice', 'Bob', 'Charlie'], index=[1, 2, 3], name='name').to_frame()

df2 = pd.Series([85, 90, 75], index=[2, 3, 4], name='score').to_frame()

In [24]:
# Display df1 (names, indexed 1-3).
df1

Unnamed: 0,name
1,Alice
2,Bob
3,Charlie


In [25]:
# Display df2 (scores, indexed 2-4).
df2

Unnamed: 0,score
2,85
3,90
4,75


In [27]:
# join() aligns the two frames on their index (not on a column).
# how='outer' keeps the union of index labels (1-4): label 1 has no score and
# label 4 has no name, so those cells are NaN and `score` is shown as float.
df1.join(other=df2, how='outer')

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,90.0
4,,75.0
