In [4]:
import numpy as np
import pandas as pd

### Merge

In [5]:
# Employee master data: one row per employee, keyed by employee_id (1-5).
employees = pd.DataFrame(
    dict(
        employee_id=[1, 2, 3, 4, 5],
        name=["John", "Anna", "Peter", "Linda", "Bob"],
        department=["HR", "IT", "Finance", "IT", "HR"],
    )
)

# Compensation data keyed by employee_id. Ids 1-3 also appear in `employees`;
# ids 6 and 7 do not, and ids 4 and 5 have no row here.
salaries = pd.DataFrame(
    dict(
        employee_id=[1, 2, 3, 6, 7],
        salary=[68080, 80000, 65000, 70000, 90000],
        bonus=[5000, 10000, 7000, 8000, 12000],
    )
)

In [6]:
# Show the employees frame (left-hand input of the merges below).
employees

Unnamed: 0,employee_id,name,department
0,1,John,HR
1,2,Anna,IT
2,3,Peter,Finance
3,4,Linda,IT
4,5,Bob,HR


In [7]:
# Show the salaries frame (right-hand input of the merges below).
salaries

Unnamed: 0,employee_id,salary,bonus
0,1,68080,5000
1,2,80000,10000
2,3,65000,7000
3,6,70000,8000
4,7,90000,12000


In [None]:
# With no `on` argument, merge joins on the column(s) the two frames share;
# here the only shared column is `employee_id`.
pd.merge(left=employees, right=salaries)

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,68080,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000


In [None]:
# Name the join key explicitly: `employee_id` is the column present in both frames.
pd.merge(left=employees, right=salaries, on="employee_id")

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,68080,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000


In [None]:
# `name` exists only in `employees`, not in `salaries`, so it cannot be used as
# the merge key. The KeyError is the point of this cell: catch and print it so
# the demonstration stays visible without halting a Restart & Run All.
try:
    pd.merge(employees, salaries, on="name")
except KeyError as err:
    print(f"{type(err).__name__}: {err}")

KeyError: 'name'

In [None]:
# `how` is omitted here, so the default how="inner" applies.
pd.merge(left=employees, right=salaries, on="employee_id")

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,68080,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000


In [None]:
# how="inner": keep only the employee_ids present in both frames (1, 2, 3).
pd.merge(left=employees, right=salaries, on="employee_id", how="inner")

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,68080,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000


In [16]:
# how="outer": keep every employee_id found in either frame; columns that
# have no match on the other side are left missing.
pd.merge(left=employees, right=salaries, on="employee_id", how="outer")

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,68080.0,5000.0
1,2,Anna,IT,80000.0,10000.0
2,3,Peter,Finance,65000.0,7000.0
3,4,Linda,IT,,
4,5,Bob,HR,,
5,6,,,70000.0,8000.0
6,7,,,90000.0,12000.0


In [None]:
# how="left": keep every row of `employees` (the left frame); salary and
# bonus are missing for ids 4 and 5, which have no row in `salaries`.
pd.merge(left=employees, right=salaries, on="employee_id", how="left")

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,68080.0,5000.0
1,2,Anna,IT,80000.0,10000.0
2,3,Peter,Finance,65000.0,7000.0
3,4,Linda,IT,,
4,5,Bob,HR,,


In [None]:
# how="right": keep every row of `salaries` (the right frame); name and
# department are missing for ids 6 and 7, which have no row in `employees`.
pd.merge(left=employees, right=salaries, on="employee_id", how="right")

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,68080,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000
3,6,,,70000,8000
4,7,,,90000,12000


### Concatenation

In [18]:
# Two frames with identical columns (A, B, C) and the default 0-2 index.
# Each cell holds "<column letter><row number>": df1 covers rows 0-2 ...
df1 = pd.DataFrame({col: [f"{col}{row}" for row in range(0, 3)] for col in "ABC"})

# ... and df2 continues the numbering with rows 3-5.
df2 = pd.DataFrame({col: [f"{col}{row}" for row in range(3, 6)] for col in "ABC"})

In [19]:
# Show the first frame to be concatenated.
df1

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [20]:
# Show the second frame to be concatenated.
df2

Unnamed: 0,A,B,C
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5


In [22]:
# Stack df2's rows beneath df1's. Each frame keeps its own index labels,
# so 0, 1, 2 appear twice in the result.
pd.concat(objs=[df1, df2], axis="index")

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5


In [23]:
# Same row-wise stacking with the order swapped: df2's rows come first.
pd.concat(objs=[df2, df1], axis="index")

Unnamed: 0,A,B,C
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [25]:
# Place the frames side by side instead of stacking them: rows are matched
# on the index (0, 1, 2) and the result has six columns.
pd.concat(objs=[df1, df2], axis="columns")

Unnamed: 0,A,B,C,A.1,B.1,C.1
0,A0,B0,C0,A3,B3,C3
1,A1,B1,C1,A4,B4,C4
2,A2,B2,C2,A5,B5,C5


### Joining

In [26]:
# NOTE: df1 and df2 are rebound here; they no longer refer to the frames
# used in the Concatenation section.

# First DataFrame: names on a custom index 1-3.
df1 = pd.DataFrame(["Alice", "Bob", "Charlie"], index=[1, 2, 3], columns=["name"])

# Second DataFrame: scores on a custom index 2-4, overlapping df1 on labels 2 and 3.
df2 = pd.DataFrame([85, 98, 75], index=[2, 3, 4], columns=["score"])

In [27]:
# Show the names frame (index labels 1, 2, 3).
df1

Unnamed: 0,name
1,Alice
2,Bob
3,Charlie


In [28]:
# Show the scores frame (index labels 2, 3, 4).
df2

Unnamed: 0,score
2,85
3,98
4,75


In [None]:
# join matches rows on the index and defaults to a left join: every index
# label of df1 is kept, and score is missing for label 1 (absent from df2).
df1.join(other=df2)

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,98.0


In [30]:
# Same default left join with the roles swapped: every index label of df2 is
# kept, and name is missing for label 4 (absent from df1).
df2.join(other=df1)

Unnamed: 0,score,name
2,85,Bob
3,98,Charlie
4,75,


In [31]:
# how="inner": keep only the index labels present in both frames (2 and 3).
df1.join(other=df2, how="inner")

Unnamed: 0,name,score
2,Bob,85
3,Charlie,98


In [32]:
# how="outer": keep the index labels of both frames (1-4); values are
# missing wherever a label exists in only one of them.
df1.join(other=df2, how="outer")

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,98.0
4,,75.0
