# Merging and Joining Data

Data is divided in different Tables Pandas gives you the ability to merge the tables as like in SQL.

In [2]:
import pandas as pd

# Sample data
employees = pd.DataFrame({
    'EmployeeID': [1,2,3,4],
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'DepartmentID': [101, 102, 103, 104]
})

departments = pd.DataFrame({
    'DepartmentID': [101, 102, 105],
    'DepartmentName': ['HR', 'Engineering', 'Marketing']
})

In [3]:
# merging the dataframes on 'DepartmentID'
merged_data = pd.merge(employees, departments, on='DepartmentID') # This will perform an inner join by default
merged_data

Unnamed: 0,EmployeeID,Name,DepartmentID,DepartmentName
0,1,Alice,101,HR
1,2,Bob,102,Engineering


In [4]:
pd.merge(employees, departments, on='DepartmentID', how='left') # This will perform a left join, keeping all records from employees

Unnamed: 0,EmployeeID,Name,DepartmentID,DepartmentName
0,1,Alice,101,HR
1,2,Bob,102,Engineering
2,3,Charlie,103,
3,4,David,104,


In [6]:
pd.merge(employees, departments, on='DepartmentID', how='right') # This will perform a right join, keeping all records from departments

Unnamed: 0,EmployeeID,Name,DepartmentID,DepartmentName
0,1.0,Alice,101,HR
1,2.0,Bob,102,Engineering
2,,,105,Marketing


In [7]:
pd.merge(employees, departments, on='DepartmentID', how='outer') # This will perform a full outer join, keeping all records from both dataframes

Unnamed: 0,EmployeeID,Name,DepartmentID,DepartmentName
0,1.0,Alice,101,HR
1,2.0,Bob,102,Engineering
2,3.0,Charlie,103,
3,4.0,David,104,
4,,,105,Marketing


## Concatenating DataFrame

In [11]:
# Now i have two DataFrames 
# Vertical rows
df1 = pd.DataFrame({'Name' : ['Alice', 'Bob', 'Charlie']})
df2 = pd.DataFrame({'Name' : ['David', 'Eve', 'Frank']})
# Horizontal columns
df3 = pd.DataFrame({"ID" : [1,2,3]})
df4 = pd.DataFrame({"Score" : [85, 90, 95]})

# Concatenating vertically (stacking rows)
vertical_concat = pd.concat([df1, df2], ignore_index=True, axis=0) # ignore_index=True to reset the index
vertical_concat

# Concatenating horizontally (side by side)
horizontal_concat = pd.concat([df3, df4], axis=1) # axis=1 for horizontal concatenation
horizontal_concat


Unnamed: 0,ID,Score
0,1,85
1,2,90
2,3,95
