In [1]:
import pandas as pd
import numpy as np

# Render every float with exactly two decimal places when frames are displayed.
pd.options.display.float_format = '{:.2f}'.format

## Concatenate Data Along Rows

# Two employee tables with identical columns, written one tuple per row so
# the row-wise stacking that follows is easy to picture.
dataframe1 = pd.DataFrame(
    [(101, 'Alice', 'HR'), (102, 'Bob', 'Finance')],
    columns=['Employee_ID', 'Name', 'Department'],
)

dataframe2 = pd.DataFrame(
    [(103, 'Charlie', 'Marketing'), (104, 'David', 'IT')],
    columns=['Employee_ID', 'Name', 'Department'],
)

In [3]:
# Stack the second table underneath the first. Each input keeps its own row
# labels, so the combined index reads 0, 1, 0, 1 rather than 0..3.
concatenated_rows = pd.concat(objs=[dataframe1, dataframe2], axis='index')
concatenated_rows

Unnamed: 0,Employee_ID,Name,Department
0,101,Alice,HR
1,102,Bob,Finance
0,103,Charlie,Marketing
1,104,David,IT


## Concatenate Data Along Columns

In [4]:
# Two frames that share the default 0..1 row labels but have disjoint columns,
# so they can be placed side by side.
dataframe1 = pd.DataFrame(dict(
    Employee_ID=[101, 102],
    Name=['Alice', 'Bob'],
))

dataframe2 = pd.DataFrame(dict(
    Salary=[50000, 60000],
    Department=['HR', 'Finance'],
))

In [5]:
# Glue the two frames together column-wise; rows are aligned on the index.
concatenated_columns = pd.concat(objs=[dataframe1, dataframe2], axis='columns')
concatenated_columns

Unnamed: 0,Employee_ID,Name,Salary,Department
0,101,Alice,50000,HR
1,102,Bob,60000,Finance


## Concatenate DataFrames with Missing Values

In [6]:
# Inputs whose columns only partly overlap: both have Name, but only the
# first has Employee_ID and only the second has Salary.
dataframe1 = pd.DataFrame([
    {'Employee_ID': 101, 'Name': 'Alice'},
    {'Employee_ID': 102, 'Name': 'Bob'},
])

dataframe2 = pd.DataFrame([
    {'Name': 'Charlie', 'Salary': 60000},
    {'Name': 'David', 'Salary': 75000},
])

In [7]:
# First input: has Employee_ID and Name, but no Salary column.
dataframe1

Unnamed: 0,Employee_ID,Name
0,101,Alice
1,102,Bob


In [8]:
# Second input: has Name and Salary, but no Employee_ID column.
dataframe2

Unnamed: 0,Name,Salary
0,Charlie,60000
1,David,75000


In [9]:
# Row-wise concat keeping the union of columns (the defaults, spelled out).
# A column absent from one input is filled with NaN for that input's rows,
# which is why the integer Employee_ID and Salary values come back as floats.
concatenated_with_missing = pd.concat(
    [dataframe1, dataframe2],
    axis=0,
    join='outer',
)
concatenated_with_missing

Unnamed: 0,Employee_ID,Name,Salary
0,101.0,Alice,
1,102.0,Bob,
0,,Charlie,60000.0
1,,David,75000.0


## Inner Joins

In [10]:
# Orders and customers share the Customer_ID key, but not every key appears
# on both sides: 203 exists only in orders, 204 only in customers.
orders_data = pd.DataFrame(
    [(101, 201, 'A'), (102, 202, 'B'), (103, 203, 'C')],
    columns=['Order_ID', 'Customer_ID', 'Product'],
)

customers_data = pd.DataFrame(
    [
        (201, 'Alice', 'New York'),
        (202, 'Bob', 'San Francisco'),
        (204, 'Charlie', 'Los Angeles'),
    ],
    columns=['Customer_ID', 'Name', 'Location'],
)

In [11]:
# Orders table: Customer_ID 203 has an order but no entry in customers_data.
orders_data

Unnamed: 0,Order_ID,Customer_ID,Product
0,101,201,A
1,102,202,B
2,103,203,C


In [12]:
# Customers table: Customer_ID 204 has no matching row in orders_data.
customers_data

Unnamed: 0,Customer_ID,Name,Location
0,201,Alice,New York
1,202,Bob,San Francisco
2,204,Charlie,Los Angeles


In [13]:
# Inner join: keep only the Customer_ID values found in both tables
# (201 and 202); the unmatched 203 and 204 rows are dropped.
inner_join_result = orders_data.merge(
    customers_data,
    how='inner',
    on='Customer_ID',
)
inner_join_result

Unnamed: 0,Order_ID,Customer_ID,Product,Name,Location
0,101,201,A,Alice,New York
1,102,202,B,Bob,San Francisco


## Outer Joins

In [14]:
# Employees reference departments by Department_ID. ID 3 has an employee but
# no department row, and ID 4 has a department row but no employee.
employees = pd.DataFrame(
    [
        (101, 'Alice', 1),
        (102, 'Bob', 2),
        (103, 'Charlie', 1),
        (104, 'David', 3),
    ],
    columns=['Employee_ID', 'Name', 'Department_ID'],
)

departments = pd.DataFrame(
    [(1, 'HR'), (2, 'Finance'), (4, 'Marketing')],
    columns=['Department_ID', 'Department_Name'],
)

In [16]:
# Employees: David's Department_ID (3) does not appear in departments.
employees

Unnamed: 0,Employee_ID,Name,Department_ID
0,101,Alice,1
1,102,Bob,2
2,103,Charlie,1
3,104,David,3


In [17]:
# Departments: ID 4 (Marketing) is not referenced by any employee.
departments

Unnamed: 0,Department_ID,Department_Name
0,1,HR
1,2,Finance
2,4,Marketing


In [19]:
# Full outer join: every Department_ID from either table appears in the
# result, with NaN wherever one side has no matching row.
outer_joined = employees.merge(
    departments,
    how='outer',
    on='Department_ID',
)
outer_joined

Unnamed: 0,Employee_ID,Name,Department_ID,Department_Name
0,101.0,Alice,1,HR
1,103.0,Charlie,1,HR
2,102.0,Bob,2,Finance
3,104.0,David,3,
4,,,4,Marketing


## Left Outer Join

In [20]:
# Left join: keep every employee row; Department_Name is NaN when the
# employee's Department_ID has no match in departments.
left_joined = employees.merge(
    departments,
    how='left',
    on='Department_ID',
)
left_joined

Unnamed: 0,Employee_ID,Name,Department_ID,Department_Name
0,101,Alice,1,HR
1,102,Bob,2,Finance
2,103,Charlie,1,HR
3,104,David,3,


## Right Outer Join

In [21]:
# Right join: keep every department row; the employee columns are NaN for
# a department that no employee references.
right_joined = employees.merge(
    departments,
    how='right',
    on='Department_ID',
)
right_joined

Unnamed: 0,Employee_ID,Name,Department_ID,Department_Name
0,101.0,Alice,1,HR
1,103.0,Charlie,1,HR
2,102.0,Bob,2,Finance
3,,,4,Marketing


## Joining DataFrames on Their Index

In [22]:
# Both frames use a student ID as the row index instead of a key column.
students = pd.DataFrame(
    [('Alice', 21), ('Bob', 22), ('Charlie', 20)],
    index=[101, 102, 103],
    columns=['Name', 'Age'],
)

# Note: scores has no row for 103; it carries 104 instead.
scores = pd.DataFrame(
    [(95, 89), (85, 92), (78, 81)],
    index=[101, 102, 104],
    columns=['Math', 'Science'],
)

In [23]:
# Students, indexed by ID 101, 102 and 103.
students

Unnamed: 0,Name,Age
101,Alice,21
102,Bob,22
103,Charlie,20


In [24]:
# Scores, indexed by ID 101, 102 and 104 (there is no row for 103).
scores

Unnamed: 0,Math,Science
101,95,89
102,85,92
104,78,81


In [25]:
# Inner join on the row index of both frames: only IDs present in both
# students and scores (101 and 102) are kept.
inner_joined = students.merge(
    scores,
    how='inner',
    left_index=True,
    right_index=True,
)
inner_joined

Unnamed: 0,Name,Age,Math,Science
101,Alice,21,95,89
102,Bob,22,85,92


## Merging on Multiple Columns

In [26]:
# Each row is identified by the (StudentID, Subject) pair; neither column
# is unique on its own.
students = pd.DataFrame(
    [
        (1, 'Math', 'Alice'),
        (1, 'History', 'Alice'),
        (2, 'Math', 'Bob'),
        (2, 'History', 'Bob'),
    ],
    columns=['StudentID', 'Subject', 'Name'],
)

scores = pd.DataFrame(
    [
        (1, 'Math', 90),
        (1, 'History', 85),
        (2, 'Math', 78),
        (2, 'History', 88),
    ],
    columns=['StudentID', 'Subject', 'Score'],
)

In [27]:
# One row per (StudentID, Subject) pair, carrying the student's name.
students

Unnamed: 0,StudentID,Subject,Name
0,1,Math,Alice
1,1,History,Alice
2,2,Math,Bob
3,2,History,Bob


In [28]:
# One row per (StudentID, Subject) pair, carrying the score.
scores

Unnamed: 0,StudentID,Subject,Score
0,1,Math,90
1,1,History,85
2,2,Math,78
3,2,History,88


In [29]:
# Join on the composite key: a row matches only when both StudentID and
# Subject agree, so each name is paired with the score for that subject.
merged_df = students.merge(
    scores,
    how='inner',
    on=['StudentID', 'Subject'],
)
merged_df

Unnamed: 0,StudentID,Subject,Name,Score
0,1,Math,Alice,90
1,1,History,Alice,85
2,2,Math,Bob,78
3,2,History,Bob,88
