In [1]:
import numpy as np
import pandas as pd 



## Merging 2 DataFrames
### 🔹 1. MERGE (combine using a common column)

In [3]:
# DataFrame 1: employee master data, keyed by employee_id (ids 1-5)
employees = pd.DataFrame(
    data={
        "employee_id": [1, 2, 3, 4, 5],
        "name": ["John", "Anna", "Peter", "Linda", "Bob"],
        "department": ["HR", "IT", "Finance", "IT", "HR"],
    }
)

# DataFrame 2: salary information.
# Ids 6 and 7 have no row in `employees`, and ids 4 and 5 have no row here,
# so each join type below produces a different result.
salaries = pd.DataFrame(
    data={
        "employee_id": [1, 2, 3, 6, 7],
        "salary": [60000, 80000, 65000, 70000, 90000],
        "bonus": [5000, 10000, 7000, 8000, 12000],
    }
)

In [4]:
# Show the employees frame (a bare last expression is rendered as the cell output)
employees

Unnamed: 0,employee_id,name,department
0,1,John,HR
1,2,Anna,IT
2,3,Peter,Finance
3,4,Linda,IT
4,5,Bob,HR


In [5]:
# Show the salaries frame; note that its employee_id values only partly overlap with employees
salaries


Unnamed: 0,employee_id,salary,bonus
0,1,60000,5000
1,2,80000,10000
2,3,65000,7000
3,6,70000,8000
4,7,90000,12000


In [13]:
# Inner merge on 'employee_id' → keeps only the ids present in BOTH frames.
# how='inner' is also pd.merge's default, so it could be omitted.
pd.merge(
    left=employees,
    right=salaries,
    how="inner",
    on="employee_id",
)

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000


In [14]:
# Outer merge → every row from both frames; cells with no match are filled with NaN
pd.merge(
    left=employees,
    right=salaries,
    how="outer",
    on="employee_id",
)

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000.0,5000.0
1,2,Anna,IT,80000.0,10000.0
2,3,Peter,Finance,65000.0,7000.0
3,4,Linda,IT,,
4,5,Bob,HR,,
5,6,,,70000.0,8000.0
6,7,,,90000.0,12000.0


In [15]:
# Left merge → every row of the left frame (employees) plus matching columns
# from the right frame (salaries); unmatched employees get NaN salary/bonus
pd.merge(
    left=employees,
    right=salaries,
    how="left",
    on="employee_id",
)


Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000.0,5000.0
1,2,Anna,IT,80000.0,10000.0
2,3,Peter,Finance,65000.0,7000.0
3,4,Linda,IT,,
4,5,Bob,HR,,


In [17]:
# Right merge → every row of the right frame (salaries) plus matching columns
# from the left frame (employees); unmatched salary rows get NaN name/department
pd.merge(
    left=employees,
    right=salaries,
    how="right",
    on="employee_id",
)

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000
3,6,,,70000,8000
4,7,,,90000,12000


## Concatenation of 2 DataFrames
### 🔹 2. CONCAT (stacking DataFrames)

In [20]:
# Two small frames with identical column labels (A, B) and the same default index (0, 1)
df3 = pd.DataFrame(data={"A": [1, 2], "B": [3, 4]})
df4 = pd.DataFrame(data={"A": [5, 6], "B": [7, 8]})

In [21]:
# Show the first frame to be concatenated
df3


Unnamed: 0,A,B
0,1,3
1,2,4


In [22]:
# Show the second frame to be concatenated
df4

Unnamed: 0,A,B
0,5,7
1,6,8


In [33]:
# (a) Row-wise concat (axis=0 is the default): df4's rows are added below df3's.
# The original index labels are kept, so 0 and 1 each appear twice in the result.
pd.concat(objs=[df3, df4], axis=0)

Unnamed: 0,A,B
0,1,3
1,2,4
0,5,7
1,6,8


In [34]:
# (b) Column-wise concat: the frames are placed side by side, rows aligned on the index
pd.concat(objs=[df3, df4], axis="columns")

Unnamed: 0,A,B,A.1,B.1
0,1,3,5,7
1,2,4,6,8


In [35]:
# Row-wise concat with a fresh index.
# ignore_index=True discards the old index labels and renumbers the result 0..n-1;
# use it when the original index values carry no meaning you need to keep.
pd.concat(objs=[df3, df4], axis=0, ignore_index=True)

Unnamed: 0,A,B
0,1,3
1,2,4
2,5,7
3,6,8


## Joining 2 DataFrames
### 🔹 3. JOIN (combine using index)

In [37]:

# Marks per subject, indexed by student name.
# Only "Sneha" and "Raj" appear in both indexes; "Amit" is only in df5, "Kiran" only in df6.
df5 = pd.DataFrame(
    data={"Maths": [90, 80, 70]},
    index=["Amit", "Sneha", "Raj"],
)
df6 = pd.DataFrame(
    data={"Science": [85, 75, 95]},
    index=["Sneha", "Raj", "Kiran"],
)

In [38]:
# Show the Maths marks frame (index = student names)
df5

Unnamed: 0,Maths
Amit,90
Sneha,80
Raj,70


In [39]:
# Show the Science marks frame (index = student names)
df6

Unnamed: 0,Science
Sneha,85
Raj,75
Kiran,95


In [49]:
# Default join → how='left': keep every row of df5 and add the matching
# Science marks from df6 by index label (NaN where df6 has no such label)
df5.join(other=df6)


Unnamed: 0,Maths,Science
Amit,90,
Sneha,80,85.0
Raj,70,75.0


In [46]:
# Inner join on the index → only names present in both frames are kept
df5.join(other=df6, how="inner")

Unnamed: 0,Maths,Science
Sneha,80,85
Raj,70,75


In [50]:
# Outer join → union of both indexes; missing marks are filled with NaN.
# The resulting index comes back sorted by label (Amit, Kiran, Raj, Sneha).
df5.join(other=df6, how="outer")

Unnamed: 0,Maths,Science
Amit,90.0,
Kiran,,95.0
Raj,70.0,75.0
Sneha,80.0,85.0


In [52]:
# Left join → every row of df5, with matching Science marks pulled from df6
df5.join(other=df6, how="left")


Unnamed: 0,Maths,Science
Amit,90,
Sneha,80,85.0
Raj,70,75.0


In [54]:
# Right join → every row of df6, with matching Maths marks pulled from df5
df5.join(other=df6, how="right")


Unnamed: 0,Maths,Science
Sneha,80.0,85
Raj,70.0,75
Kiran,,95
