## Merging, Joining, and Concatenating

In [2]:
import numpy as np
import pandas as pd

# Shared fixture frames for the merge / join / concat exercises below.
# No explicit index is passed, so each frame gets pandas' default 0..3 index.

# Employees E1-E4 with their names.
df1 = pd.DataFrame(dict(
    employee_id=['E1', 'E2', 'E3', 'E4'],
    name=['Alice', 'Bob', 'Charlie', 'David'],
))

# Department lookup; shares only E1 and E2 with df1.
df2 = pd.DataFrame(dict(
    employee_id=['E1', 'E2', 'E5', 'E6'],
    department=['HR', 'IT', 'Finance', 'Marketing'],
))

# Manager lookup; its key column is called 'manager_id', not 'employee_id'.
df3 = pd.DataFrame(dict(
    manager_id=['E1', 'E2', 'E3', 'E4'],
    manager_name=['Zoe', 'Yara', 'Xavier', 'William'],
))

# A second batch of employees (E5-E8) with the same columns as df1.
df4 = pd.DataFrame(dict(
    employee_id=['E5', 'E6', 'E7', 'E8'],
    name=['Eve', 'Frank', 'Grace', 'Heidi'],
))

In [3]:
# 13. Inner join of df1 and df2 keyed on 'employee_id':
#     only ids that occur in both frames make it into the result.
df1.merge(df2, how='inner', on='employee_id')

Unnamed: 0,employee_id,name,department
0,E1,Alice,HR
1,E2,Bob,IT


In [4]:
# 14. Left join (df1 on the left, df2 on the right):
#     every df1 row is kept; 'department' is missing where df2 has no matching id.
df1.merge(df2, how='left', on='employee_id')

Unnamed: 0,employee_id,name,department
0,E1,Alice,HR
1,E2,Bob,IT
2,E3,Charlie,
3,E4,David,


In [5]:
# 15. Right join (df1 on the left, df2 on the right):
#     every df2 row is kept; 'name' is missing where df1 has no matching id.
df1.merge(df2, how='right', on='employee_id')

Unnamed: 0,employee_id,name,department
0,E1,Alice,HR
1,E2,Bob,IT
2,E5,,Finance
3,E6,,Marketing


In [6]:
# 16. Outer join of df1 and df2: ids from either frame are kept, and the
#     gaps this leaves behind are replaced with the literal string 'N/A'.
(
    df1
    .merge(df2, how='outer', on='employee_id')
    .fillna('N/A')
)

Unnamed: 0,employee_id,name,department
0,E1,Alice,HR
1,E2,Bob,IT
2,E3,Charlie,
3,E4,David,
4,E5,,Finance
5,E6,,Marketing


In [7]:
# 17. Stack df1 and df4 on top of each other (row-wise concatenation).
#     Each frame keeps its own row labels, so 0..3 appears twice in the
#     result; pd.concat's ignore_index=True would renumber the rows instead.
pd.concat(
    [df1, df4],
    axis='index',
)

Unnamed: 0,employee_id,name
0,E1,Alice
1,E2,Bob
2,E3,Charlie
3,E4,David
0,E5,Eve
1,E6,Frank
2,E7,Grace
3,E8,Heidi


In [8]:
# 18. Join df1 to df3 when the key columns have different names:
#     df1 is matched on 'employee_id', df3 on 'manager_id'.
#     Both key columns are carried into the result.
pd.merge(
    df1,
    df3,
    left_on='employee_id',
    right_on='manager_id',
)

Unnamed: 0,employee_id,name,manager_id,manager_name
0,E1,Alice,E1,Zoe
1,E2,Bob,E2,Yara
2,E3,Charlie,E3,Xavier
3,E4,David,E4,William


In [17]:
# 19. Which employees are in df1 but not in df2?
#     indicator=True adds a '_merge' column naming where each row came from;
#     after a left join, the df1 rows with no match are tagged 'left_only'.
merged = pd.merge(df1, df2, how='left', on='employee_id', indicator=True)
only_in_df1 = merged.loc[merged['_merge'] == 'left_only']

# Show just the identifying columns of the unmatched employees.
only_in_df1.loc[:, ['employee_id', 'name']]

Unnamed: 0,employee_id,name
2,E3,Charlie
3,E4,David


In [18]:
# 20. Build a salary table keyed on 'employee_id' and attach it to df1.
df_salary = pd.DataFrame(dict(
    employee_id=['E1', 'E2', 'E3', 'E4'],
    salary=[70000, 80000, 90000, 85000],
))
# merge defaults to an inner join; every df1 id has a salary row here.
df1.merge(df_salary, on='employee_id')

Unnamed: 0,employee_id,name,salary
0,E1,Alice,70000
1,E2,Bob,80000
2,E3,Charlie,90000
3,E4,David,85000


In [19]:
# 21. Place df1 and df2 side by side (column-wise concatenation).
#     Observation: rows are paired by index label (0..3 in both frames),
#     not by 'employee_id', so E3 ends up next to E5 and E4 next to E6.
#     The result also carries two 'employee_id' columns.
pd.concat(
    [df1, df2],
    axis='columns',
)


Unnamed: 0,employee_id,name,employee_id.1,department
0,E1,Alice,E1,HR
1,E2,Bob,E2,IT
2,E3,Charlie,E5,Finance
3,E4,David,E6,Marketing


In [20]:
# 22. Outer join of df1 and df2 with provenance: the extra '_merge' column
#     reads 'both', 'left_only' or 'right_only' for each row.
df1.merge(df2, how='outer', on='employee_id', indicator=True)

Unnamed: 0,employee_id,name,department,_merge
0,E1,Alice,HR,both
1,E2,Bob,IT,both
2,E3,Charlie,,left_only
3,E4,David,,left_only
4,E5,,Finance,right_only
5,E6,,Marketing,right_only


In [15]:
# 23. Two small frames joined on their index rather than on a column.
#     They share labels 'a' and 'b'; 'c' exists only in df_a, 'd' only in df_b.
df_a = pd.DataFrame({'col1': [1, 2, 3]}, index=list('abc'))
df_b = pd.DataFrame({'col2': [4, 5, 6]}, index=list('abd'))

# how='outer' keeps the union of the index labels; unmatched cells are NaN.
df_a.join(df_b, how='outer')

Unnamed: 0,col1,col2
a,1.0,4.0
b,2.0,5.0
c,3.0,
d,,6.0


In [16]:
# 24. Two-step enrichment of the employee list.
#     Step 1: attach each employee's department (left join, so employees
#             without a department row are still listed).
merged1 = df1.merge(df2, how='left', on='employee_id')
#     Step 2: attach the manager by matching the employee's id against
#             df3's 'manager_id' column, again keeping every employee.
final = merged1.merge(
    df3,
    how='left',
    left_on='employee_id',
    right_on='manager_id',
)
final

Unnamed: 0,employee_id,name,department,manager_id,manager_name
0,E1,Alice,HR,E1,Zoe
1,E2,Bob,IT,E2,Yara
2,E3,Charlie,,E3,Xavier
3,E4,David,,E4,William
