# Combining and Merging Data - Solutions

Using concat() for concatenation, merge() for SQL-style joins, and join() for index-based joins.

## Question 1
Create two DataFrames with the same columns and concatenate them vertically using pd.concat().

In [None]:
import pandas as pd
import numpy as np

# Two frames sharing the same column labels ('A' and 'B').
df1 = pd.DataFrame(dict(A=[1, 2, 3], B=[4, 5, 6]))
df2 = pd.DataFrame(dict(A=[7, 8, 9], B=[10, 11, 12]))

print("DataFrame 1:", df1, sep="\n")
print("\nDataFrame 2:", df2, sep="\n")

# axis=0 stacks the rows of df2 underneath df1; ignore_index=True replaces the
# repeated 0..2 labels with a fresh 0..5 range.
result = pd.concat([df1, df2], axis=0, ignore_index=True)
print("\nConcatenated vertically:", result, sep="\n")

## Question 2
Create two DataFrames with different columns and concatenate them horizontally, handling missing values.

In [None]:
# Two frames with disjoint column sets. With axis=1, pd.concat aligns rows on
# the index, so frames that share the same index line up without any gaps.
df_left = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df_right = pd.DataFrame({'C': [7, 8, 9], 'D': [10, 11, 12]})

print("Left DataFrame:")
print(df_left)
print("\nRight DataFrame:")
print(df_right)

result_horizontal = pd.concat([df_left, df_right], axis=1)
print("\nConcatenated horizontally:")
print(result_horizontal)

# Missing values only appear when the indexes do not match. Shifting the right
# frame's labels to 1..3 leaves label 0 without C/D and label 3 without A/B.
df_right_offset = pd.DataFrame(
    {'C': [7, 8, 9], 'D': [10, 11, 12]},
    index=[1, 2, 3],
)
print("\nRight DataFrame with a shifted index:")
print(df_right_offset)

result_with_gaps = pd.concat([df_left, df_right_offset], axis=1)
print("\nConcatenated horizontally (NaN where a label is missing on one side):")
print(result_with_gaps)

# Option 1: keep every label and replace the gaps with a default value.
result_filled = result_with_gaps.fillna(0)
print("\nMissing values filled with 0:")
print(result_filled)

# Option 2: keep only the labels present in both frames (no gaps to fill).
result_common = pd.concat([df_left, df_right_offset], axis=1, join='inner')
print("\nOnly labels present in both frames (join='inner'):")
print(result_common)

## Question 3
Perform an inner join between two DataFrames using merge() on a common column.

In [None]:
# Customers 3 and 4 have no orders; the order for customer 5 has no matching
# customer; customer 2 has two orders.
customers = pd.DataFrame(dict(
    customer_id=[1, 2, 3, 4],
    name=['Alice', 'Bob', 'Charlie', 'David'],
))

orders = pd.DataFrame(dict(
    customer_id=[1, 2, 2, 5],
    order_amount=[100, 150, 200, 75],
))

print("Customers:", customers, sep="\n")
print("\nOrders:", orders, sep="\n")

# An inner join keeps only the customer_id values found in both frames.
inner_join = customers.merge(orders, on='customer_id', how='inner')
print("\nInner join:", inner_join, sep="\n")

## Question 4
Perform a left join between two DataFrames, keeping all rows from the left DataFrame.

In [None]:
# Reuses the `customers` and `orders` frames defined in an earlier cell.
# A left join keeps every row of `customers`; customers without a matching
# order get NaN in the columns that come from `orders`.
left_join = customers.merge(orders, on='customer_id', how='left')
print("Left join (all customers):", left_join, sep="\n")

## Question 5
Merge two DataFrames on multiple columns (composite key).

In [None]:
# Neither 'product' nor 'region' identifies a row on its own; the pair of
# columns together forms the join key.
sales = pd.DataFrame(dict(
    product=['A', 'B', 'A', 'C'],
    region=['North', 'South', 'North', 'East'],
    sales=[100, 150, 120, 90],
))

pricing = pd.DataFrame(dict(
    product=['A', 'B', 'A', 'C'],
    region=['North', 'South', 'South', 'East'],
    price=[10, 15, 12, 8],
))

print("Sales:", sales, sep="\n")
print("\nPricing:", pricing, sep="\n")

# Passing a list to `on` matches rows only when every listed column agrees.
# how='inner' is the default and is spelled out here for clarity.
composite_merge = sales.merge(pricing, on=['product', 'region'], how='inner')
print("\nMerged on multiple columns:", composite_merge, sep="\n")

## Question 6
Perform an outer join between two DataFrames, showing all rows from both DataFrames.

In [None]:
# Reuses the `customers` and `orders` frames defined in an earlier cell.
# An outer join keeps the customer_id values from both frames; whichever side
# has no match contributes NaN for its columns.
outer_join = customers.merge(orders, on='customer_id', how='outer')
print("Outer join (all customers and orders):", outer_join, sep="\n")

## Question 7
Use join() method to combine two DataFrames based on their indexes.

In [None]:
# The two frames share index labels 1 and 2; label 3 exists only in df_info
# and label 4 only in df_details.
df_info = pd.DataFrame(
    dict(name=['Alice', 'Bob', 'Charlie']),
    index=[1, 2, 3],
)

df_details = pd.DataFrame(
    dict(age=[25, 30, 35], city=['NYC', 'LA', 'Chicago']),
    index=[1, 2, 4],
)

print("Info DataFrame:", df_info, sep="\n")
print("\nDetails DataFrame:", df_details, sep="\n")

# join() matches rows by index label. With how='left' every label of df_info
# is kept, so label 3 gets NaN for age/city and label 4 is dropped.
joined = df_info.join(other=df_details, how='left')
print("\nJoined on index:", joined, sep="\n")

## Question 8
Merge DataFrames where the join columns have different names using left_on and right_on parameters.

In [None]:
# The key column is called 'emp_id' in one frame and 'employee_id' in the
# other, so a plain `on=` cannot be used.
employees = pd.DataFrame(dict(
    emp_id=[1, 2, 3],
    name=['Alice', 'Bob', 'Charlie'],
))

salaries = pd.DataFrame(dict(
    employee_id=[1, 2, 4],
    salary=[50000, 60000, 55000],
))

print("Employees:", employees, sep="\n")
print("\nSalaries:", salaries, sep="\n")

# left_on / right_on name the key column on each side. Both key columns are
# kept in the result.
different_names_merge = employees.merge(
    salaries,
    left_on='emp_id',
    right_on='employee_id',
)
print("\nMerged with different column names:", different_names_merge, sep="\n")

## Question 9
Concatenate multiple DataFrames (more than 2) and add keys to identify the source of each row.

In [None]:
# One single-column frame per quarter.
q1_data = pd.DataFrame(dict(sales=[100, 120, 90]))
q2_data = pd.DataFrame(dict(sales=[110, 130, 95]))
q3_data = pd.DataFrame(dict(sales=[105, 125, 85]))

# Show each quarter's figures as a flat array for a compact preview.
print("Q1 Data:", q1_data['sales'].to_numpy())
print("Q2 Data:", q2_data['sales'].to_numpy())
print("Q3 Data:", q3_data['sales'].to_numpy())

# `keys` adds an outer index level labelling which input frame each row came
# from; the original 0..2 labels become the inner level.
with_keys = pd.concat(
    [q1_data, q2_data, q3_data],
    keys=['Q1', 'Q2', 'Q3'],
)
print("\nConcatenated with keys:", with_keys, sep="\n")

## Question 10
Handle duplicate column names when merging DataFrames by using suffixes.

In [None]:
# Both frames have a 'score' column, which would clash after merging.
scores_math = pd.DataFrame(dict(
    student=['Alice', 'Bob', 'Charlie'],
    score=[85, 90, 78],
))

scores_science = pd.DataFrame(dict(
    student=['Alice', 'Bob', 'David'],
    score=[88, 92, 85],
))

print("Math scores:", scores_math, sep="\n")
print("\nScience scores:", scores_science, sep="\n")

# `suffixes` renames the overlapping non-key column on each side, giving
# 'score_math' and 'score_science'. how='inner' is the default and is spelled
# out here for clarity.
with_suffixes = scores_math.merge(
    scores_science,
    on='student',
    how='inner',
    suffixes=('_math', '_science'),
)
print("\nMerged with suffixes:", with_suffixes, sep="\n")