In [1]:
import pandas as pd

In [2]:
def _labelled_frame(row_labels):
    """Return a frame with columns A-D whose cells read '<column><row label>'.

    The row labels double as the index, e.g. label 4 yields the row
    A4, B4, C4, D4 stored at index 4.
    """
    row_labels = list(row_labels)
    return pd.DataFrame(
        {column: [f"{column}{label}" for label in row_labels] for column in "ABCD"},
        index=row_labels,
    )


# Three frames with identical columns and consecutive, non-overlapping indexes
df1 = _labelled_frame(range(0, 4))
df2 = _labelled_frame(range(4, 8))
df3 = _labelled_frame(range(8, 12))
dataframes = [df1, df2, df3]

# concat() is called with its default axis here, which stacks the frames
# row-wise in list order and keeps each frame's original index labels.
result = pd.concat(dataframes)
result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
8,A8,B8,C8,D8
9,A9,B9,C9,D9
10,A10,B10,C10,D10
11,A11,B11,C11,D11


In [3]:
# df4's index (2, 3, 6, 7) only partly overlaps df1's (0-3), and its columns
# B and D repeat names that df1 already has.
df4 = pd.DataFrame(
    data=[
        ["B2", "D2", "F2"],
        ["B3", "D3", "F3"],
        ["B6", "D6", "F6"],
        ["B7", "D7", "F7"],
    ],
    index=[2, 3, 6, 7],
    columns=["B", "D", "F"],
)

# Column-wise concat lines the frames up by index label; a label missing from
# one frame leaves that frame's columns as NaN in the combined row.
result = pd.concat([df1, df4], axis="columns")
result

Unnamed: 0,A,B,C,D,B.1,D.1,F
0,A0,B0,C0,D0,,,
1,A1,B1,C1,D1,,,
2,A2,B2,C2,D2,B2,D2,F2
3,A3,B3,C3,D3,B3,D3,F3
6,,,,,B6,D6,F6
7,,,,,B7,D7,F7


#### The `how` parameter of `merge()` determines which rows appear in the resulting DataFrame, based on which key values match between the two DataFrames.
1) inner (default): Returns only the rows where the key(s) exist in both DataFrames (intersection).
2) left: Returns all rows from the left DataFrame plus the matching rows from the right DataFrame. For left rows with no match, the columns coming from the right DataFrame are filled with NaN.
3) right: Returns all rows from the right DataFrame plus the matching rows from the left DataFrame. For right rows with no match, the columns coming from the left DataFrame are filled with NaN.
4) outer: Returns all rows from both DataFrames (union of the keys). Where a row has no match on the other side, that side's columns are filled with NaN.
5) cross: Creates a Cartesian product, combining every row from the left DataFrame with every row from the right DataFrame.

In [5]:
# merge() combines DataFrames on shared columns or indices, much like a SQL JOIN.
# The join type (inner, outer, left, right) controls what happens to rows whose
# keys do not match.

# Left table: one row per employee
df1 = pd.DataFrame(
    [
        (101, 'Alice', 'HR'),
        (102, 'Bob', 'IT'),
        (103, 'Charlie', 'Finance'),
        (104, 'David', 'HR'),
    ],
    columns=['employee_id', 'name', 'department'],
)

# Right table: pay details; it has no row for 103 and an extra row for 105
df2 = pd.DataFrame(
    [
        (102, 70000, '2020-01-15'),
        (104, 85000, '2019-03-01'),
        (105, 60000, '2021-07-20'),
        (101, 75000, '2022-05-10'),
    ],
    columns=['employee_id', 'salary', 'start_date'],
)

# Inner merge on 'employee_id': keeps only the ids present in both tables
merged_df = df1.merge(df2, on='employee_id', how='inner')

# Left merge on 'employee_id': keeps every row of df1; employee 103 has no
# match in df2, so its salary and start_date come back as NaN
merged_df_left = df1.merge(df2, on='employee_id', how='left')

# Each call prints a heading and then the frame on the following line
print("DataFrame 1:", df1, sep="\n")
print("\nDataFrame 2:", df2, sep="\n")
print("\nMerged DataFrame (Inner Join):", merged_df, sep="\n")
print("\nMerged DataFrame (Left Join):", merged_df_left, sep="\n")

DataFrame 1:
   employee_id     name department
0          101    Alice         HR
1          102      Bob         IT
2          103  Charlie    Finance
3          104    David         HR

DataFrame 2:
   employee_id  salary  start_date
0          102   70000  2020-01-15
1          104   85000  2019-03-01
2          105   60000  2021-07-20
3          101   75000  2022-05-10

Merged DataFrame (Inner Join):
   employee_id   name department  salary  start_date
0          101  Alice         HR   75000  2022-05-10
1          102    Bob         IT   70000  2020-01-15
2          104  David         HR   85000  2019-03-01

Merged DataFrame (Left Join):
   employee_id     name department   salary  start_date
0          101    Alice         HR  75000.0  2022-05-10
1          102      Bob         IT  70000.0  2020-01-15
2          103  Charlie    Finance      NaN         NaN
3          104    David         HR  85000.0  2019-03-01


In [6]:
# join() is a DataFrame method primarily used for combining DataFrames based on
# their indices. It is a convenient shorthand for certain merge() operations,
# especially when joining on indices.
df_main = pd.DataFrame({'Value': [10, 20, 30]}, index=['A', 'B', 'C'])
df_extra = pd.DataFrame({'Info': ['X', 'Y']}, index=['A', 'C'])

# Join on index (default). Every row of df_main is kept; 'B' has no entry in
# df_extra, so its Info value is NaN.
result_join = df_main.join(df_extra)

# Print the heading and the frame on separate lines. Passing both to print()
# with the default separator put a space after the "\n", which pushed the
# frame's header row one column to the right of its data rows.
print("Joined DataFrames:", result_join, sep="\n")

Joined DataFrames:
    Value Info
A     10    X
B     20  NaN
C     30    Y
