# Pandas Method Chaining and `pipe()` — A Detailed Guide


# In [None]:

import pandas as pd
import numpy as np

# ---------------------------
# Sample Data
# ---------------------------
# Five-employee toy dataset used as the input of the examples in this file.
df = pd.DataFrame(
    data=dict(
        Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
        Age=[25, 30, 35, 40, 28],
        Department=['HR', 'IT', 'IT', 'Finance', 'HR'],
        Salary=[50000, 60000, 70000, 80000, 52000],
        Bonus=[5000, 6000, 7000, 8000, 5500],
    ),
)

# ---------------------------
# What is Method Chaining?
# ---------------------------
# Method chaining expresses a sequence of DataFrame operations as a single
# expression, each call operating on the result of the previous one.

# Example: without method chaining, every step needs its own variable.
filtered = df[df['Age'] > 30]
sorted_df = filtered.sort_values('Salary', ascending=False)
final = sorted_df.reset_index(drop=True)

print("\n[No Chaining]")
print(final)

# With method chaining
# The callable given to .loc receives the frame at that point in the chain,
# so the mask is built from the data being filtered rather than from the
# outer `df` variable (which would misalign if an earlier step changed rows).
chained = (
    df
    .loc[lambda d: d['Age'] > 30]
    .sort_values('Salary', ascending=False)
    .reset_index(drop=True)
)

print("\n[With Method Chaining]")
print(chained)

# ---------------------------
# Use .assign() to add columns
# ---------------------------
# One assign() call can create several columns; each callable is evaluated
# against the frame being extended and its result becomes the new column.
assigned = df.assign(
    SalaryWithBonus=lambda frame: frame['Salary'] + frame['Bonus'],
    DepartmentUpper=lambda frame: frame['Department'].str.upper(),
)

print("\n[Using assign()]")
print(assigned)

# ---------------------------
# Add .query() to filter by condition
# ---------------------------
# Keep the IT rows with Age above 30, then add salary plus bonus for them.
queried = (
    df.query("Department == 'IT' and Age > 30")
    .assign(SalaryWithBonus=lambda rows: rows['Salary'].add(rows['Bonus']))
)

print("\n[Using query()]")
print(queried)

# ---------------------------
# Use .pipe() to integrate custom functions
# ---------------------------
# Define reusable functions
def add_tax_column(df, tax_rate=0.1):
    """Return a copy of ``df`` with a ``Tax`` column equal to ``Salary * tax_rate``.

    The frame passed in is not modified.
    """
    return df.assign(Tax=df['Salary'] * tax_rate)

def filter_department(df, dept):
    """Return the rows of ``df`` whose ``Department`` value equals ``dept``."""
    in_dept = df['Department'] == dept
    return df.loc[in_dept]

# Chain with pipe
# pipe() calls the given function with the frame as its first argument and
# forwards the remaining keyword arguments.
piped = df.pipe(filter_department, dept='HR').pipe(add_tax_column, tax_rate=0.2)

print("\n[Using pipe() with arguments]")
print(piped)

# ---------------------------
# GroupBy with pipe
# ---------------------------
def summarize(group):
    """Return the mean ``Salary`` and summed ``Bonus`` of ``group`` as a Series.

    The result is labelled ``AvgSalary`` and ``TotalBonus``.
    """
    avg_salary = group['Salary'].mean()
    total_bonus = group['Bonus'].sum()
    return pd.Series({'AvgSalary': avg_salary, 'TotalBonus': total_bonus})

# Select the aggregated columns before apply(): summarize() reads only
# Salary and Bonus, and letting apply() operate on the grouping column is
# deprecated (pandas 2.2+ emits a warning for it).
summary = (
    df
    .groupby('Department')[['Salary', 'Bonus']]
    .pipe(lambda g: g.apply(summarize))
)

print("\n[GroupBy with pipe()]")
print(summary)

# ---------------------------
# Conditional logic with pipe
# ---------------------------
def maybe_filter(df, do_filter=True, threshold=55000):
    """Optionally keep only the rows whose ``Salary`` exceeds ``threshold``.

    Args:
        df: DataFrame with a ``Salary`` column.
        do_filter: When truthy, apply the salary filter; otherwise ``df``
            is returned as-is.
        threshold: Exclusive lower bound on ``Salary``. Defaults to 55000.

    Returns:
        The rows with ``Salary > threshold``, or ``df`` itself when
        ``do_filter`` is falsy.
    """
    if not do_filter:
        return df
    return df[df['Salary'] > threshold]

# The do_filter flag is forwarded by pipe() and decides whether the filter runs.
conditional = (
    df
    .pipe(maybe_filter, do_filter=True)
)
print("\n[Conditional logic with pipe()]")
print(conditional)

# ---------------------------
# Final Thoughts
# ---------------------------
# Method chaining and .pipe() promote readability, modularity, and functional-style programming.
# They are especially powerful for building data transformation pipelines in ETL workflows or dashboards.

# END
