# 06 — Transformations (assign, vectorization, pipe)

Mirrors: `chapters/06_transform_assign.md`


In [None]:
import sys
from pathlib import Path

import numpy as np
import pandas as pd

# The shared helper module is located relative to the current working
# directory, so this lookup only resolves when the notebook is started from
# the repository root.
shared = Path.cwd().joinpath(
    "docs", "tutorials", "python", "modules", "pandas", "shared"
)
sys.path.insert(0, str(shared))

# Must come after the sys.path tweak above, otherwise the module is not found.
from make_orders import make_orders

orders = make_orders()
orders.head()

In [None]:
# A single assign call is enough: keyword arguments are evaluated in order,
# so the `segment` callable can already read the freshly created `is_big`.
orders2 = orders.assign(
    is_big=lambda df: df["revenue"].ge(500),
    segment=lambda df: np.where(df["is_big"], "high", "low"),
)
orders2[["revenue", "is_big", "segment"]].head()

In [None]:
# np.select uses the FIRST condition that matches, so the thresholds are
# listed from highest to lowest. This makes the tiers contiguous:
#   revenue >= 1000        -> "vip"
#   500 <= revenue < 1000  -> "high"
#   everything else        -> "standard"
# An explicit upper bound such as 999.99 would leave a gap: a revenue of
# 999.995 (or float noise like 999.9900000001) would match neither rule and
# silently fall through to "standard".
conds = [orders["revenue"].ge(1000), orders["revenue"].ge(500)]
choices = ["vip", "high"]
orders.assign(tier=np.select(conds, choices, default="standard"))["tier"].value_counts()

In [None]:
def normalize_status(d: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame whose ``shipping_status`` column is normalized.

    The column is cast to the pandas ``"string"`` dtype, then stripped of
    leading/trailing whitespace and lowercased. The input frame ``d`` is
    left untouched because ``assign`` builds a new DataFrame.
    """
    as_text = d["shipping_status"].astype("string")
    tidy = as_text.str.strip().str.lower()
    return d.assign(shipping_status=tidy)


def add_revenue(d: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame with ``revenue`` set to ``quantity * price``.

    The product is computed element-wise from the ``quantity`` and ``price``
    columns of ``d``. An existing ``revenue`` column is overwritten in the
    returned frame; ``d`` itself is not modified.
    """
    line_total = d["quantity"] * d["price"]
    return d.assign(revenue=line_total)


# Each .pipe step receives the frame produced by the previous one, so the
# pipeline reads top to bottom: normalize the status text, then add revenue.
clean = (
    orders
    .pipe(normalize_status)
    .pipe(add_revenue)
)
clean.head()