### Pandas trick #1: Keep the rows that contain the smallest value in each group

In [1]:
import pandas as pd

# Small demo frame: group key A ("foo" x5, "bar" x4), labels B and C,
# and two integer measures D and E.
df = pd.DataFrame(
    data={
        "A": ["foo"] * 5 + ["bar"] * 4,
        "B": ["one"] * 3 + ["two"] * 2 + ["one"] * 2 + ["two"] * 2,
        "C": [
            "small", "large", "large",
            "small", "small", "large",
            "small", "small", "large",
        ],
        "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
        "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
    }
)
df

Unnamed: 0,A,B,C,D,E
0,foo,one,small,1,2
1,foo,one,large,2,4
2,foo,one,large,2,5
3,foo,two,small,3,5
4,foo,two,small,3,6
5,bar,one,large,4,6
6,bar,one,small,5,8
7,bar,two,small,6,9
8,bar,two,large,7,9


In [2]:
# Minimum of E within each A group, broadcast back to one value per row of df.
(
    df.groupby(by="A")["E"]
      .transform("min")
)

0    2
1    2
2    2
3    2
4    2
5    6
6    6
7    6
8    6
Name: E, dtype: int64

In [3]:
# Boolean mask: True where a row's E equals the minimum E of its A group.
df["E"].eq(df.groupby(by="A")["E"].transform("min"))

0     True
1    False
2    False
3    False
4    False
5     True
6    False
7    False
8    False
Name: E, dtype: bool

In [4]:
# Keep only the rows whose E is the smallest within their A group.
# The mask is an equality test, so tied rows would all be kept.
df.loc[
    df["E"].eq(df.groupby(by="A")["E"].transform("min"))
]

Unnamed: 0,A,B,C,D,E
0,foo,one,small,1,2
5,bar,one,large,4,6


#### Add columns with group summaries

In [9]:
# Broadcast per-group statistics of E (grouped by A) onto every row of df.
e_by_a = df.groupby("A")["E"]
# "size" would work as well as "count" here (the two differ only when E has missing values).
df["group_count"] = e_by_a.transform("count")
df["group_min"] = e_by_a.transform("min")
df["group_mean"] = e_by_a.transform("mean")
df

Unnamed: 0,A,B,C,D,E,group_count,group_min,group_mean
0,foo,one,small,1,2,5,2,4.4
1,foo,one,large,2,4,5,2,4.4
2,foo,one,large,2,5,5,2,4.4
3,foo,two,small,3,5,5,2,4.4
4,foo,two,small,3,6,5,2,4.4
5,bar,one,large,4,6,4,6,8.0
6,bar,one,small,5,8,4,6,8.0
7,bar,two,small,6,9,4,6,8.0
8,bar,two,large,7,9,4,6,8.0


### Pandas trick #2: named aggregation

In [None]:
import pandas as pd
import numpy as np

# Toy data: two cats and two dogs, each with a height and a weight.
animals = pd.DataFrame(
    data={
        "kind": ["cat", "dog"] * 2,
        "height": [9.1, 6.0, 9.5, 34.0],
        "weight": [7.9, 7.5, 9.9, 198.0],
    }
)
animals

In [None]:
# Named aggregation: each keyword becomes an output column, defined by a
# (source column, aggregation) pair. as_index=False keeps "kind" as a column.
# Aggregations are given by name: passing NumPy callables such as np.mean to
# agg() is deprecated in recent pandas and emits a FutureWarning.
animals.groupby("kind", as_index=False).agg(
    min_height=("height", "min"),
    max_height=("height", "max"),
    average_weight=("weight", "mean"),
)

In [None]:
# Dict-style aggregation: one aggregation per source column; the output
# columns keep the source column names.
(
    animals
    .groupby("kind", as_index=False)
    .agg({"height": "min", "weight": "mean"})
)

### Pandas trick #3: where

In [None]:
import pandas as pd
import numpy as np

# 6x4 frame of standard-normal draws, rows a..f and columns A..D.
# A fixed seed keeps the displayed values reproducible across kernel restarts.
df = pd.DataFrame(
    np.random.default_rng(seed=0).standard_normal((6, 4)),
    index=list("abcdef"),
    columns=list("ABCD"),
)
df

In [None]:
# Keep entries that are > 0 and replace every other entry with its negation.
df.where(df.gt(0), other=-df)

In [None]:
# Keep entries that are > 0 and set every other entry to 0.
df.where(df.gt(0), other=0)

### Pandas trick #4: ngroup()

In [None]:
import pandas as pd
df = pd.DataFrame(
    data={
        "a": [1] * 3 + [2] * 3,
        "b": [1, 1, 2] * 2,
    }
)
# ngroup() labels each distinct (a, b) combination with an integer;
# adding 1 shifts the labels so the first group is 1.
df["idx"] = df.groupby(by=["a", "b"]).ngroup().add(1)
df

### Pandas trick #5: resample & interpolate, pivot & stack

In [None]:
import pandas as pd
# Quarterly rates for one scenario and two paths (four quarters per path).
df = pd.DataFrame(
    data={
        "Date": ["Q1", "Q2", "Q3", "Q4"] * 2,
        "Scenario": [1] * 8,
        "Path": [1] * 4 + [2] * 4,
        "rate_1": [
            0.02213, 0.02867, 0.03426, 0.01678,
            0.02218, 0.02572, 0.01235, 0.03215,
        ],
        "rate_2": [
            0.02213, 0.02867, 0.03426, 0.01678,
            0.02218, 0.02572, 0.01235, 0.03215,
        ],
    }
)
df

In [None]:
# Lookup from quarter label to a date string.
d = dict(
    zip(
        ("Q1", "Q2", "Q3", "Q4"),
        ("2022-04-01", "2022-07-01", "2022-10-01", "2023-01-01"),
    )
)
# map() leaves NaN where a value is not a key of d; fillna() puts the original
# value back there, so re-running this cell leaves already-mapped dates unchanged.
df["Date"] = df["Date"].map(d).fillna(value=df["Date"])
df

In [None]:
# Turn the date strings into monthly periods, then resample each
# (Scenario, Path) group at monthly frequency and forward-fill the new rows.
df["Date"] = pd.to_datetime(df["Date"]).dt.to_period(freq="M")
(
    df.set_index(keys="Date")
      .groupby(by=["Scenario", "Path"])
      .resample("M")
      .ffill()
)

In [None]:
# Same monthly resampling per (Scenario, Path) group, but fill the new rows by
# linear interpolation, then drop the Scenario and Path columns from the result.
(
    df.set_index(keys="Date")
      .groupby(by=["Scenario", "Path"])
      .resample("M")
      .interpolate(method="linear")
      .drop(columns=["Scenario", "Path"])
)

In [None]:
# Resample each (Scenario, Path) group to monthly rows with asfreq(), drop the
# Scenario and Path columns, then linearly interpolate the resulting frame.
# NOTE: this rebinds df, and "Date" is moved out of the columns by set_index(),
# so the cell cannot be re-run without first re-running the cells that build df.
df = (
    df.set_index(keys="Date")
      .groupby(by=["Scenario", "Path"])
      .resample("M")
      .asfreq()
      .drop(columns=["Scenario", "Path"])
      .interpolate(method="linear")
)
df

In [None]:
# Capture the value columns before reset_index() turns the index levels into columns.
cols = df.columns
# Go wide (one column per Date for every value column), then move the outer
# column level back into the rows and flatten the index.
# NOTE: this rebinds df to the reshaped frame.
df = df.reset_index().pivot(
    index=["Scenario", "Path"],
    columns="Date",
    values=cols,
).stack(level=0).reset_index()
df

### Pandas trick #6: mapping

In [None]:
# Country names to classify.
countries = pd.Series(
    data=[
        "United States",
        "Canada",
        "Mexico",
        "Belgium",
        "United Kingdom",
        "Thailand",
    ]
)
# Region -> member countries. Not every country above appears in a region.
groups = {
    "North America": (
        "United States",
        "Canada",
        "Mexico",
        "Greenland",
    ),
    "Europe": (
        "France",
        "Germany",
        "United Kingdom",
        "Belgium",
    ),
}

In [None]:
# Invert {region: (countries, ...)} into {country: region} for use with Series.map.
# This cell rebinds `groups`, so guard against running it twice: once inverted,
# the values are plain strings, and iterating them would build a meaningless
# {character: country} mapping.
if not all(isinstance(members, str) for members in groups.values()):
    groups = {
        country: region
        for region, members in groups.items()
        for country in members
    }
groups

In [None]:
# Replace each country by its region; countries missing from the mapping keep their own name.
countries.map(groups).fillna(value=countries)

In [None]:
# Replace each country by its region; countries missing from the mapping get the sentinel -999.
countries.map(groups).fillna(value=-999)

### Pandas trick #7: stack and unstack

In [None]:
import pandas as pd
import numpy as np

# Pair each first-level label with a second-level label.
tuples = list(
    zip(
        ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
        ["one", "two", "one", "two", "one", "two", "one", "two"],
    )
)
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
# Standard-normal data with a fixed seed, so the displayed values are
# reproducible across kernel restarts.
df = pd.DataFrame(
    np.random.default_rng(seed=0).standard_normal((8, 2)),
    index=index,
    columns=["A", "B"],
)
df

In [None]:
# Move the column labels into a new innermost index level.
stacked = df.stack(level=-1)
stacked

In [None]:
# Inverse of stack(): move the innermost index level back into the columns.
stacked.unstack(level=-1)

### Pandas trick #8: multi-index, slice

In [None]:
import pandas as pd
import numpy as np

def mklbl(prefix, n):
    """Return ``n`` labels made of ``prefix`` followed by 0, 1, ..., n-1.

    Args:
        prefix: Value placed before each number (formatted with ``%s``).
        n: Number of labels to generate.

    Returns:
        A list of ``n`` strings, e.g. ``mklbl("A", 2) == ["A0", "A1"]``.
    """
    labels = []
    for i in range(n):
        labels.append("%s%s" % (prefix, i))
    return labels

# Four-level row index: every combination of A0..A3, B0..B1, C0..C3, D0..D1.
miindex = pd.MultiIndex.from_product(
    [mklbl(prefix, size) for prefix, size in (("A", 4), ("B", 2), ("C", 4), ("D", 2))]
)

# Two-level column index with named levels.
micolumns = pd.MultiIndex.from_tuples(
    [
        ("a", "foo"),
        ("a", "bar"),
        ("b", "foo"),
        ("b", "bah"),
    ],
    names=["lvl0", "lvl1"],
)

# Fill the frame with consecutive integers, then sort both axes.
dfmi = pd.DataFrame(
    np.arange(miindex.size * micolumns.size).reshape(miindex.size, micolumns.size),
    index=miindex,
    columns=micolumns,
).sort_index(axis=0).sort_index(axis=1)
dfmi

In [None]:
# Rows whose first level is within A1..A3 and whose third level is within
# C1..C3; the second level is unrestricted. pd.IndexSlice builds the same
# tuple of slice objects from ordinary slicing syntax.
dfmi.loc[pd.IndexSlice["A1":"A3", :, "C1":"C3"]]

In [None]:
# Rows with first level "A0", second level "B0" and fourth level "D1";
# the third level is unrestricted.
dfmi.loc[pd.IndexSlice["A0", "B0", :, "D1"]]

### Pandas trick #9: time series  

In [None]:
import pandas as pd
import datetime

# Build a lookup from labels "Q1", "Q2", ... to "YYYY-MM" strings, one entry
# per generated date, stepping three months at a time from 2023-01.
start_date = datetime.datetime.strptime("2023-01", "%Y-%m")
# "3MS" = every third month start. The plain "M" alias is deprecated in recent
# pandas, while "MS" is accepted by old and new versions alike and gives the
# same year-month strings here.
date_generated = pd.date_range(start_date, periods=12, freq="3MS")
# Derive the label count from the dates so the two stay in step; zip() would
# otherwise silently drop any date without a label.
qtr = ["Q" + str(i) for i in range(1, len(date_generated) + 1)]
d = dict(zip(qtr, date_generated.strftime("%Y-%m")))
d