### Pandas trick #1: Keep the rows that hold each group's minimum value

In [1]:
import pandas as pd

# Toy frame for the "minimum per group" trick, written one row per tuple so
# the literal reads like the rendered table. "A" is the grouping key used
# below and "E" is the column whose per-group minimum is looked up.
df = pd.DataFrame(
    [
        ("foo", "one", "small", 1, 2),
        ("foo", "one", "large", 2, 4),
        ("foo", "one", "large", 2, 5),
        ("foo", "two", "small", 3, 5),
        ("foo", "two", "small", 3, 6),
        ("bar", "one", "large", 4, 6),
        ("bar", "one", "small", 5, 8),
        ("bar", "two", "small", 6, 9),
        ("bar", "two", "large", 7, 9),
    ],
    columns=["A", "B", "C", "D", "E"],
)
df

Unnamed: 0,A,B,C,D,E
0,foo,one,small,1,2
1,foo,one,large,2,4
2,foo,one,large,2,5
3,foo,two,small,3,5
4,foo,two,small,3,6
5,bar,one,large,4,6
6,bar,one,small,5,8
7,bar,two,small,6,9
8,bar,two,large,7,9


In [2]:
# transform() keeps the original row index: every row receives the smallest
# "E" found within its own "A" group.
df["E"].groupby(df["A"]).transform("min")

0    2
1    2
2    2
3    2
4    2
5    6
6    6
7    6
8    6
Name: E, dtype: int64

In [3]:
# True exactly where a row's own "E" equals the minimum of its "A" group.
group_min = df.groupby("A")["E"].transform("min")
group_min == df["E"]

0     True
1    False
2    False
3    False
4    False
5     True
6    False
7    False
8    False
Name: E, dtype: bool

In [4]:
# Build the boolean mask first, then use it to keep only the rows whose "E"
# is the smallest value inside their "A" group.
is_group_min = df.groupby("A")["E"].transform("min").eq(df["E"])
df.loc[is_group_min]

Unnamed: 0,A,B,C,D,E
0,foo,one,small,1,2
5,bar,one,large,4,6


### Pandas trick #2: named aggregation

In [None]:
import pandas as pd
import numpy as np

# One record per animal; "kind" is the grouping key used by the aggregation
# cells that follow.
animals = pd.DataFrame(
    [
        {"kind": "cat", "height": 9.1, "weight": 7.9},
        {"kind": "dog", "height": 6.0, "weight": 7.5},
        {"kind": "cat", "height": 9.5, "weight": 9.9},
        {"kind": "dog", "height": 34.0, "weight": 198.0},
    ]
)
animals

In [None]:
# Named aggregation: each keyword becomes an output column, computed from an
# (input column, reducer) pair. as_index=False keeps "kind" as a regular column.
animals.groupby("kind", as_index=False).agg(
    min_height=("height", "min"),
    max_height=("height", "max"),
    # Use the string alias rather than the NumPy callable: it selects the
    # built-in groupby mean directly and avoids the deprecation warning that
    # recent pandas releases emit when np.mean is handed to agg().
    average_weight=("weight", "mean"),
)

In [None]:
# Dict form of agg(): maps each input column to one reducer; the output
# columns keep the input column names.
reducer_by_column = {"height": "min", "weight": "mean"}
animals.groupby("kind", as_index=False).agg(reducer_by_column)

### Pandas trick #3: where

In [None]:
import pandas as pd
import numpy as np

# 6x4 frame of standard-normal draws used to demonstrate DataFrame.where.
# A seeded generator makes the values identical on every Restart & Run All,
# so the outputs of the cells below can be compared across runs.
rng = np.random.default_rng(0)
df = pd.DataFrame(
    rng.standard_normal((6, 4)),
    index=list("abcdef"),
    columns=list("ABCD"),
)
df

In [None]:
# Keep entries where the condition holds; everywhere else take the value from
# the negated frame instead.
is_positive = df > 0
df.where(is_positive, other=-df)

In [None]:
# Keep entries greater than zero and replace every other entry with 0.
df.where(df.gt(0), other=0)

### Pandas trick #4: ngroup()

In [None]:
import pandas as pd
# ngroup() numbers each distinct (a, b) combination starting from 0; adding 1
# turns that into a 1-based group id stored in "idx".
df = pd.DataFrame(
    {
        "a": [1, 1, 1, 2, 2, 2],
        "b": [1, 1, 2, 1, 1, 2],
    }
).assign(idx=lambda frame: frame.groupby(["a", "b"]).ngroup() + 1)
df

### Pandas trick #5: resample & interpolate, pivot & stack

In [None]:
import pandas as pd
# Quarterly rates for one scenario and two paths (four quarters per path).
# rate_1 and rate_2 deliberately hold the same numbers.
quarters = ["Q1", "Q2", "Q3", "Q4"]
rates = [0.02213, 0.02867, 0.03426, 0.01678, 0.02218, 0.02572, 0.01235, 0.03215]
df = pd.DataFrame(
    {
        "Date": quarters * 2,
        "Scenario": [1] * 8,
        "Path": [1] * 4 + [2] * 4,
        "rate_1": list(rates),
        "rate_2": list(rates),
    }
)
df

In [None]:
# Quarter label -> date string used to replace it in the "Date" column.
d = dict(
    Q1="2022-04-01",
    Q2="2022-07-01",
    Q3="2022-10-01",
    Q4="2023-01-01",
)
# Labels missing from `d` map to NaN; fillna() then restores the original
# value for those rows, so unknown labels pass through unchanged.
mapped_dates = df["Date"].map(d)
df["Date"] = mapped_dates.fillna(value=df["Date"])
df

In [None]:
# Parse the "Date" strings and convert them to monthly periods, overwriting
# the column in place.
# NOTE(review): because the column is overwritten, re-running this cell feeds
# Period values back into pd.to_datetime -- confirm it is only meant to run once.
df["Date"] = pd.to_datetime(df["Date"]).dt.to_period("M")
# Within each (Scenario, Path) group, resample along the Date index with rule
# "M" and forward-fill, so each row added by the resample repeats the values
# of the closest earlier row in its group. The result is only displayed; `df`
# itself is not modified by this expression.
(
    df.set_index("Date")
      .groupby(["Scenario", "Path"])
      .resample("M")
      .ffill()
)

In [None]:
# Resample each (Scenario, Path) group along Date with rule "M" and fill the
# rows this creates by linear interpolation. The result is only displayed.
(
    df.set_index("Date")
      .groupby(["Scenario", "Path"])
      .resample("M")
      .interpolate(method="linear")
      # The group keys are already part of the result's index, so any copies
      # left among the columns are redundant. errors="ignore" keeps the cell
      # working on pandas versions whose groupby-resample output no longer
      # carries the grouping columns (a plain drop would raise KeyError there).
      .drop(columns=["Scenario", "Path"], errors="ignore")
)

In [None]:
# Variant of the previous cell: asfreq() only materialises the resampled rows
# (new rows hold NaN), the redundant grouping columns are removed, and linear
# interpolation is then applied once to the combined frame rather than
# through the per-group resampler.
# NOTE: this rebinds `df` to the reshaped result (Date is no longer a column),
# so the cell cannot be re-run without first re-running the cells that built
# the original frame.
df = (
    df.set_index("Date")
      .groupby(["Scenario", "Path"])
      .resample("M")
      .asfreq()
      # errors="ignore": tolerate pandas versions whose groupby-resample
      # output omits the grouping columns instead of raising KeyError.
      .drop(columns=["Scenario", "Path"], errors="ignore")
      .interpolate(method="linear")
)
df

In [None]:
# Capture the current column labels before reset_index() adds the index
# levels as extra columns; these labels are the values to pivot.
cols = df.columns
# Reshape to wide form: index levels become columns again, pivot() spreads the
# Date values across the columns with one row per (Scenario, Path), and
# stack(level=0) moves the outermost column level back into the rows
# (presumably the labels captured in `cols` -- verify against the output).
# The final reset_index() flattens the row index into ordinary columns.
# NOTE: `df` is rebound here, so this cell depends on the exact frame produced
# by the previous cell.
df = (
    df.reset_index()
      .pivot(index=["Scenario", "Path"], columns="Date", values=cols)
      .stack(level=0)
      .reset_index()
)
df

### Pandas trick #6: mapping

In [None]:
# Values to classify. 'Thailand' appears in no group, so it exercises the
# "no match" path of the mapping cells below.
countries = pd.Series(
    ['United States', 'Canada', 'Mexico', 'Belgium', 'United Kingdom', 'Thailand']
)
# Group name -> member countries (inverted into member -> group in the next cell).
groups = dict(
    [
        ('North America', ('United States', 'Canada', 'Mexico', 'Greenland')),
        ('Europe', ('France', 'Germany', 'United Kingdom', 'Belgium')),
    ]
)

In [None]:
# Invert group -> members into member -> group so the dict can be passed to
# Series.map. `groups` is rebound to the inverted dict, so running this cell a
# second time would iterate over the characters of each group name instead.
groups = dict(
    (country, region)
    for region, members in groups.items()
    for country in members
)
groups

In [None]:
# Countries without an entry in `groups` map to NaN; fall back to the country
# name itself for those rows.
mapped = countries.map(groups)
mapped.fillna(value=countries)

In [None]:
# Same lookup, but unmatched countries receive a fixed sentinel value.
unmatched = -999
countries.map(groups).fillna(value=unmatched)

### Pandas trick #7: stack and unstack

In [6]:
import pandas as pd
import numpy as np

# Pair the two label lists element-wise to get one (first, second) tuple per row.
first_labels = ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]
second_labels = ["one", "two", "one", "two", "one", "two", "one", "two"]
tuples = list(zip(first_labels, second_labels))

index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])

# Values come from NumPy's global RNG without a seed, so they differ on every run.
values = np.random.randn(8, 2)
df = pd.DataFrame(values, index=index, columns=["A", "B"])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-1.693951,0.944677
bar,two,-1.834788,-0.657229
baz,one,-0.588643,-0.188716
baz,two,0.02523,0.002564
foo,one,-0.383782,-0.172197
foo,two,0.076969,-1.020149
qux,one,-1.416278,1.320298
qux,two,-0.545749,-0.162633


In [7]:
# Move the innermost column level into the row index: the A/B column labels
# become a third index level and the result is a Series.
stacked = df.stack(level=-1)
stacked

first  second   
bar    one     A   -1.693951
               B    0.944677
       two     A   -1.834788
               B   -0.657229
baz    one     A   -0.588643
               B   -0.188716
       two     A    0.025230
               B    0.002564
foo    one     A   -0.383782
               B   -0.172197
       two     A    0.076969
               B   -1.020149
qux    one     A   -1.416278
               B    1.320298
       two     A   -0.545749
               B   -0.162633
dtype: float64

In [9]:
# Inverse of the stack above: the innermost index level goes back to columns.
stacked.unstack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-1.693951,0.944677
bar,two,-1.834788,-0.657229
baz,one,-0.588643,-0.188716
baz,two,0.02523,0.002564
foo,one,-0.383782,-0.172197
foo,two,0.076969,-1.020149
qux,one,-1.416278,1.320298
qux,two,-0.545749,-0.162633


### Pandas trick #8: Slicing a MultiIndex

In [10]:
import pandas as pd
import numpy as np

def mklbl(prefix, n):
    """Return ``n`` labels made of ``prefix`` followed by 0, 1, ..., n-1.

    Both parts are converted with ``str``, e.g. ``mklbl("A", 3)`` gives
    ``["A0", "A1", "A2"]``. An ``n`` of 0 yields an empty list.
    """
    labels = []
    for i in range(n):
        labels.append(str(prefix) + str(i))
    return labels

# Row index: every combination of the A, B, C and D labels (4 * 2 * 4 * 2 rows).
level_labels = [mklbl("A", 4), mklbl("B", 2), mklbl("C", 4), mklbl("D", 2)]
miindex = pd.MultiIndex.from_product(level_labels)

# Two-level column index with named levels.
column_keys = [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")]
micolumns = pd.MultiIndex.from_tuples(column_keys, names=["lvl0", "lvl1"])

# Fill the frame with consecutive integers, then sort both axes; the column
# sort reorders the labels, which is why the displayed numbers within a row
# are not in ascending order.
n_rows, n_cols = len(miindex), len(micolumns)
values = np.arange(n_rows * n_cols).reshape((n_rows, n_cols))
dfmi = pd.DataFrame(values, index=miindex, columns=micolumns)
dfmi = dfmi.sort_index().sort_index(axis=1)
dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,237,236,239,238
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,249,248,251,250


In [11]:
# Row slicers per index level: A1..A3 on level 0, everything on level 1,
# C1..C3 on level 2 (the remaining level is left unrestricted).
# The trailing ", :" names the column axis explicitly, as the pandas slicer
# documentation recommends, so the tuple can only be read as a row selector.
dfmi.loc[(slice("A1", "A3"), slice(None), slice("C1", "C3")), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78
A1,B0,C2,D0,81,80,83,82
A1,B0,C2,D1,85,84,87,86
A1,B0,C3,D0,89,88,91,90
A1,B0,C3,D1,93,92,95,94
A1,B1,C1,D0,105,104,107,106
A1,B1,C1,D1,109,108,111,110
A1,B1,C2,D0,113,112,115,114
A1,B1,C2,D1,117,116,119,118


In [12]:
# Fix levels 0, 1 and 3 to A0 / B0 / D1 and take every label on level 2.
# The explicit ", :" marks the tuple as the row indexer and ":" as the column
# indexer; without it the tuple is open to being interpreted as spanning both
# axes, and how the scalar-indexed levels are treated is left to pandas.
dfmi.loc[("A0", "B0", slice(None), "D1"), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D1,21,20,23,22
A0,B0,C3,D1,29,28,31,30
