### Pandas trick #1: Keep the rows holding each group's minimum value

In [1]:
import pandas as pd

# Sample frame for the per-group minimum demo: "A" serves as the grouping key
# and "E" is the column whose minimum is looked up within each group.
df = pd.DataFrame(
    dict(
        A=["foo"] * 5 + ["bar"] * 4,
        B=["one"] * 3 + ["two"] * 2 + ["one"] * 2 + ["two"] * 2,
        C=["small", "large", "large", "small", "small", "large", "small", "small", "large"],
        D=[1, 2, 2, 3, 3, 4, 5, 6, 7],
        E=[2, 4, 5, 5, 6, 6, 8, 9, 9],
    )
)
df

Unnamed: 0,A,B,C,D,E
0,foo,one,small,1,2
1,foo,one,large,2,4
2,foo,one,large,2,5
3,foo,two,small,3,5
4,foo,two,small,3,6
5,bar,one,large,4,6
6,bar,one,small,5,8
7,bar,two,small,6,9
8,bar,two,large,7,9


In [2]:
# Broadcast each "A" group's smallest E back onto every row of that group,
# so the result has the same index as df.
df["E"].groupby(df["A"]).transform("min")

0    2
1    2
2    2
3    2
4    2
5    6
6    6
7    6
8    6
Name: E, dtype: int64

In [3]:
# Boolean mask: True where a row's E equals the minimum E of its "A" group.
df["E"].eq(df.groupby("A")["E"].transform("min"))

0     True
1    False
2    False
3    False
4    False
5     True
6    False
7    False
8    False
Name: E, dtype: bool

In [4]:
# Keep only the rows whose E is the smallest within their "A" group;
# rows that tie for the minimum are all kept.
df[df["E"].eq(df.groupby("A")["E"].transform("min"))]

Unnamed: 0,A,B,C,D,E
0,foo,one,small,1,2
5,bar,one,large,4,6


### Pandas trick #2: named aggregation

In [5]:
import pandas as pd
import numpy as np

# Small example frame: two kinds of animal, two measurements per row.
animals = pd.DataFrame(
    dict(
        kind=["cat", "dog"] * 2,
        height=[9.1, 6.0, 9.5, 34.0],
        weight=[7.9, 7.5, 9.9, 198.0],
    )
)
animals

Unnamed: 0,kind,height,weight
0,cat,9.1,7.9
1,dog,6.0,7.5
2,cat,9.5,9.9
3,dog,34.0,198.0


In [6]:
# Named aggregation: each keyword names an output column and is given as a
# (source column, aggregation) pair.
# Every aggregation uses a string alias. Passing a NumPy callable such as
# np.mean is deprecated in recent pandas (it emits a FutureWarning), and the
# "mean" alias produces the same per-group average.
animals.groupby("kind", as_index=False).agg(
    min_height=("height", "min"),
    max_height=("height", "max"),
    average_weight=("weight", "mean"),
)

Unnamed: 0,kind,min_height,max_height,average_weight
0,cat,9.1,9.5,8.9
1,dog,6.0,34.0,102.75


In [7]:
# Dict-style aggregation: one aggregation per source column; the output
# columns keep the source column names.
(
    animals
    .groupby(by="kind", as_index=False)
    .agg({"height": "min", "weight": "mean"})
)

Unnamed: 0,kind,height,weight
0,cat,9.1,8.9
1,dog,6.0,102.75


### Pandas trick #3: where

In [8]:
import pandas as pd
import numpy as np

# 6x4 frame of standard-normal draws, rows "a".."f" and columns "A".."D".
# The generator is seeded so that restarting the kernel and re-running the
# notebook reproduces exactly the same values; an unseeded draw would yield a
# different frame (and different results in the cells that use it) every run.
df = pd.DataFrame(
    np.random.default_rng(42).standard_normal((6, 4)),
    index=list("abcdef"),
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
a,0.389157,-0.463023,-0.470306,0.282709
b,-0.868117,-1.495077,-0.735478,0.127727
c,-1.496857,-1.995401,0.265477,-1.551274
d,0.586459,-0.20472,-0.951872,1.683046
e,0.191106,0.408077,-1.276775,0.178361
f,-0.061398,0.202318,-1.827769,-0.337652


In [9]:
# Keep entries that are strictly positive; everywhere else substitute the
# negated value taken from the same position.
df.where(df.gt(0), other=-df)

Unnamed: 0,A,B,C,D
a,0.389157,0.463023,0.470306,0.282709
b,0.868117,1.495077,0.735478,0.127727
c,1.496857,1.995401,0.265477,1.551274
d,0.586459,0.20472,0.951872,1.683046
e,0.191106,0.408077,1.276775,0.178361
f,0.061398,0.202318,1.827769,0.337652


In [10]:
# Keep entries that are strictly positive; replace everything else with 0.
df.where(df.gt(0), other=0)

Unnamed: 0,A,B,C,D
a,0.389157,0.0,0.0,0.282709
b,0.0,0.0,0.0,0.127727
c,0.0,0.0,0.265477,0.0
d,0.586459,0.0,0.0,1.683046
e,0.191106,0.408077,0.0,0.178361
f,0.0,0.202318,0.0,0.0


### Pandas trick #4: ngroup()

In [11]:
import pandas as pd
# Label every distinct (a, b) combination with a group number. ngroup()
# numbers groups from 0, so 1 is added to make the labels start at 1.
df = pd.DataFrame(dict(a=[1, 1, 1, 2, 2, 2], b=[1, 1, 2, 1, 1, 2]))
df["idx"] = df.groupby(["a", "b"]).ngroup().add(1)
df

Unnamed: 0,a,b,idx
0,1,1,1
1,1,1,1
2,1,2,2
3,2,1,3
4,2,1,3
5,2,2,4


### Pandas trick #5: resample & interpolate, pivot & stack

In [12]:
import pandas as pd
# Quarterly rates for two paths of a single scenario: four quarter labels per
# path, with rate_1 and rate_2 holding the same numbers.
df = pd.DataFrame(
    dict(
        Date=["Q1", "Q2", "Q3", "Q4"] * 2,
        Scenario=[1] * 8,
        Path=[1] * 4 + [2] * 4,
        rate_1=[0.02213, 0.02867, 0.03426, 0.01678, 0.02218, 0.02572, 0.01235, 0.03215],
        rate_2=[0.02213, 0.02867, 0.03426, 0.01678, 0.02218, 0.02572, 0.01235, 0.03215],
    )
)
df

Unnamed: 0,Date,Scenario,Path,rate_1,rate_2
0,Q1,1,1,0.02213,0.02213
1,Q2,1,1,0.02867,0.02867
2,Q3,1,1,0.03426,0.03426
3,Q4,1,1,0.01678,0.01678
4,Q1,1,2,0.02218,0.02218
5,Q2,1,2,0.02572,0.02572
6,Q3,1,2,0.01235,0.01235
7,Q4,1,2,0.03215,0.03215


In [13]:
# Lookup from quarter label to the date string that replaces it.
d = dict(
    Q1="2022-04-01",
    Q2="2022-07-01",
    Q3="2022-10-01",
    Q4="2023-01-01",
)
# Translate the labels in place; a label missing from the lookup maps to NaN
# and is then filled back in with its original value.
df["Date"] = (
    df["Date"]
    .map(d)
    .fillna(value=df["Date"])
)
df

Unnamed: 0,Date,Scenario,Path,rate_1,rate_2
0,2022-04-01,1,1,0.02213,0.02213
1,2022-07-01,1,1,0.02867,0.02867
2,2022-10-01,1,1,0.03426,0.03426
3,2023-01-01,1,1,0.01678,0.01678
4,2022-04-01,1,2,0.02218,0.02218
5,2022-07-01,1,2,0.02572,0.02572
6,2022-10-01,1,2,0.01235,0.01235
7,2023-01-01,1,2,0.03215,0.03215


In [14]:
# Parse the date strings and reduce them to monthly periods.
# This overwrites df["Date"] in place.
df["Date"] = pd.to_datetime(df["Date"]).dt.to_period(freq="M")
# Within every (Scenario, Path) group, resample to monthly frequency and
# forward-fill, so each added month repeats the last observed row.
(
    df
    .set_index("Date")
    .groupby(by=["Scenario", "Path"])
    .resample("M")
    .ffill()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Scenario,Path,rate_1,rate_2
Scenario,Path,Date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,2022-04,1,1,0.02213,0.02213
1,1,2022-05,1,1,0.02213,0.02213
1,1,2022-06,1,1,0.02213,0.02213
1,1,2022-07,1,1,0.02867,0.02867
1,1,2022-08,1,1,0.02867,0.02867
1,1,2022-09,1,1,0.02867,0.02867
1,1,2022-10,1,1,0.03426,0.03426
1,1,2022-11,1,1,0.03426,0.03426
1,1,2022-12,1,1,0.03426,0.03426
1,1,2023-01,1,1,0.01678,0.01678


In [15]:
# Resample every (Scenario, Path) group to monthly frequency, leaving the
# added months empty (asfreq), then fill them by linear interpolation.
# Note: this rebinds `df` to the reshaped, MultiIndex-ed result.
df = (
    df.set_index("Date")
      .groupby(["Scenario", "Path"])
      .resample("M")
      .asfreq()
      # The group keys already live in the row index. Some pandas versions
      # also repeat them as columns while newer ones leave them out;
      # errors="ignore" lets the clean-up succeed either way instead of
      # raising KeyError when the columns are absent.
      .drop(columns=["Scenario", "Path"], errors="ignore")
      # NOTE(review): interpolation runs over the whole frame, not per group.
      # That looks safe here because each group starts and ends on an
      # observed row - confirm if rates can be missing at a group's edges.
      .interpolate(method="linear")
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,rate_1,rate_2
Scenario,Path,Date,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,2022-04,0.02213,0.02213
1,1,2022-05,0.02431,0.02431
1,1,2022-06,0.02649,0.02649
1,1,2022-07,0.02867,0.02867
1,1,2022-08,0.030533,0.030533
1,1,2022-09,0.032397,0.032397
1,1,2022-10,0.03426,0.03426
1,1,2022-11,0.028433,0.028433
1,1,2022-12,0.022607,0.022607
1,1,2023-01,0.01678,0.01678


In [16]:
# Reshape from long to wide: one row per (Scenario, Path, value column) and
# one column per Date.
# Remember the frame's current column labels before reshaping; they are the
# value columns handed to pivot() below.
cols = df.columns
# Steps of the chain:
#   reset_index - turn the index levels back into ordinary columns
#   pivot       - rows keyed by (Scenario, Path); columns become a two-level
#                 header of (value column, Date)
#   stack(0)    - move the outer header level (the value-column names) down
#                 into the row index, leaving only Date across the columns
#   reset_index - flatten the row index into regular columns again
# Note: this rebinds `df` to the wide result.
df = (
    df.reset_index()
      .pivot(index=["Scenario", "Path"], columns="Date", values=cols)
      .stack(level=0)
      .reset_index()
)
df

Date,Scenario,Path,level_2,2022-04,2022-05,2022-06,2022-07,2022-08,2022-09,2022-10,2022-11,2022-12,2023-01
0,1,1,rate_1,0.02213,0.02431,0.02649,0.02867,0.030533,0.032397,0.03426,0.028433,0.022607,0.01678
1,1,1,rate_2,0.02213,0.02431,0.02649,0.02867,0.030533,0.032397,0.03426,0.028433,0.022607,0.01678
2,1,2,rate_1,0.02218,0.02336,0.02454,0.02572,0.021263,0.016807,0.01235,0.01895,0.02555,0.03215
3,1,2,rate_2,0.02218,0.02336,0.02454,0.02572,0.021263,0.016807,0.01235,0.01895,0.02555,0.03215


### Pandas trick #6: mapping

In [17]:
# Country names to classify.
countries = pd.Series(
    ["United States", "Canada", "Mexico", "Belgium", "United Kingdom", "Thailand"]
)
# Region -> member countries. "Thailand" is deliberately absent from every
# region, and some listed members never occur in `countries`.
groups = {
    "North America": ("United States", "Canada", "Mexico", "Greenland"),
    "Europe": ("France", "Germany", "United Kingdom", "Belgium"),
}

In [18]:
# Flip the region -> members mapping into a flat country -> region lookup.
# Note: this rebinds `groups`, so the cell is meant to run once per kernel;
# running it a second time would iterate over the region strings instead.
groups = dict(
    (country, region)
    for region, members in groups.items()
    for country in members
)
groups

{'United States': 'North America',
 'Canada': 'North America',
 'Mexico': 'North America',
 'Greenland': 'North America',
 'France': 'Europe',
 'Germany': 'Europe',
 'United Kingdom': 'Europe',
 'Belgium': 'Europe'}

In [20]:
# Replace each country by its region; a country with no region maps to NaN
# and is then filled back in with its own name.
(
    countries
    .map(groups)
    .fillna(value=countries)
)

0    North America
1    North America
2    North America
3           Europe
4           Europe
5         Thailand
dtype: object

In [21]:
# Replace each country by its region; a country with no region maps to NaN
# and is then replaced by the sentinel -999.
(
    countries
    .map(groups)
    .fillna(value=-999)
)

0    North America
1    North America
2    North America
3           Europe
4           Europe
5             -999
dtype: object