# 2.27.1 Idioms

In [1]:
import pandas as pd
# Seven-row demo frame with three integer columns.
df = pd.DataFrame(
    {
        "AAA": list(range(4, 11)),
        "BBB": list(range(10, 71, 10)),
        "CCC": [100, 50, -30, -50, -20, -40, 110],
    }
)

In [2]:
# Display the freshly built frame (7 rows; columns AAA, BBB, CCC).
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50
4,8,50,-20
5,9,60,-40
6,10,70,110


# if-then.

In [3]:
# if-then on a single column: wherever AAA exceeds 9, overwrite BBB with -1.
df.loc[df["AAA"].gt(9), "BBB"] = -1

In [4]:
# Display the frame: only the row with AAA > 9 should now have BBB == -1.
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50
4,8,50,-20
5,9,60,-40
6,10,-1,110


In [5]:
# if-then on two columns at once: rows with AAA >= 5 get 777 in both BBB and CCC.
df.loc[df["AAA"].ge(5), ["BBB", "CCC"]] = 777

In [6]:
# Display the frame: BBB and CCC should read 777 on every row with AAA >= 5.
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,777,777
2,6,777,777
3,7,777,777
4,8,777,777
5,9,777,777
6,10,777,777


In [7]:
# The complementary ("else") branch: rows with AAA < 5 get 2000 in BBB and CCC.
df.loc[df["AAA"].lt(5), ["BBB", "CCC"]] = 2000

In [8]:
# Display the frame: the row with AAA < 5 now holds 2000 in BBB and CCC.
df

Unnamed: 0,AAA,BBB,CCC
0,4,2000,2000
1,5,777,777
2,6,777,777
3,7,777,777
4,8,777,777
5,9,777,777
6,10,777,777


In [9]:
# Boolean mask with the same column names as df: True keeps a cell, False
# marks it for replacement.
# NOTE(review): the mask has only 4 rows (index 0-3) while df currently has 7;
# confirm that leaving rows 4-6 without mask entries is intentional.
df_mask = pd.DataFrame(
    {
        "AAA": [True, True, True, True],
        "BBB": [False, False, False, False],
        "CCC": [True, False, True, False],
    }
)

In [10]:
# Keep cells where the mask is True and substitute -1000 everywhere else.
# Rows that have no entry in the mask are replaced as well (rows 4-6 in the
# output below).
df.where(cond=df_mask, other=-1000)

Unnamed: 0,AAA,BBB,CCC
0,4,-1000,2000
1,5,-1000,-1000
2,6,-1000,777
3,7,-1000,-1000
4,-1000,-1000,-1000
5,-1000,-1000,-1000
6,-1000,-1000,-1000


In [11]:
# Start over with a small four-row frame.
df = pd.DataFrame(
    {
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [12]:
# Display the rebuilt four-row frame.
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [13]:
import numpy as np
# if-then-else in one step: label each row "high" when AAA > 5, otherwise "low".
df["logic"] = np.where(df["AAA"].gt(5), "high", "low")

In [14]:
# Display the frame with the new "logic" column ("high" / "low" per row).
df

Unnamed: 0,AAA,BBB,CCC,logic
0,4,10,100,low
1,5,20,50,low
2,6,30,-30,high
3,7,40,-50,high


In [15]:
# Split by a boolean criterion, part 1: the rows with AAA <= 5.
df.loc[df["AAA"].le(5)]

Unnamed: 0,AAA,BBB,CCC,logic
0,4,10,100,low
1,5,20,50,low


In [16]:
# Split by a boolean criterion, part 2: the remaining rows, with AAA > 5.
df.loc[df["AAA"].gt(5)]

Unnamed: 0,AAA,BBB,CCC,logic
2,6,30,-30,high
3,7,40,-50,high


In [17]:
# Combine two criteria with AND (&): AAA values of rows where BBB < 25 and CCC >= -40.
df.loc[df["BBB"].lt(25) & df["CCC"].ge(-40), "AAA"]

0    4
1    5
Name: AAA, dtype: int64

In [18]:
# Combine two criteria with OR (|): AAA values of rows where BBB > 25 or CCC >= -40.
df.loc[df["BBB"].gt(25) | df["CCC"].ge(-40), "AAA"]

0    4
1    5
2    6
3    7
Name: AAA, dtype: int64

In [19]:
# Assign through an OR criterion: set AAA to 0.1 on rows where BBB > 25 or CCC >= 75.
# AAA is an integer column at this point, so it is cast to float64 first.
# Writing 0.1 straight into an int64 column relies on implicit upcasting during
# assignment, which pandas deprecated in 2.1 (PDEP-6) and is slated to reject.
# The resulting values are the same as with the implicit upcast.
df["AAA"] = df["AAA"].astype("float64")
df.loc[(df["BBB"] > 25) | (df["CCC"] >= 75), "AAA"] = 0.1

In [20]:
# Display the frame: AAA reads 0.1 on the matched rows and the column now shows as float.
df

Unnamed: 0,AAA,BBB,CCC,logic
0,0.1,10,100,low
1,5.0,20,50,low
2,0.1,30,-30,high
3,0.1,40,-50,high


# 2.27.2 Selection

In [21]:
# Fresh four-row frame for the selection examples.
df = pd.DataFrame(
    {
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [22]:
# Display the fresh four-row frame.
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [23]:
# Mix a value criterion with an index criterion: rows with AAA <= 6 whose
# index label is one of 0, 2 or 4 (label 4 is simply absent from this frame).
df.loc[df["AAA"].le(6) & df.index.isin([0, 2, 4])]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
2,6,30,-30


In [24]:
# Same data as before, but with string row labels instead of the default
# integer index.
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    },
    index=["foo", "bar", "boo", "kar"],
)

In [25]:
# Display the frame with its string row labels (foo, bar, boo, kar).
df

Unnamed: 0,AAA,BBB,CCC
foo,4,10,100
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [26]:
# Frame of small integer codes (1-3) that will be mapped to category names.
df = pd.DataFrame(
    {
        "AAA": [1, 2, 1, 3],
        "BBB": [1, 1, 2, 2],
        "CCC": [2, 1, 3, 1],
    }
)

In [27]:
# Display the frame of integer codes.
df

Unnamed: 0,AAA,BBB,CCC
0,1,1,2
1,2,1,1
2,1,2,3
3,3,2,1


In [28]:
# Capture the current column labels before any derived columns are added to df.
source_cols = df.columns

In [29]:
# One derived column name per source column, formed by appending "_cat".
new_cols = [f"{col}_cat" for col in source_cols]

In [30]:
# Lookup table from integer code to category name.
categories = dict(zip((1, 2, 3), ("Alpha", "Beta", "Charlie")))

In [31]:
# Translate every code in the source columns to its category name and store the
# results in the matching *_cat columns (assigned by position).
# The lookup is applied column by column with Series.map rather than
# DataFrame.applymap, which has been deprecated since pandas 2.1; the
# element-wise result is the same.
df[new_cols] = df[source_cols].apply(lambda col: col.map(categories.get))

In [32]:
# Display the frame: each source column now has a matching *_cat column of labels.
df

Unnamed: 0,AAA,BBB,CCC,AAA_cat,BBB_cat,CCC_cat
0,1,1,2,Alpha,Alpha,Beta
1,2,1,1,Beta,Alpha,Alpha
2,1,2,3,Alpha,Beta,Charlie
3,3,2,1,Charlie,Beta,Alpha
