# Idioms


In [1]:
import pandas as pd
import numpy as np

# Shared sample data: three integer columns, four rows. Later cells rebuild
# `df` from this dict to undo earlier in-place edits.
data = {
    "columnA": [4, 5, 6, 7],
    "columnB": [10, 20, 30, 40],
    "columnC": [100, 50, -30, -50],
}
df = pd.DataFrame(data)
df

Unnamed: 0,columnA,columnB,columnC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [2]:
# if-then: overwrite columnB wherever columnA is at least 5.
df.loc[df["columnA"].ge(5), "columnB"] = -1
df

Unnamed: 0,columnA,columnB,columnC
0,4,10,100
1,5,-1,50
2,6,-1,-30
3,7,-1,-50


In [3]:
# if-then with two targets: same row filter, one scalar written to both columns.
df.loc[df["columnA"].ge(5), ["columnB", "columnC"]] = 555
df

Unnamed: 0,columnA,columnB,columnC
0,4,10,100
1,5,555,555
2,6,555,555
3,7,555,555


In [4]:
# The complementary filter covers the remaining rows (the "else" branch).
df.loc[df["columnA"].lt(5), ["columnB", "columnC"]] = 2000
df

Unnamed: 0,columnA,columnB,columnC
0,4,2000,2000
1,5,555,555
2,6,555,555
3,7,555,555


In [5]:
# Boolean frame shaped like df: True keeps the cell, False swaps in the fill value.
df_mask = pd.DataFrame(
    {
        "columnA": [True, True, True, True],
        "columnB": [False, False, False, False],
        "columnC": [True, False, True, False],
    }
)
df.where(df_mask, other=-1000)

Unnamed: 0,columnA,columnB,columnC
0,4,-1000,2000
1,5,-1000,-1000
2,6,-1000,555
3,7,-1000,-1000


In [6]:
# Start again from the untouched sample data.
df = pd.DataFrame(data=data)
df

Unnamed: 0,columnA,columnB,columnC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [7]:
# Vectorised if/else: label each row by comparing columnA with 5.
df["logic"] = np.where(df["columnA"].gt(5), "high", "low")
df

Unnamed: 0,columnA,columnB,columnC,logic
0,4,10,100,low
1,5,20,50,low
2,6,30,-30,high
3,7,40,-50,high


In [8]:
# Split by a boolean criterion: fresh frame, then the rows at or below 5.
df = pd.DataFrame(data=data)
df[df["columnA"].le(5)]

Unnamed: 0,columnA,columnB,columnC
0,4,10,100
1,5,20,50


In [9]:
# The other half of the split: rows strictly above 5.
df[df["columnA"].gt(5)]

Unnamed: 0,columnA,columnB,columnC
2,6,30,-30
3,7,40,-50


In [10]:
df = pd.DataFrame(data=data)
# AND: both conditions must hold; the result is the matching columnA values.
df.loc[df["columnB"].lt(25) & df["columnC"].ge(-40), "columnA"]

0    4
1    5
Name: columnA, dtype: int64

In [11]:
# OR: either condition is enough.
df.loc[df["columnB"].lt(25) | df["columnC"].ge(-40), "columnA"]

0    4
1    5
2    6
Name: columnA, dtype: int64

In [12]:
# Writing 0.1 into an int64 column relies on pandas silently upcasting it, which
# newer pandas versions warn about; make the float conversion explicit first.
df["columnA"] = df["columnA"].astype("float64")
# OR-criteria with assignment: matching rows get 0.1 in columnA.
df.loc[(df['columnB'] > 25) | (df['columnC'] >= 75), 'columnA'] = 0.1
df

Unnamed: 0,columnA,columnB,columnC
0,0.1,10,100
1,5.0,20,50
2,0.1,30,-30
3,0.1,40,-50


In [13]:
df = pd.DataFrame(data)

aValue = 43.0
# Order rows by how close columnC is to aValue. argsort() yields row positions,
# not index labels, so select with .iloc; .loc gives the same answer only while
# the index is the default 0..n-1.
df.iloc[(df.columnC - aValue).abs().argsort().to_numpy()]

Unnamed: 0,columnA,columnB,columnC
1,5,20,50
0,4,10,100
2,6,30,-30
3,7,40,-50


In [14]:
df = pd.DataFrame(data=data)

# One boolean Series per criterion, named so they can be combined freely.
Crit1 = df["columnA"].le(5.5)
Crit2 = df["columnB"].eq(10.0)
Crit3 = df["columnC"].gt(-40.0)

# Rows that satisfy all three criteria at once.
AllCrit = Crit1 & Crit2 & Crit3
df[AllCrit]

Unnamed: 0,columnA,columnB,columnC
0,4,10,100


In [15]:
def build():
    """Return a new 4-row DataFrame with integer columns columnA, columnB and columnC."""
    return pd.DataFrame(
        {
            "columnA": [4, 5, 6, 7],
            "columnB": [10, 20, 30, 40],
            "columnC": [100, 50, -30, -50],
        }
    )

df = build()

# Combine a value criterion with index membership. This is not the last
# expression in the cell, so it has to be displayed explicitly; as a bare
# expression its result would be thrown away.
display(df[(df.columnA <= 6) & (df.index.isin([0,2,4]))])

# Same data with string row labels.
df = pd.DataFrame(data, index=['foo','bar','boo','kar'])
df

Unnamed: 0,columnA,columnB,columnC
foo,4,10,100
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [16]:
# Label-based slice with .loc: both endpoints are included.
df.loc['bar':'kar']

Unnamed: 0,columnA,columnB,columnC
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [17]:
# Integer slice with plain [] selects rows by position; the stop position is excluded.
df[0:3]

Unnamed: 0,columnA,columnB,columnC
foo,4,10,100
bar,5,20,50
boo,6,30,-30


In [18]:
# Label slice with plain [] also selects rows, with both endpoints included.
df['bar':'kar']

Unnamed: 0,columnA,columnB,columnC
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [19]:
# Integer labels starting at 1, so row positions and row labels no longer coincide.
df2 = pd.DataFrame(data, index=[1,2,3,4])
# .iloc is position-based: rows at positions 1 and 2 (stop excluded).
df2.iloc[1:3]

Unnamed: 0,columnA,columnB,columnC
2,5,20,50
3,6,30,-30


In [20]:
# .loc is label-based: labels 1 through 3, with the stop label included.
df2.loc[1:3]

Unnamed: 0,columnA,columnB,columnC
1,4,10,100
2,5,20,50
3,6,30,-30


In [21]:
df = build()
# ~ inverts the combined boolean mask, keeping the rows the filter would exclude.
df[~( (df.columnA <= 6) & (df.index.isin([0,2,4])) )]

Unnamed: 0,columnA,columnB,columnC
1,5,20,50
3,7,40,-50


In [23]:
df = pd.DataFrame({"a": [1, 2, 1, 3], "b": [1, 1, 2, 2], "c": [2, 1, 3, 1]})
source_cols = df.columns
# One "<name>_cat" column per source column, in the same order.
new_cols = [str(x) + "_cat" for x in source_cols]
categories = {1:'Alpha',2:'Beta',3:'Charlie'}
# DataFrame.applymap is deprecated in recent pandas; mapping each column with
# Series.map performs the same element-wise lookup on old and new versions alike.
df[new_cols] = df[source_cols].apply(lambda col: col.map(categories.get))
df

<built-in method get of dict object at 0x000001FA7693A300>


Unnamed: 0,a,b,c,a_cat,b_cat,c_cat
0,1,1,2,Alpha,Alpha,Beta
1,2,1,1,Beta,Alpha,Alpha
2,1,2,3,Alpha,Beta,Charlie
3,3,2,1,Charlie,Beta,Alpha


In [30]:
# Per-group minimum: idxmin gives the index label of the smallest b within
# each a-group, and .loc pulls those complete rows.
df = pd.DataFrame(
    {
        "a": [1, 1, 1, 2, 2, 2, 3, 3],
        "b": [2, 1, 3, 4, 5, 1, 2, 3],
    }
)
df.loc[df.groupby("a")["b"].idxmin()]

Unnamed: 0,a,b
1,1,1
5,2,1
6,3,2
