In [16]:
import pandas as pd
import numpy as np

# Create the DataFrame

In [4]:
# Sample frame used throughout: three integer columns on the default index.
df = pd.DataFrame(
    {
        'AAA': [4, 5, 6, 7],
        'BBB': [10, 20, 30, 40],
        'CCC': [100, 50, -30, -50],
    }
)
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


# if-then…

An if-then on one column

In [5]:
# Where AAA is at least 5, overwrite BBB with -1 (modifies df in place).
df.loc[df['AAA'].ge(5), 'BBB'] = -1

In [6]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,-1,50
2,6,-1,-30
3,7,-1,-50


In [7]:
# Same row condition, but assign one scalar to two columns at once.
df.loc[df['AAA'].ge(5), ['BBB', 'CCC']] = 555

In [8]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,555,555
2,6,555,555
3,7,555,555


In [9]:
# Complementary condition: rows with AAA below 5 get 2000 in both columns.
df.loc[df['AAA'].lt(5), ['BBB', 'CCC']] = 2000

In [10]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,2000,2000
1,5,555,555
2,6,555,555
3,7,555,555


In [11]:
# Boolean frame with the same column labels as df: AAA is all True,
# BBB is all False, and CCC alternates True/False.
df_mask = pd.DataFrame(
    {
        'AAA': [True, True, True, True],
        'BBB': [False, False, False, False],
        'CCC': [True, False, True, False],
    }
)

In [12]:
# Keep values where the mask is True and substitute -1000 elsewhere;
# this returns a new frame and leaves df unchanged.
df.where(cond=df_mask, other=-1000)

Unnamed: 0,AAA,BBB,CCC
0,4,-1000,2000
1,5,-1000,-1000
2,6,-1000,555
3,7,-1000,-1000


In [13]:
# Rebuild the sample frame so the next example starts from the original values.
df = pd.DataFrame(
    {
        'AAA': [4, 5, 6, 7],
        'BBB': [10, 20, 30, 40],
        'CCC': [100, 50, -30, -50],
    }
)

In [14]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [17]:
# Vectorised if-then-else: label each row 'high' when AAA exceeds 5, else 'low'.
df['logic'] = np.where(df['AAA'].gt(5), 'high', 'low')

In [18]:
df

Unnamed: 0,AAA,BBB,CCC,logic
0,4,10,100,low
1,5,20,50,low
2,6,30,-30,high
3,7,40,-50,high


# Splitting

In [19]:
# Fresh copy of the sample frame for the splitting examples.
df = pd.DataFrame(
    {
        'AAA': [4, 5, 6, 7],
        'BBB': [10, 20, 30, 40],
        'CCC': [100, 50, -30, -50],
    }
)

In [20]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [21]:
# First part of the split: rows whose AAA is at most 5.
df.loc[df['AAA'] <= 5]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50


In [22]:
# Second part of the split: the remaining rows, where AAA is above 5.
df.loc[df['AAA'] > 5]

Unnamed: 0,AAA,BBB,CCC
2,6,30,-30
3,7,40,-50


# Building criteria

In [23]:
# Fresh copy of the sample frame for the criteria examples.
df = pd.DataFrame(
    {
        'AAA': [4, 5, 6, 7],
        'BBB': [10, 20, 30, 40],
        'CCC': [100, 50, -30, -50],
    }
)

In [24]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [25]:
# AND of two conditions (no assignment): AAA for rows with BBB < 25 and CCC >= -40.
df.loc[df['BBB'].lt(25) & df['CCC'].ge(-40), 'AAA']

0    4
1    5
Name: AAA, dtype: int64

In [26]:
# OR of two conditions (no assignment): AAA for rows with BBB > 25 or CCC >= -40.
df.loc[df['BBB'].gt(25) | df['CCC'].ge(-40), 'AAA']

0    4
1    5
2    6
3    7
Name: AAA, dtype: int64

In [27]:
# OR of two conditions with assignment (modifies df in place).
# 'AAA' is built from integers, so writing 0.1 into it relies on implicit
# upcasting, which pandas deprecated in 2.1 (PDEP-6). Convert the column to
# float explicitly before the conditional assignment.
df['AAA'] = df['AAA'].astype('float64')
df.loc[(df['BBB'] > 25) | (df['CCC'] >= 75), 'AAA'] = 0.1

In [28]:
df

Unnamed: 0,AAA,BBB,CCC
0,0.1,10,100
1,5.0,20,50
2,0.1,30,-30
3,0.1,40,-50


In [29]:
# Rebuild the sample frame so the next example starts from the original values.
df = pd.DataFrame(
    {
        'AAA': [4, 5, 6, 7],
        'BBB': [10, 20, 30, 40],
        'CCC': [100, 50, -30, -50],
    }
)

In [30]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [31]:
# Reference value; the following cell orders rows by how close CCC is to it.
aValue = 43.0

In [32]:
# Order rows by the absolute distance between CCC and aValue, closest first.
# argsort() yields integer positions, not index labels, so the positional
# indexer is the correct one; .loc only gave the right answer because this
# frame's labels happen to equal its positions.
df.iloc[(df['CCC'] - aValue).abs().argsort().to_numpy()]

Unnamed: 0,AAA,BBB,CCC
1,5,20,50
0,4,10,100
2,6,30,-30
3,7,40,-50


In [38]:
# Fresh copy of the sample frame for the multi-criteria examples.
df = pd.DataFrame(
    {
        'AAA': [4, 5, 6, 7],
        'BBB': [10, 20, 30, 40],
        'CCC': [100, 50, -30, -50],
    }
)

In [39]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [40]:
# First criterion as a boolean Series: AAA no greater than 5.5.
Crit1 = df['AAA'].le(5.5)

In [41]:
# Second criterion as a boolean Series: BBB equal to 10.0.
Crit2 = df['BBB'].eq(10.0)

In [42]:
# Third criterion as a boolean Series: CCC strictly above -40.0.
Crit3 = df['CCC'].gt(-40.0)

In [43]:
# Combine the three masks element-wise; a row must satisfy every criterion.
AllCrit = (Crit1 & Crit2) & Crit3

In [44]:
# Rows that satisfy the combined mask.
df.loc[AllCrit]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100


In [45]:
import functools

In [46]:
# Collect the masks in a list so they can be combined programmatically.
CritList = [Crit1, Crit2, Crit3]

In [47]:
# Fold the list of masks into a single mask by AND-ing them pairwise.
AllCrit = functools.reduce(lambda combined, crit: combined & crit, CritList)

In [48]:
# Rows that satisfy the mask produced by the reduce step.
df.loc[AllCrit]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100


# Selecting from DataFrames

In [49]:
# Fresh copy of the sample frame for the selection examples.
df = pd.DataFrame(
    {
        'AAA': [4, 5, 6, 7],
        'BBB': [10, 20, 30, 40],
        'CCC': [100, 50, -30, -50],
    }
)

In [50]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [51]:
# Combine a column condition with index-label membership; labels in the
# list that are absent from the index (here 4) simply match nothing.
df.loc[df['AAA'].le(6) & df.index.isin([0, 2, 4])]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
2,6,30,-30


In [52]:
# Same values as the sample frame, but with string labels as the index so
# label-based and position-based slicing can be told apart.
df = pd.DataFrame(
    {
        'AAA': [4, 5, 6, 7],
        'BBB': [10, 20, 30, 40],
        'CCC': [100, 50, -30, -50],
    },
    index=['foo', 'bar', 'boo', 'kar'],
)

In [53]:
# Label-based slice: both endpoints ('bar' and 'kar') are included.
df.loc['bar':'kar', :]

Unnamed: 0,AAA,BBB,CCC
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [54]:
# Position-based slice: the first three rows (the stop position is excluded).
df.iloc[:3]

Unnamed: 0,AAA,BBB,CCC
foo,4,10,100
bar,5,20,50
boo,6,30,-30


In [56]:
# Column values kept in a plain dict so a frame with a custom index can be
# built from them.
data = {
    'AAA': [4, 5, 6, 7],
    'BBB': [10, 20, 30, 40],
    'CCC': [100, 50, -30, -50],
}

In [57]:
# Integer index that starts at 1 instead of 0, so labels and positions differ.
df2 = pd.DataFrame(data, index=[1, 2, 3, 4])

In [58]:
# Position-based: rows at positions 1 and 2 (stop excluded), i.e. labels 2 and 3.
df2.iloc[1:3, :]

Unnamed: 0,AAA,BBB,CCC
2,5,20,50
3,6,30,-30


In [59]:
# Label-based: rows labelled 1 through 3, with both endpoints included.
df2.loc[1:3, :]

Unnamed: 0,AAA,BBB,CCC
1,4,10,100
2,5,20,50
3,6,30,-30


In [60]:
# Back to the sample frame with the default integer index.
df = pd.DataFrame(
    {
        'AAA': [4, 5, 6, 7],
        'BBB': [10, 20, 30, 40],
        'CCC': [100, 50, -30, -50],
    }
)

In [61]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [62]:
# Invert the combined mask with ~ to select every row the mask rejects.
df.loc[~(df['AAA'].le(6) & df.index.isin([0, 2, 4]))]

Unnamed: 0,AAA,BBB,CCC
1,5,20,50
3,7,40,-50


# New columns

In [63]:
# Small frame of integer codes (1-3) that will be translated to names.
df = pd.DataFrame(
    {
        'AAA': [1, 2, 1, 3],
        'BBB': [1, 1, 2, 2],
        'CCC': [2, 1, 3, 1],
    }
)

In [64]:
df

Unnamed: 0,AAA,BBB,CCC
0,1,1,2
1,2,1,1
2,1,2,3
3,3,2,1


In [65]:
# Remember the current column labels before any derived columns are added.
source_cols = df.columns

In [66]:
# Name of the derived column for each source column: "<label>_cat".
new_cols = [f"{label}_cat" for label in source_cols]

In [67]:
# Lookup table from integer code to category name.
categories = {
    1: 'Alpha',
    2: 'Beta',
    3: 'Charlie',
}

In [68]:
# Translate every value of the source columns through `categories` (via
# dict.get) and store the results in the matching *_cat columns.
# Column-wise Series.map replaces DataFrame.applymap, which is deprecated
# since pandas 2.1, and works on both older and newer pandas.
df[new_cols] = df[source_cols].apply(lambda col: col.map(categories.get))

In [69]:
df

Unnamed: 0,AAA,BBB,CCC,AAA_cat,BBB_cat,CCC_cat
0,1,1,2,Alpha,Alpha,Beta
1,2,1,1,Beta,Alpha,Alpha
2,1,2,3,Alpha,Beta,Charlie
3,3,2,1,Charlie,Beta,Alpha


In [70]:
# Two-column frame in which AAA repeats (1, 1, 1, 2, 2, 2, 3, 3) and BBB
# carries a value per row.
df = pd.DataFrame(
    {
        'AAA': [1, 1, 1, 2, 2, 2, 3, 3],
        'BBB': [2, 1, 3, 4, 5, 1, 2, 3],
    }
)

In [71]:
df

Unnamed: 0,AAA,BBB
0,1,2
1,1,1
2,1,3
3,2,4
4,2,5
5,2,1
6,3,2
7,3,3
