In [1]:
import pandas as pd
import numpy as np
from numpy.random import randn

In [2]:
# Build a 5x4 demo frame of standard-normal draws: rows A-E, columns W-Z.
# No seed is set in the visible cells, so the values change on every fresh run.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)
# The constructor also accepts (data, index, columns) positionally; note that
# this variant fills every cell with the constant 100 rather than random values:
#   pd.DataFrame(100, ['A', 'B', 'C', 'D', 'E'], ['W', 'X', 'Y', 'Z'])

# Display the frame.
df

Unnamed: 0,W,X,Y,Z
A,1.48451,0.582819,-0.849725,0.355747
B,0.24729,0.935286,0.137584,-1.378622
C,0.596107,1.624614,-1.510829,0.742234
D,2.246514,-0.159797,-0.522798,0.488819
E,-0.157078,-0.191695,0.155433,-0.251367


### Conditional Selection

An important feature of pandas is conditional selection with bracket notation, which works much like boolean indexing in NumPy:

In [3]:
# Element-wise comparison: a boolean frame with the same labels as df,
# True wherever the value is greater than 0.5 (same as df > 0.5).
booldf = df.gt(0.5)
booldf

Unnamed: 0,W,X,Y,Z
A,True,True,False,False
B,False,True,False,False
C,True,True,False,True
D,True,False,False,False
E,False,False,False,False


In [4]:
# Keep values where the mask is True and put NaN everywhere else.
# Bracket form of the same selection: df[booldf], i.e. df[df > 0.5].
df.where(booldf)

Unnamed: 0,W,X,Y,Z
A,1.48451,0.582819,,
B,,0.935286,,
C,0.596107,1.624614,,0.742234
D,2.246514,,,
E,,,,


In [5]:
# Row mask: True for every row whose 'W' value is greater than 0.5
# (same as df['W'] > 0.5).
bool_W = df['W'].gt(0.5)
bool_W

A     True
B    False
C     True
D     True
E    False
Name: W, dtype: bool

In [6]:
# A boolean Series selects whole rows: only rows where the mask is True remain.
# Same rows as df[bool_W], i.e. df[df['W'] > 0.5].
df.loc[bool_W]

Unnamed: 0,W,X,Y,Z
A,1.48451,0.582819,-0.849725,0.355747
C,0.596107,1.624614,-1.510829,0.742234
D,2.246514,-0.159797,-0.522798,0.488819


In [7]:
# Pick the masked rows and column 'X' in a single .loc call rather than the
# chained df[bool_W]['X'], which first builds an intermediate frame.
# Same result as df[df['W'] > 0.5]['X'].
df.loc[bool_W, 'X']

A    0.582819
C    1.624614
D   -0.159797
Name: X, dtype: float64

In [8]:
my_column = ['X', 'Y']
# One .loc call selects the masked rows and the listed columns together,
# avoiding the chained df[bool_W][my_column] lookup.
# Same result as df[df['W'] > 0.5][['X', 'Y']].
df.loc[bool_W, my_column]

Unnamed: 0,X,Y
A,0.582819,-0.849725
C,1.624614,-1.510829
D,-0.159797,-0.522798


*To combine two conditions, use `|` (or) and `&` (and), wrapping each condition in parentheses:*

In [9]:
# Rows where W > 0.5 AND Y < 0. Each comparison keeps its own parentheses
# because & binds more tightly than > and <.
df.loc[
    (df['W'] > 0.5)
    & (df['Y'] < 0)
]

Unnamed: 0,W,X,Y,Z
A,1.48451,0.582819,-0.849725,0.355747
C,0.596107,1.624614,-1.510829,0.742234
D,2.246514,-0.159797,-0.522798,0.488819
