In [52]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [53]:
# Seed NumPy's global (legacy) generator so randn yields the same numbers on every run.
np.random.seed(seed=101)

In [54]:
# 5x4 frame of standard-normal draws: row labels A-E, column labels W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index='A B C D E'.split(),
    columns='W X Y Z'.split(),
)

In [55]:
# Display the frame: rows A-E, columns W-Z.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


# Conditional Selection

In [56]:
# Element-wise comparison: bol is a boolean DataFrame with the same row/column labels as df.
bol = df > 0

In [57]:
# Inspect the mask: True wherever the corresponding value in df is positive.
bol

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [58]:
# Indexing with a whole boolean DataFrame keeps the original shape;
# every cell where the mask is False is shown as NaN.
df[bol]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [59]:
# Same selection with the boolean mask built inline instead of stored in a variable.
df[df > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [60]:
# Comparing a single column yields a boolean Series indexed by row label.
df['W'] > 0

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

In [61]:
# Keep only the rows whose W value is positive (row C is dropped).
result = df.loc[df['W'] > 0]

In [62]:
# Indexing with a boolean Series filters whole rows, so the result contains no NaN.
result

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [63]:
# Rows whose Z value is negative (only row C here).
df.loc[df['Z'] < 0]

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001


In [64]:
# Column X of the filtered frame: a Series limited to the rows that passed the W > 0 filter.
result['X']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [65]:
# Filter rows and pick column X in a single .loc call (row mask, column label).
# This returns the same Series as df[df['W'] > 0]['X'] but avoids chained indexing,
# which builds an intermediate frame and is unsafe if later used for assignment.
df.loc[df['W'] > 0, 'X']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [66]:
# Filter rows and pick columns X and Y in a single .loc call (row mask, list of column labels).
# This returns the same frame as df[df['W'] > 0][['X', 'Y']] without chained indexing.
df.loc[df['W'] > 0, ['X', 'Y']]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077
D,-0.758872,-0.933237
E,1.978757,2.605967


# Combining Conditions: and (&), or (|)

In [67]:
# Show the full frame again as a reference for the multi-condition filters below.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [68]:
# Rows where Z is positive AND X is below 1.
# Each comparison is parenthesised because & binds tighter than < and >.
df[(df['Z'] > 0) & (df['X'] < 1)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


In [69]:
# Rows where Y exceeds 2, OR where X lies strictly between 0 and 1.
df[(df['Y'] > 2) | ((df['X'] > 0) & (df['X'] < 1))]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001
E,0.190794,1.978757,2.605967,0.683509


In [70]:
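# Combining boolean masks on pandas objects:
#   and -> &
#   or  -> |
# Python's `and` / `or` keywords raise "The truth value of a Series is ambiguous" on a Series.
# Wrap each comparison in parentheses, because & and | bind tighter than < and >.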

# Reset Index and Set Index

In [71]:
# Move the current index (A-E) into a regular column named 'index' and
# replace it with a default integer index (0, 1, 2, ...).
df.reset_index()    # returns a new frame; df is not affected unless the result is assigned back (or inplace=True is passed)

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


In [72]:
# Add a 'Names' column from a list of five strings, one per row of df.
df['Names'] = 'Arthur Bill Charles David Evan'.split()

In [73]:
# 'Names' now appears as the last column; the index is still A-E.
df

Unnamed: 0,W,X,Y,Z,Names
A,2.70685,0.628133,0.907969,0.503826,Arthur
B,0.651118,-0.319318,-0.848077,0.605965,Bill
C,-2.018168,0.740122,0.528813,-0.589001,Charles
D,0.188695,-0.758872,-0.933237,0.955057,David
E,0.190794,1.978757,2.605967,0.683509,Evan


In [74]:
# set_index returns a new frame that uses the 'Names' column as its index.
# Rebinding df keeps the change without inplace=True, which returns None and
# prevents method chaining. The old A-E index is discarded.
# The 'Names' column is consumed, so re-running this cell on its own raises KeyError.
df = df.set_index('Names')

In [75]:
# The index is now the names (Arthur ... Evan); 'Names' is no longer a regular column.
df

Unnamed: 0_level_0,W,X,Y,Z
Names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Arthur,2.70685,0.628133,0.907969,0.503826
Bill,0.651118,-0.319318,-0.848077,0.605965
Charles,-2.018168,0.740122,0.528813,-0.589001
David,0.188695,-0.758872,-0.933237,0.955057
Evan,0.190794,1.978757,2.605967,0.683509


In [80]:
# Label-based row lookup: with 'Names' as the index, rows are addressed by name.
# The result is a Series of that row's W, X, Y and Z values.
df.loc['Bill']

W    0.651118
X   -0.319318
Y   -0.848077
Z    0.605965
Name: Bill, dtype: float64