# Pandas DataFrames (Part - II)

### *Content :*
- **Conditional selections**
- **Resetting/setting Indexes**

In [1]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [2]:
# Seed NumPy's global random generator so that randn() below produces the
# same numbers on every run.
np.random.seed(101)

In [3]:
# Build a 5x4 frame of random samples: rows labelled A-E, columns W-Z.
rows = list('ABCDE')
cols = list('WXYZ')
dataSet = randn(len(rows), len(cols))

df = pd.DataFrame(data=dataSet, index=rows, columns=cols)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


---

# Conditional Selection :

- An important pandas feature: select data by passing a boolean condition inside bracket notation, e.g. `df[df > 0]`
- Works much like boolean-mask indexing in NumPy

In [4]:
# Comparing the whole frame to a scalar gives a same-shaped frame of booleans.
df > 0

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [5]:
# Keep the boolean mask in a variable so it can be reused for selection.
bool_df = df > 0

In [6]:
# Indexing with a boolean frame keeps the frame's shape: values where the
# mask is True are kept, and positions where it is False become NaN.
df[bool_df]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [7]:
# Same idea with the condition written inline: only the negative values are
# kept, every other position becomes NaN.
df[df < 0]

Unnamed: 0,W,X,Y,Z
A,,,,
B,,-0.319318,-0.848077,
C,-2.018168,,,-0.589001
D,,-0.758872,-0.933237,
E,,,,


---

In [8]:
# A condition on a single column yields a boolean Series, one entry per row.
df['W'] > 0

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

In [9]:
# Common usage: pass a boolean Series to keep only the rows where it is True.
# Row C is left out of the result because W is not > 0 there; `df` itself is
# not modified.
df[ df['W']>0 ]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


> **More Examples :**

In [10]:
# Example 1: boolean Series flagging the rows where Z is negative.
df['Z'] < 0

A    False
B    False
C     True
D    False
E    False
Name: Z, dtype: bool

In [11]:
# Use the Z < 0 condition as a row filter; only row C has a negative Z.
df[ df['Z']<0 ]

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001


In [12]:
# Miscellaneous example: chain a second bracket onto the filtered frame
# (a one-liner) to pull column X from the rows where W > 0.
# NOTE: `df.loc[df['W'] > 0, 'X']` gives the same selection in a single
# indexing step and is the form to use if you ever assign to the selection.

df[ df['W']>0 ]['X']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [13]:
# A list of column names after the row filter returns several columns, in the
# order listed (Y before X here).
df[ df['W']>0 ][['Y','X']]

Unnamed: 0,Y,X
A,0.907969,0.628133
B,-0.848077,-0.319318
D,-0.933237,-0.758872
E,2.605967,1.978757


---

## Multiple Conditions : 

In [14]:
# Python's plain `and` / `or` need a single True/False from each operand, but
# a boolean Series holds one value per row, so pandas raises ValueError here.
# For element-wise logic use `&` (and) / `|` (or) instead, with each
# comparison in its own parentheses.
try:
    df[(df['W']>0) and (df['Y']>1)]
except ValueError as err:
    # The error is the lesson: print it rather than letting it stop "Run All".
    print('ValueError:', err)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [15]:
# `&` combines the two boolean Series element-wise: True only where both hold.
# The parentheses are required because `&` binds tighter than `>`.
(df['W']>0) & (df['Y']>1)

A    False
B    False
C    False
D    False
E     True
dtype: bool

In [16]:
# Rows where W > 0 AND Y > 1.
df[ (df['W']>0) & (df['Y']>1) ] 

Unnamed: 0,W,X,Y,Z
E,0.190794,1.978757,2.605967,0.683509


In [17]:
# `|` is the element-wise OR: True where at least one condition holds.
(df['W']>0) | (df['Y']>1)

A     True
B     True
C    False
D     True
E     True
dtype: bool

In [18]:
# Rows where W > 0 OR Y > 1.
df[ (df['W']>0) | (df['Y']>1) ]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


---

# Reset the index to Default :
> Syntax: `df_name.reset_index()`

In [19]:
# Current frame, still indexed by the labels A-E.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [20]:
# reset_index() moves the current index labels into a new column named
# 'index' and renumbers the rows 0..n-1.
# It returns a new DataFrame; `df` itself is unchanged unless the result is
# assigned back (or `inplace=True` is passed).
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


---

# Setting the Index :
> Syntax: `df_name.set_index('columnName')`

In [21]:
# State abbreviations that will become the new row labels, one per row of df.
new_index = ['CA', 'NY', 'WY', 'OR', 'CO']
new_index

['CA', 'NY', 'WY', 'OR', 'CO']

In [22]:
# Add the labels as a regular column first; set_index() is given a column name.
# NOTE: this assignment modifies `df` in place.
df['States'] = new_index
df

Unnamed: 0,W,X,Y,Z,States
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR
E,0.190794,1.978757,2.605967,0.683509,CO


In [23]:
# set_index() returns a new frame with 'States' as the index (the column is
# moved out of the data); `df` still has its A-E index after this call.
df.set_index('States')

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
NY,0.651118,-0.319318,-0.848077,0.605965
WY,-2.018168,0.740122,0.528813,-0.589001
OR,0.188695,-0.758872,-0.933237,0.955057
CO,0.190794,1.978757,2.605967,0.683509


In [24]:
# `inplace=True` modifies `df` itself instead of returning a new frame
# (same end result as `df = df.set_index('States')`).
# NOTE: running this cell a second time raises KeyError, because 'States' is
# no longer a column once it has become the index.
df.set_index('States', inplace=True)
df

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
NY,0.651118,-0.319318,-0.848077,0.605965
WY,-2.018168,0.740122,0.528813,-0.589001
OR,0.188695,-0.758872,-0.933237,0.955057
CO,0.190794,1.978757,2.605967,0.683509


---