# Selection, Conditional Selection, Indexing and Multi-index

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Bring `randn` into the notebook namespace and fix NumPy's global seed so the
# random frames built in later cells are reproducible on a fresh run.
from numpy.random import randn

np.random.seed(42)

In [3]:
# 5x3 frame of `randn` values: rows labelled A-E, columns labelled X-Z.
df = pd.DataFrame(
    randn(5, 3),
    index=list('ABCDE'),
    columns=list('XYZ'),
)

### Selection

In [4]:
df 

Unnamed: 0,X,Y,Z
A,0.496714,-0.138264,0.647689
B,1.52303,-0.234153,-0.234137
C,1.579213,0.767435,-0.469474
D,0.54256,-0.463418,-0.46573
E,0.241962,-1.91328,-1.724918


In [5]:
# A single column name returns that column as a Series.
df['X']

A    0.496714
B    1.523030
C    1.579213
D    0.542560
E    0.241962
Name: X, dtype: float64

In [6]:
# A list of column names returns a DataFrame with just those columns.
df[['X', 'Y']]

Unnamed: 0,X,Y
A,0.496714,-0.138264
B,1.52303,-0.234153
C,1.579213,0.767435
D,0.54256,-0.463418
E,0.241962,-1.91328


In [7]:
# Create column 'new' as the element-wise sum of columns 'X' and 'Y'.
df['new'] = df['X'] + df['Y']

In [8]:
df

Unnamed: 0,X,Y,Z,new
A,0.496714,-0.138264,0.647689,0.35845
B,1.52303,-0.234153,-0.234137,1.288876
C,1.579213,0.767435,-0.469474,2.346648
D,0.54256,-0.463418,-0.46573,0.079142
E,0.241962,-1.91328,-1.724918,-1.671318


In [9]:
# Remove the helper column again. Rebinding `df` to the returned frame (instead
# of `inplace=True`) keeps the step an explicit transformation, and
# `columns=` states the axis by name rather than via `axis=1`.
df = df.drop(columns='new')

In [10]:
df

Unnamed: 0,X,Y,Z
A,0.496714,-0.138264,0.647689
B,1.52303,-0.234153,-0.234137
C,1.579213,0.767435,-0.469474
D,0.54256,-0.463418,-0.46573
E,0.241962,-1.91328,-1.724918


**Using `df.loc[]` (label-based selection)**

In [11]:
# Select a single row by its index label; the result is a Series whose
# index is the frame's column names.
df.loc['C']

X    1.579213
Y    0.767435
Z   -0.469474
Name: C, dtype: float64

**Using `df.iloc[]` (integer position-based selection)**

In [12]:
# Select a row by integer position: position 2 is the third row (label 'C' here).
df.iloc[2]

X    1.579213
Y    0.767435
Z   -0.469474
Name: C, dtype: float64

In [13]:
# A list of row labels selects several rows and returns a DataFrame.
df.loc[['B', 'C']]

Unnamed: 0,X,Y,Z
B,1.52303,-0.234153,-0.234137
C,1.579213,0.767435,-0.469474


In [14]:
df

Unnamed: 0,X,Y,Z
A,0.496714,-0.138264,0.647689
B,1.52303,-0.234153,-0.234137
C,1.579213,0.767435,-0.469474
D,0.54256,-0.463418,-0.46573
E,0.241962,-1.91328,-1.724918


In [15]:
# Row label plus column label picks out a single scalar value.
df.loc['D', 'Y']

-0.46341769281246226

In [16]:
# Rows 'C' and 'D' restricted to column 'Y' -> a Series.
df.loc[['C', 'D'], 'Y']

C    0.767435
D   -0.463418
Name: Y, dtype: float64

In [17]:
# Lists on both axes return a sub-frame; rows come back in the order
# requested ('E' before 'D').
df.loc[['E', 'D'], ['Y', 'Z']]

Unnamed: 0,Y,Z
E,-1.91328,-1.724918
D,-0.463418,-0.46573


### Conditional Selection

In [18]:
df

Unnamed: 0,X,Y,Z
A,0.496714,-0.138264,0.647689
B,1.52303,-0.234153,-0.234137
C,1.579213,0.767435,-0.469474
D,0.54256,-0.463418,-0.46573
E,0.241962,-1.91328,-1.724918


In [19]:
# Element-wise comparison: yields a boolean DataFrame with the same labels as df.
df > 0

Unnamed: 0,X,Y,Z
A,True,False,True
B,True,False,False
C,True,True,False
D,True,False,False
E,True,False,False


In [20]:
# Index with the boolean frame: values where the condition is False become NaN.
df[df > 0]

Unnamed: 0,X,Y,Z
A,0.496714,,0.647689
B,1.52303,,
C,1.579213,0.767435,
D,0.54256,,
E,0.241962,,


In [21]:
# Same boolean-mask selection as the previous cell, repeated before drilling
# into a single column.
df[df > 0]

Unnamed: 0,X,Y,Z
A,0.496714,,0.647689
B,1.52303,,
C,1.579213,0.767435,
D,0.54256,,
E,0.241962,,


In [22]:
# Mask first, then pick column 'Y'; non-positive entries show up as NaN.
df[df > 0]['Y']

A         NaN
B         NaN
C    0.767435
D         NaN
E         NaN
Name: Y, dtype: float64

In [23]:
df 

Unnamed: 0,X,Y,Z
A,0.496714,-0.138264,0.647689
B,1.52303,-0.234153,-0.234137
C,1.579213,0.767435,-0.469474
D,0.54256,-0.463418,-0.46573
E,0.241962,-1.91328,-1.724918


In [24]:
# `&` combines the two masks element-wise: keep values strictly between 0 and 0.6.
# The parentheses are required because `&` binds tighter than the comparisons.
df[(df > 0) & (df < 0.6)]

Unnamed: 0,X,Y,Z
A,0.496714,,
B,,,
C,,,
D,0.54256,,
E,0.241962,,


In [25]:
# `|` combines the two masks element-wise: keep values greater than 1 or
# less than 0.6.
df[(df > 1) | (df < 0.6)]

Unnamed: 0,X,Y,Z
A,0.496714,-0.138264,
B,1.52303,-0.234153,-0.234137
C,1.579213,,-0.469474
D,0.54256,-0.463418,-0.46573
E,0.241962,-1.91328,-1.724918


### Indexing 

In [26]:
df

Unnamed: 0,X,Y,Z
A,0.496714,-0.138264,0.647689
B,1.52303,-0.234153,-0.234137
C,1.579213,0.767435,-0.469474
D,0.54256,-0.463418,-0.46573
E,0.241962,-1.91328,-1.724918


In [27]:
# Move the row labels into a regular column named 'index' and fall back to a
# default integer index. Rebinding `df` to the returned frame (instead of
# `inplace=True`) keeps the step an explicit transformation.
df = df.reset_index()

In [28]:
df

Unnamed: 0,index,X,Y,Z
0,A,0.496714,-0.138264,0.647689
1,B,1.52303,-0.234153,-0.234137
2,C,1.579213,0.767435,-0.469474
3,D,0.54256,-0.463418,-0.46573
4,E,0.241962,-1.91328,-1.724918


In [29]:
# City labels, one per row of df; assigned as a column and then used as the index.
list1 = [
    'cairo',
    'alexandria',
    'aswan',
    'luxor',
    'giza',
]

In [30]:
# Attach the city names as a new column (values are assigned row by row, in order).
df['cities'] = list1

In [31]:
df

Unnamed: 0,index,X,Y,Z,cities
0,A,0.496714,-0.138264,0.647689,cairo
1,B,1.52303,-0.234153,-0.234137,alexandria
2,C,1.579213,0.767435,-0.469474,aswan
3,D,0.54256,-0.463418,-0.46573,luxor
4,E,0.241962,-1.91328,-1.724918,giza


In [32]:
# Promote the 'cities' column to be the row index. Rebinding `df` to the
# returned frame (instead of `inplace=True`) keeps the step an explicit
# transformation.
df = df.set_index('cities')

In [33]:
df

Unnamed: 0_level_0,index,X,Y,Z
cities,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cairo,A,0.496714,-0.138264,0.647689
alexandria,B,1.52303,-0.234153,-0.234137
aswan,C,1.579213,0.767435,-0.469474
luxor,D,0.54256,-0.463418,-0.46573
giza,E,0.241962,-1.91328,-1.724918


### Multi-index and Index Hierarchy

In [34]:
# Outer-level labels and inner-level labels, paired position by position.
outside_index = ['K1'] * 3 + ['K2'] * 3
inside_index = [1, 2, 3] * 2
# Zip the two levels into (outer, inner) tuples and build a two-level MultiIndex.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside_index, inside_index)))

In [35]:
# Rebind `df` to a new 6x3 frame of `randn` values indexed by the two-level
# `hier_index`, with columns A-C.
df = pd.DataFrame(
    randn(6, 3),
    index=hier_index,
    columns=list('ABC'),
)

In [36]:
df

Unnamed: 0,Unnamed: 1,A,B,C
K1,1,-0.562288,-1.012831,0.314247
K1,2,-0.908024,-1.412304,1.465649
K1,3,-0.225776,0.067528,-1.424748
K2,1,-0.544383,0.110923,-1.150994
K2,2,0.375698,-0.600639,-0.291694
K2,3,-0.601707,1.852278,-0.013497


In [37]:
# Selecting an outer-level label returns the sub-frame indexed by the inner level.
df.loc['K2']

Unnamed: 0,A,B,C
1,-0.544383,0.110923,-1.150994
2,0.375698,-0.600639,-0.291694
3,-0.601707,1.852278,-0.013497


In [38]:
# Chain a second `.loc` to pick inner label 2 within 'K2'; the result is a Series.
df.loc['K2'].loc[2]

A    0.375698
B   -0.600639
C   -0.291694
Name: 2, dtype: float64

In [39]:
df

Unnamed: 0,Unnamed: 1,A,B,C
K1,1,-0.562288,-1.012831,0.314247
K1,2,-0.908024,-1.412304,1.465649
K1,3,-0.225776,0.067528,-1.424748
K2,1,-0.544383,0.110923,-1.150994
K2,2,0.375698,-0.600639,-0.291694
K2,3,-0.601707,1.852278,-0.013497


In [40]:
# Names of the index levels; both are None until names are assigned.
df.index.names

FrozenList([None, None])

In [41]:
# Name the two index levels: outer level first, inner level second.
df.index.names = ['Category', 'Model']

In [42]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
Category,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
K1,1,-0.562288,-1.012831,0.314247
K1,2,-0.908024,-1.412304,1.465649
K1,3,-0.225776,0.067528,-1.424748
K2,1,-0.544383,0.110923,-1.150994
K2,2,0.375698,-0.600639,-0.291694
K2,3,-0.601707,1.852278,-0.013497


In [43]:
# Cross-section for the outer-level label 'K1'; the outer level is dropped
# from the result.
df.xs('K1')

Unnamed: 0_level_0,A,B,C
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,-0.562288,-1.012831,0.314247
2,-0.908024,-1.412304,1.465649
3,-0.225776,0.067528,-1.424748


In [44]:
# Cross-section on both levels at once. `xs` expects a tuple key (one label per
# level) rather than a list, so Category 'K1' / Model 3 is written ('K1', 3).
df.xs(('K1', 3))

In [45]:
# Cross-section on the inner level, addressed by name: every Category's row
# where Model == 3.
df.xs(3, level='Model')

Unnamed: 0_level_0,A,B,C
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
K1,-0.225776,0.067528,-1.424748
K2,-0.601707,1.852278,-0.013497
