In [1]:
import pandas as pd
import numpy as np

In [2]:
# Small 3x4 demo frame: rows are labelled r1..r3 and columns c1..c4.
df = pd.DataFrame(
    [
        [2, 4, 6, -1],
        [3, -4, 2, -1],
        [-10, 3, 5, -7],
    ],
    index=["r1", "r2", "r3"],
    columns=["c1", "c2", "c3", "c4"],
)

In [3]:
# Display the frame as constructed, with r1..r3 as the row labels.
df

Unnamed: 0,c1,c2,c3,c4
r1,2,4,6,-1
r2,3,-4,2,-1
r3,-10,3,5,-7


In [4]:
# Inspect the current row index: the string labels r1..r3 given at construction.
df.index

Index(['r1', 'r2', 'r3'], dtype='object')

In [6]:
# reset_index() returns a new frame with a default 0..n-1 integer index and moves
# the old labels (r1, r2, r3) into a regular column named "index".
# The result is not assigned here, so df itself keeps its r1..r3 index.
df.reset_index()

Unnamed: 0,index,c1,c2,c3,c4
0,r1,2,4,6,-1
1,r2,3,-4,2,-1
2,r3,-10,3,5,-7


In [7]:
# Any existing column can be promoted to the row index with set_index.
# inplace=True modifies df itself instead of returning a new frame, so this cell
# is not re-runnable: on a second run "c3" is no longer a column and set_index
# raises KeyError.

df.set_index("c3", inplace = True)

# The c3 column has now become the row index; the previous r1..r3 labels are discarded.

In [8]:
# Display df after set_index: the c3 values (6, 2, 5) are now the row labels.
df

Unnamed: 0_level_0,c1,c2,c4
c3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
6,2,4,-1
2,3,-4,-1
5,-10,3,-7


In [9]:
# Selecting a single column returns a Series that carries the frame's c3 index.
df["c2"]

c3
6    4
2   -4
5    3
Name: c2, dtype: int64

In [10]:
# .loc selects by label, not by position: 2 here is the index label (the row
# whose c3 value was 2), which happens to be the second row of the frame.
df.loc[2,:]

c1    3
c2   -4
c4   -1
Name: 2, dtype: int64

In [11]:
# The index now holds the former c3 values and is named after that column.
df.index

Int64Index([6, 2, 5], dtype='int64', name='c3')

## Multi index

In [17]:
# Build the exam-results frame from plain Python rows rather than np.array:
# a NumPy array has a single dtype, so mixing strings and ints coerces every
# value (including maxscore) to a string. With a list of rows pandas infers a
# dtype per column, so maxscore is stored as integers and can be aggregated.
df1 = pd.DataFrame(
    data=[
        ["Math", "C1", "S1", 93],
        ["Sci", "C1", "S2", 78],
        ["Eng", "C1", "S3", 86],
        ["Math", "C2", "S4", 58],
        ["Eng", "C2", "S5", 71],
        ["Sci", "C2", "S6", 69],
    ],
    columns=["subject", "class", "student", "maxscore"],
)

In [18]:
# Display the raw frame; at this point it still has the default integer index.
df1

Unnamed: 0,subject,class,student,maxscore
0,Math,C1,S1,93
1,Sci,C1,S2,78
2,Eng,C1,S3,86
3,Math,C2,S4,58
4,Eng,C2,S5,71
5,Sci,C2,S6,69


In [19]:
df1.index
# The default RangeIndex covers positions 0 to 5 (stop=6 is exclusive).
# The goal of the following cells is to index the rows by class and subject instead.

RangeIndex(start=0, stop=6, step=1)

In [20]:
df1.set_index(["class","subject"], inplace = True)
# Passing a list of columns builds a two-level MultiIndex (class first, then subject).
# inplace=True modifies df1 itself rather than returning a new frame.

In [21]:
# Display df1 with its new (class, subject) row index.
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,student,maxscore
class,subject,Unnamed: 2_level_1,Unnamed: 3_level_1
C1,Math,S1,93
C1,Sci,S2,78
C1,Eng,S3,86
C2,Math,S4,58
C2,Eng,S5,71
C2,Sci,S6,69


In [22]:
# "levels" lists the unique values of each index level in sorted order
# (subjects alphabetically: Eng, Math, Sci). The integer arrays in the repr give,
# for every row, the position of its value within the corresponding level.
# NOTE: this integer-array field is printed as labels= here; newer pandas
# releases call it codes=.
df1.index

MultiIndex(levels=[['C1', 'C2'], ['Eng', 'Math', 'Sci']],
           labels=[[0, 0, 0, 1, 1, 1], [1, 2, 0, 1, 0, 2]],
           names=['class', 'subject'])

In [23]:
# A partial key on the first level selects every row of class C2; the class
# level is dropped from the result, leaving subject as the index.
df1.loc["C2"]

Unnamed: 0_level_0,student,maxscore
subject,Unnamed: 1_level_1,Unnamed: 2_level_1
Math,S4,58
Eng,S5,71
Sci,S6,69


In [24]:
# Same partial-key selection on the first level, this time for class C1.
df1.loc["C1"]

Unnamed: 0_level_0,student,maxscore
subject,Unnamed: 1_level_1,Unnamed: 2_level_1
Math,S1,93
Sci,S2,78
Eng,S3,86


In [25]:
# Two-step lookup: first narrow to class C1, then pick the Sci row from the
# resulting subject-indexed frame. The row comes back as a Series holding
# student and maxscore.
df1.loc["C1"].loc["Sci"]

student     S2
maxscore    78
Name: Sci, dtype: object

In [26]:
# Show the full frame again for reference before the positional (.iloc) examples.
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,student,maxscore
class,subject,Unnamed: 2_level_1,Unnamed: 3_level_1
C1,Math,S1,93
C1,Sci,S2,78
C1,Eng,S3,86
C2,Math,S4,58
C2,Eng,S5,71
C2,Sci,S6,69


In [27]:
# .iloc selects purely by integer position and ignores the index labels, so it
# works the same way on a MultiIndex frame: first four rows, all columns.
df1.iloc[:4,0:]

Unnamed: 0_level_0,Unnamed: 1_level_0,student,maxscore
class,subject,Unnamed: 2_level_1,Unnamed: 3_level_1
C1,Math,S1,93
C1,Sci,S2,78
C1,Eng,S3,86
C2,Math,S4,58


In [30]:
# Rows at positions 2, 3 and 4 (the stop position 5 is excluded), all columns.
df1.iloc[2:5,0:]

Unnamed: 0_level_0,Unnamed: 1_level_0,student,maxscore
class,subject,Unnamed: 2_level_1,Unnamed: 3_level_1
C1,Eng,S3,86
C2,Math,S4,58
C2,Eng,S5,71
