## Resetting and Setting Index

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Seed NumPy's global RNG so the random integers below are reproducible.
np.random.seed(100)

# 5x5 frame of random integers in [0, 20), rows R1..R5 and columns C1..C5.
df = pd.DataFrame(
    data=np.random.randint(0, 20, size=(5, 5)),
    index=[f'R{i}' for i in range(1, 6)],
    columns=[f'C{i}' for i in range(1, 6)],
)
df

Unnamed: 0,C1,C2,C3,C4,C5
R1,8,3,7,15,16
R2,10,2,2,2,14
R3,2,17,16,15,4
R4,11,16,9,2,12
R5,4,1,13,19,4


### Using .reset_index()

   
    - resets the index to the default integer index (0, 1, 2, ...)

In [3]:
# reset_index() moves the current row labels (R1..R5) into a regular column
# named 'index' and replaces them with the default integer index 0..4.
# It returns a new DataFrame.

df.reset_index()

Unnamed: 0,index,C1,C2,C3,C4,C5
0,R1,8,3,7,15,16
1,R2,10,2,2,2,14
2,R3,2,17,16,15,4
3,R4,11,16,9,2,12
4,R5,4,1,13,19,4


In [4]:
# reset_index() did not modify df in place: the original frame still has
# R1..R5 as its index, as the output below shows.

df

Unnamed: 0,C1,C2,C3,C4,C5
R1,8,3,7,15,16
R2,10,2,2,2,14
R3,2,17,16,15,4
R4,11,16,9,2,12
R5,4,1,13,19,4


In [5]:
# reset_index() returns a new DataFrame, so provide the inplace parameter to make the change permanent

# df.reset_index(inplace=True) --> use this one to reset index permanently

# You will do this when you want to replace the current index with the default numerical (0, 1, 2, ...) index

### Using set_index()

    - a method to set one or more existing columns (or an array-like such as a list, Series or Index of the correct length) as the index of a DataFrame.

In [6]:
# Add a column of labels so there is an existing column to promote to the index.

df['new'] = list('ABCDE')
df

Unnamed: 0,C1,C2,C3,C4,C5,new
R1,8,3,7,15,16,A
R2,10,2,2,2,14,B
R3,2,17,16,15,4,C
R4,11,16,9,2,12,D
R5,4,1,13,19,4,E


In [7]:
# Use set_index() to promote an existing column ('new') to be the index.
# The column leaves the data columns and its name becomes the index name.

df.set_index('new')

Unnamed: 0_level_0,C1,C2,C3,C4,C5
new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,8,3,7,15,16
B,10,2,2,2,14
C,2,17,16,15,4
D,11,16,9,2,12
E,4,1,13,19,4


In [8]:
# Again, this is not a permanent change: set_index() returned a new frame and
# df still has 'new' as a regular column. Use the inplace parameter (or
# reassign the result) if you want to keep it.

df

Unnamed: 0,C1,C2,C3,C4,C5,new
R1,8,3,7,15,16,A
R2,10,2,2,2,14,B
R3,2,17,16,15,4,C
R4,11,16,9,2,12,D
R5,4,1,13,19,4,E


In [9]:
# Passing inplace=True modifies df itself instead of returning a new frame.
# After this runs, 'new' is the index and no longer a column, so this cell
# cannot be re-run without recreating df (and the 'new' column) first.
df.set_index(keys='new', inplace=True)
df

Unnamed: 0_level_0,C1,C2,C3,C4,C5
new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,8,3,7,15,16
B,10,2,2,2,14
C,2,17,16,15,4
D,11,16,9,2,12
E,4,1,13,19,4


In [10]:
# Now the new index is permanent because the inplace parameter was set to True.
# This overrides the original index labels 'R1 R2 ...'.

df

Unnamed: 0_level_0,C1,C2,C3,C4,C5
new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,8,3,7,15,16
B,10,2,2,2,14
C,2,17,16,15,4
D,11,16,9,2,12
E,4,1,13,19,4


## DataFrames with a MultiIndex (Hierarchical Index)

In [11]:
# From Arrays
# One list per index level: arr[0] is the outer level, arr[1] the inner level.

arr = [
    ['R1'] * 3 + ['R2'] * 3,
    [1, 2, 3] * 2,
]
h_index = pd.MultiIndex.from_arrays(arr)

In [12]:
# Inspect the result: a two-level MultiIndex with one (outer, inner) tuple per row.
h_index

MultiIndex([('R1', 1),
            ('R1', 2),
            ('R1', 3),
            ('R2', 1),
            ('R2', 2),
            ('R2', 3)],
           )

In [13]:
# Six rows of random normal draws in columns A and B, indexed by the two-level h_index.
# Note: this rebinds `df`, replacing the frame used in the earlier sections.
df = pd.DataFrame(data=np.random.randn(6, 2), index=h_index, columns=['A', 'B'])

In [14]:
# Rows are now labelled by the (outer, inner) pairs of h_index.
df

Unnamed: 0,Unnamed: 1,A,B
R1,1,1.430188,0.949711
R1,2,0.65692,0.222034
R1,3,0.598291,-1.750374
R2,1,0.133306,-1.31521
R2,2,-0.937495,0.384211
R2,3,-0.66168,2.587856


In [15]:
# From Tuples
# Pair each outer (group) label with its inner (number) label.

outer = ['R1'] * 3 + ['R2'] * 3
inner = [1, 2, 3] * 2

h_index = [(group, number) for group, number in zip(outer, inner)]

In [16]:
# At this point h_index is still a plain Python list of (outer, inner) tuples.
h_index

[('R1', 1), ('R1', 2), ('R1', 3), ('R2', 1), ('R2', 2), ('R2', 3)]

In [17]:
# Convert the list of tuples into a MultiIndex.
# Note: this rebinds h_index from a list to a pd.MultiIndex.
h_index = pd.MultiIndex.from_tuples(h_index)

In [18]:
# Same six (outer, inner) pairs as the from_arrays() version built earlier.
h_index

MultiIndex([('R1', 1),
            ('R1', 2),
            ('R1', 3),
            ('R2', 1),
            ('R2', 2),
            ('R2', 3)],
           )

In [19]:
# Rebuild df with fresh random normal draws, indexed by the tuple-based MultiIndex.
df = pd.DataFrame(data=np.random.randn(6, 2), index=h_index, columns=['A', 'B'])

In [20]:
# New random values (the RNG state has advanced), same two-level row index.
df

Unnamed: 0,Unnamed: 1,A,B
R1,1,-1.083102,-2.066341
R1,2,-1.270645,-2.095724
R1,3,-2.137406,0.915849
R2,1,0.592576,0.037958
R2,2,0.371968,0.359754
R2,3,-0.140396,-0.443739


### Indexing and Selection of DataFrames with Multiple Indices

In [21]:
# using .loc
# Selecting an outer-level label returns all of its rows, indexed by the inner level.

df.loc['R1']

Unnamed: 0,A,B
1,-1.083102,-2.066341
2,-1.270645,-2.095724
3,-2.137406,0.915849


In [22]:
# Chain a second .loc on the inner level to pick row ('R1', 3); the result
# holds that row's A and B values.
df.loc['R1'].loc[3]

Unnamed: 0,3
A,-2.137406
B,0.915849


In [23]:
# using .xs
# xs('R1') takes the cross-section for outer label 'R1' (same rows as df.loc['R1']).

df.xs('R1')

Unnamed: 0,A,B
1,-1.083102,-2.066341
2,-1.270645,-2.095724
3,-2.137406,0.915849


In [24]:
# A tuple key selects across both levels at once: outer 'R1', inner 3.
df.xs(('R1',3))

Unnamed: 0_level_0,R1
Unnamed: 0_level_1,3
A,-2.137406
B,0.915849


In [25]:
# cross section using level
# level = 1 targets the inner level by position: keep the rows whose inner
# label is 1, from every outer group.

df.xs(1, level = 1)

Unnamed: 0,A,B
R1,-1.083102,-2.066341
R2,0.592576,0.037958


In [26]:
# setting index names
# Name the two index levels so they can be referred to by name instead of position.

df.index.names = ['Groups','Numbers']

In [27]:
# The level names now appear as headers above the index columns.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Numbers,Unnamed: 2_level_1,Unnamed: 3_level_1
R1,1,-1.083102,-2.066341
R1,2,-1.270645,-2.095724
R1,3,-2.137406,0.915849
R2,1,0.592576,0.037958
R2,2,0.371968,0.359754
R2,3,-0.140396,-0.443739


In [28]:
# Same cross-section as level = 1, but addressing the inner level by its name.
df.xs(1, level = 'Numbers')

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
R1,-1.083102,-2.066341
R2,0.592576,0.037958


In [29]:
# Take the same cross-section, then select only column 'A' from it.
df.xs(1, level = 'Numbers')['A']

Unnamed: 0_level_0,A
Groups,Unnamed: 1_level_1
R1,-1.083102
R2,0.592576
