# How to Create a MultiIndex with `set_index`

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Seed a dedicated generator so that Restart Kernel -> Run All rebuilds
# exactly the same table (and therefore the same outputs) every time.
SEED = 42
rng = np.random.default_rng(SEED)

# Two descriptive columns ('area', 'capability') plus three value columns,
# each holding five integers drawn from 0..4.
data = {
    'area': ['Area 1', 'Area 1', 'Area 1', 'Area 2', 'Area 2'],
    'capability': ['Cap 1', 'Cap 2', 'Cap 3', 'Cap 1', 'Cap 4'],
    'v1': rng.integers(5, size=5),
    'v2': rng.integers(5, size=5),
    'v3': rng.integers(5, size=5)
}
df = pd.DataFrame(data)
df

Unnamed: 0,area,capability,v1,v2,v3
0,Area 1,Cap 1,0,1,2
1,Area 1,Cap 2,0,3,1
2,Area 1,Cap 3,0,4,3
3,Area 2,Cap 1,0,2,4
4,Area 2,Cap 4,0,1,3


## Basic Indexing

In [3]:
# Label-based lookup: `.loc` takes the row label first, then the column label
row_label, column_label = 2, 'area'
df.loc[row_label, column_label]

'Area 1'

In [4]:
# Position-based lookup: `.iloc` takes the row position first, then the
# column position
row_position, column_position = 2, 0
df.iloc[row_position, column_position]

'Area 1'

In [5]:
# Label-based slicing (the `1:2` / `'v2':'v3'` shorthand spelled out as slice
# objects); with `.loc` both end labels are part of the result
row_range = slice(1, 2)
column_range = slice('v2', 'v3')
df.loc[row_range, column_range]

Unnamed: 0,v2,v3
1,3,1
2,4,3


In [6]:
# Select several columns at once by passing a list of labels as the column
# indexer; the empty slice keeps every row
selected_v = ['v1', 'v3']
every_row = slice(None)
df.loc[every_row, selected_v]

Unnamed: 0,v1,v3
0,0,2
1,0,1
2,0,3
3,0,4
4,0,3


In [7]:
# `.at` is the fast accessor for a single scalar: exactly one row label and
# one column label (no slicing allowed)
row_label, column_label = 2, 'v2'
df.at[row_label, column_label]

4

### Using Group-by

In [8]:
# Group the rows by their 'area' value; aggregations are applied in later cells
groupby_area = df.groupby(by='area')

In [9]:
# Per-area mean of the value columns. The grouped frame still contains the
# string column 'capability'; pandas >= 2.0 no longer drops such columns
# silently and raises a TypeError instead, so restrict the mean to numeric
# columns explicitly.
groupby_area.mean(numeric_only=True)

Unnamed: 0_level_0,v1,v2,v3
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Area 1,0.0,2.666667,2.0
Area 2,0.0,1.5,3.5


### Create a MultiIndex

In [10]:
# Move the two descriptive columns into the row index; passing a list of
# column names to `set_index` yields a two-level MultiIndex, with v1..v3 left
# as the data columns
index_columns = ['area', 'capability']
df2 = df.set_index(keys=index_columns)
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,v1,v2,v3
area,capability,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Area 1,Cap 1,0,1,2
Area 1,Cap 2,0,3,1
Area 1,Cap 3,0,4,3
Area 2,Cap 1,0,2,4
Area 2,Cap 4,0,1,3


In [11]:
# Inspect the new row index: one (area, capability) tuple per row, with the
# level names taken from the original column names
df2.index

MultiIndex([('Area 1', 'Cap 1'),
            ('Area 1', 'Cap 2'),
            ('Area 1', 'Cap 3'),
            ('Area 2', 'Cap 1'),
            ('Area 2', 'Cap 4')],
           names=['area', 'capability'])

In [12]:
# A single label is matched against the outer level ('area'); the rows that
# come back are indexed by the remaining 'capability' level
area_label = 'Area 1'
df2.loc[area_label]

Unnamed: 0_level_0,v1,v2,v3
capability,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Cap 1,0,1,2
Cap 2,0,3,1
Cap 3,0,4,3


In [13]:
# Column-wise mean over the rows that belong to 'Area 1'
area_1_rows = df2.loc['Area 1']
area_1_rows.mean()

v1    0.000000
v2    2.666667
v3    2.000000
dtype: float64

In [14]:
# A tuple with one label per index level identifies a single row, which is
# returned as a Series of its column values
row_key = ('Area 1', 'Cap 3')
df2.loc[row_key]

v1    0
v2    4
v3    3
Name: (Area 1, Cap 3), dtype: int64

In [15]:
# Full row key plus a column label narrows the selection to one scalar
row_key = ('Area 1', 'Cap 3')
df2.loc[row_key, 'v3']

3

In [16]:
# `pd.IndexSlice` builds a per-level row selector: `:` for the 'area' level
# (every area) and the label 'Cap 1' for the 'capability' level. The trailing
# `:` in `.loc` keeps all columns.
idx = pd.IndexSlice
cap_1_rows = idx[:, 'Cap 1']
df2.loc[cap_1_rows, :]

Unnamed: 0_level_0,Unnamed: 1_level_0,v1,v2,v3
area,capability,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Area 1,Cap 1,0,1,2
Area 2,Cap 1,0,2,4
