# `Multi - index`

In [1]:
import numpy as np
import pandas as pd

`A multi-index object is a kind of index object in pandas in which more than one value is used to label each entry.`

In [2]:
# Two students, each with marks in three subjects.
names = ['Abhishek', 'Amrusha']
subjects = ['Phy', 'Chem', 'Maths']

# One (name, subject) tuple per combination; names vary slowest.
index_labels = [
    (student, course)
    for student in names
    for course in subjects
]

index_labels

[('Abhishek', 'Phy'),
 ('Abhishek', 'Chem'),
 ('Abhishek', 'Maths'),
 ('Amrusha', 'Phy'),
 ('Amrusha', 'Chem'),
 ('Amrusha', 'Maths')]

In [3]:
# Build a MultiIndex from the list of (name, subject) tuples.
index = pd.MultiIndex.from_tuples(index_labels)
print(index.ndim) # a MultiIndex is still a 1-D object (ndim == 1)
index

1


MultiIndex([('Abhishek',   'Phy'),
            ('Abhishek',  'Chem'),
            ('Abhishek', 'Maths'),
            ( 'Amrusha',   'Phy'),
            ( 'Amrusha',  'Chem'),
            ( 'Amrusha', 'Maths')],
           )

`We can now use this MultiIndex object as the index of a Series.`

In [4]:
# Attach the six marks to the MultiIndex; values pair with index entries in order.
marks = pd.Series(data=[50, 60, 70, 80, 90, 100], index=index)
marks

Abhishek  Phy       50
          Chem      60
          Maths     70
Amrusha   Phy       80
          Chem      90
          Maths    100
dtype: int64

In [5]:
# Selecting by an outer-level label returns the sub-Series indexed by the inner level (subject).
marks['Abhishek']

Phy      50
Chem     60
Maths    70
dtype: int64

In [6]:
# The same outer-level selection for the second student.
marks['Amrusha']

Phy       80
Chem      90
Maths    100
dtype: int64

In [7]:
# Even with a two-level index, the Series itself is 1-D and holds six values.
marks.ndim, marks.shape, marks.size

(1, (6,), 6)

In [8]:
# A full (name, subject) tuple identifies a single value.
marks[('Amrusha', 'Maths')]

100

### `We can also create a MultiIndex object using the pd.MultiIndex.from_product() method`

In [10]:
# from_product builds every (name, subject) combination, so the tuples do not have to be listed by hand.
index = pd.MultiIndex.from_product([names, subjects])
index

MultiIndex([('Abhishek',   'Phy'),
            ('Abhishek',  'Chem'),
            ('Abhishek', 'Maths'),
            ( 'Amrusha',   'Phy'),
            ( 'Amrusha',  'Chem'),
            ( 'Amrusha', 'Maths')],
           )

In [11]:
# Rebuild the marks Series on the from_product index; values pair with index entries in order.
marks = pd.Series(data=[50, 60, 70, 80, 90, 100], index=index)
marks

Abhishek  Phy       50
          Chem      60
          Maths     70
Amrusha   Phy       80
          Chem      90
          Maths    100
dtype: int64

# `stack() and unstack()`

    The unstack() method moves the innermost row-index level of a Series or DataFrame into the columns.
    
    The stack() method does the reverse: it moves the innermost column level of a DataFrame into the row index (the result is a Series once no column levels remain).

In [12]:
# Starting point for reshaping: the Series with a two-level (name, subject) index.
marks

Abhishek  Phy       50
          Chem      60
          Maths     70
Amrusha   Phy       80
          Chem      90
          Maths    100
dtype: int64

In [14]:
# Pivot the inner index level (subject) into columns; the outer level (name) stays as the row index.
marks.unstack()

Unnamed: 0,Chem,Maths,Phy
Abhishek,60,70,50
Amrusha,90,100,80


In [15]:
# Unstacking a DataFrame whose row index has a single level yields a Series,
# with the former column labels (subject) as the outer index level.
marks.unstack().unstack()

Chem   Abhishek     60
       Amrusha      90
Maths  Abhishek     70
       Amrusha     100
Phy    Abhishek     50
       Amrusha      80
dtype: int64

In [16]:
# Unstacking that Series pivots its inner level (name) into columns.
marks.unstack().unstack().unstack()

Unnamed: 0,Abhishek,Amrusha
Chem,60,90
Maths,70,100
Phy,50,80


In [17]:
# A fourth unstack gives a Series indexed by (name, subject) again; the subjects now appear in sorted order.
marks.unstack().unstack().unstack().unstack()

Abhishek  Chem      60
          Maths     70
          Phy       50
Amrusha   Chem      90
          Maths    100
          Phy       80
dtype: int64

`We can also create a DataFrame whose columns are a MultiIndex.`

In [18]:
# Yearly figures per city. Rows are labelled by year; the columns carry two
# levels: city (outer) and metric (inner).
branch = pd.DataFrame(
    data=[
        [1, 2, 0, 0],
        [3, 4, 0, 0],
        [5, 6, 0, 0],
        [7, 8, 0, 0],
    ],
    index=[2019, 2020, 2021, 2022],
    columns=pd.MultiIndex.from_product(
        [['delhi', 'mumbai'], ['avg_package', 'students']]
    ),
)

branch

Unnamed: 0_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,avg_package,students,avg_package,students
2019,1,2,0,0
2020,3,4,0,0
2021,5,6,0,0
2022,7,8,0,0


In [19]:
# stack() moves the innermost column level (avg_package / students) into the row index.
branch.stack()

Unnamed: 0,Unnamed: 1,delhi,mumbai
2019,avg_package,1,0
2019,students,2,0
2020,avg_package,3,0
2020,students,4,0
2021,avg_package,5,0
2021,students,6,0
2022,avg_package,7,0
2022,students,8,0


In [21]:
# Stacking the remaining column level (city) leaves no columns, so the result is a Series.
branch.stack().stack()

2019  avg_package  delhi     1
                   mumbai    0
      students     delhi     2
                   mumbai    0
2020  avg_package  delhi     3
                   mumbai    0
      students     delhi     4
                   mumbai    0
2021  avg_package  delhi     5
                   mumbai    0
      students     delhi     6
                   mumbai    0
2022  avg_package  delhi     7
                   mumbai    0
      students     delhi     8
                   mumbai    0
dtype: int64

### `The row labels and column labels of a DataFrame are both Index objects at the end of the day.`

In [22]:
# The frame with plain year row labels and two-level (city, metric) columns.
branch

Unnamed: 0_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,avg_package,students,avg_package,students
2019,1,2,0,0
2020,3,4,0,0
2021,5,6,0,0
2022,7,8,0,0


In [23]:
# The row labels of this frame are a plain Index.
type(branch.index)

pandas.core.indexes.base.Index

In [24]:
# The column labels of this frame are a MultiIndex.
type(branch.columns)

pandas.core.indexes.multi.MultiIndex

# `MultiIndex row labels and column labels`

    We can also use the MultiIndex object as row labels and column labels of a dataframe simultaneously.

In [26]:
# Both axes are MultiIndexes here: rows are (branch, year) pairs and
# columns are (city, metric) pairs.
branch = pd.DataFrame(
    data=[
        [1, 2, 0, 0],
        [3, 4, 0, 0],
        [5, 6, 0, 0],
        [7, 8, 0, 0],
        [9, 10, 0, 0],
        [11, 12, 0, 0],
        [13, 14, 0, 0],
        [15, 16, 0, 0],
    ],
    index=pd.MultiIndex.from_product(
        [['cse', 'ece'], [2019, 2020, 2021, 2022]]
    ),
    columns=pd.MultiIndex.from_product(
        [['delhi', 'mumbai'], ['avg_package', 'students']]
    ),
)

branch

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,avg_package,students,avg_package,students
cse,2019,1,2,0,0
cse,2020,3,4,0,0
cse,2021,5,6,0,0
cse,2022,7,8,0,0
ece,2019,9,10,0,0
ece,2020,11,12,0,0
ece,2021,13,14,0,0
ece,2022,15,16,0,0


In [28]:
# Selecting an outer column label returns the columns beneath it, with that level dropped.
branch['delhi']

Unnamed: 0,Unnamed: 1,avg_package,students
cse,2019,1,2
cse,2020,3,4
cse,2021,5,6
cse,2022,7,8
ece,2019,9,10
ece,2020,11,12
ece,2021,13,14
ece,2022,15,16


In [32]:
# Label-based slice on the column MultiIndex: keep every row and, starting at
# the ('delhi', 'avg_package') column, take every third column.
# Labels are case-sensitive, so the city is spelled exactly as it appears in
# the columns ('delhi').
branch.loc[:, ('delhi', 'avg_package')::3]

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,avg_package,students
cse,2019,1,0
cse,2020,3,0
cse,2021,5,0
cse,2022,7,0
ece,2019,9,0
ece,2020,11,0
ece,2021,13,0
ece,2022,15,0


In [33]:
# Full frame again, for comparison with the slices taken from it.
branch

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,avg_package,students,avg_package,students
cse,2019,1,2,0,0
cse,2020,3,4,0,0
cse,2021,5,6,0,0
cse,2022,7,8,0,0
ece,2019,9,10,0,0
ece,2020,11,12,0,0
ece,2021,13,14,0,0
ece,2022,15,16,0,0


In [34]:
# Every second row starting at ('cse', 2019), and every second column.
# The year level holds integers, so the slice start uses the int 2019
# rather than the string '2019'.
branch.loc[('cse', 2019)::2, ::2]

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,avg_package,avg_package
cse,2019,1,0
cse,2021,5,0
ece,2019,9,0
ece,2021,13,0


## `Levels of a MultiIndex object`

In [37]:
# Cartesian product of branches and years: every (branch, year) pair, in order.
multi_index = pd.MultiIndex.from_product(
    iterables=[
        ['cse', 'ece'],
        [2019, 2020, 2021, 2022],
    ],
)
multi_index

MultiIndex([('cse', 2019),
            ('cse', 2020),
            ('cse', 2021),
            ('cse', 2022),
            ('ece', 2019),
            ('ece', 2020),
            ('ece', 2021),
            ('ece', 2022)],
           )

In [39]:
# .levels lists the unique labels of each level, one entry per level.
multi_index.levels

FrozenList([['cse', 'ece'], [2019, 2020, 2021, 2022]])

In [42]:
# Three iterables give a three-level MultiIndex: (name, subject, year).
multi_index = pd.MultiIndex.from_product([names, subjects, [2019, 2020, 2021, 2022]])
multi_index

MultiIndex([('Abhishek',   'Phy', 2019),
            ('Abhishek',   'Phy', 2020),
            ('Abhishek',   'Phy', 2021),
            ('Abhishek',   'Phy', 2022),
            ('Abhishek',  'Chem', 2019),
            ('Abhishek',  'Chem', 2020),
            ('Abhishek',  'Chem', 2021),
            ('Abhishek',  'Chem', 2022),
            ('Abhishek', 'Maths', 2019),
            ('Abhishek', 'Maths', 2020),
            ('Abhishek', 'Maths', 2021),
            ('Abhishek', 'Maths', 2022),
            ( 'Amrusha',   'Phy', 2019),
            ( 'Amrusha',   'Phy', 2020),
            ( 'Amrusha',   'Phy', 2021),
            ( 'Amrusha',   'Phy', 2022),
            ( 'Amrusha',  'Chem', 2019),
            ( 'Amrusha',  'Chem', 2020),
            ( 'Amrusha',  'Chem', 2021),
            ( 'Amrusha',  'Chem', 2022),
            ( 'Amrusha', 'Maths', 2019),
            ( 'Amrusha', 'Maths', 2020),
            ( 'Amrusha', 'Maths', 2021),
            ( 'Amrusha', 'Maths', 2022)],
           )

In [43]:
# One entry per level; note the subject labels appear in sorted order here, not in input order.
multi_index.levels

FrozenList([['Abhishek', 'Amrusha'], ['Chem', 'Maths', 'Phy'], [2019, 2020, 2021, 2022]])

In [44]:
# Level 0 (outermost): the names.
multi_index.levels[0]

Index(['Abhishek', 'Amrusha'], dtype='object')

In [45]:
# Level 1: the subjects.
multi_index.levels[1]

Index(['Chem', 'Maths', 'Phy'], dtype='object')

In [46]:
# Level 2 (innermost): the years, stored as integers.
multi_index.levels[2]

Index([2019, 2020, 2021, 2022], dtype='int64')