In [1]:
import numpy as np
import pandas as pd

# Let's try with an example

In [2]:
index_columns=[('ME',2019),('ME',2020),('ME',2021),('cse',2019),('cse',2020),('cse',2021)]
x=series=pd.Series([20,34,89,120,160,1000],index=index_columns)

In [3]:
x

(ME, 2019)       20
(ME, 2020)       34
(ME, 2021)       89
(cse, 2019)     120
(cse, 2020)     160
(cse, 2021)    1000
dtype: int64

# THE BEST WAY TO CREATE MULTIINDEX IN PYTHON

In [4]:
#1.BY USING PD.MULTIINDEX.FROM_TUPLES() 
index_columns=[('ME',2019),('ME',2020),('ME',2021),('cse',2019),('cse',2020),('cse',2021)]
multiindex=pd.MultiIndex.from_tuples(index_columns)

In [5]:
multiindex

MultiIndex([( 'ME', 2019),
            ( 'ME', 2020),
            ( 'ME', 2021),
            ('cse', 2019),
            ('cse', 2020),
            ('cse', 2021)],
           )

In [6]:
multiindex.levels

FrozenList([['ME', 'cse'], [2019, 2020, 2021]])

In [7]:
multiindex.levels[0]

Index(['ME', 'cse'], dtype='object')

In [8]:
multiindex.levels[1]

Int64Index([2019, 2020, 2021], dtype='int64')

# WE CAN ALSO CREATE THIS FUNCTION BY USING THE MultiIndex.from_tuples(). which works on the basis of cartesian product. 

In [9]:
pd.MultiIndex.from_product([['ME','CSE'],[2019,2020,2021]])

MultiIndex([( 'ME', 2019),
            ( 'ME', 2020),
            ( 'ME', 2021),
            ('CSE', 2019),
            ('CSE', 2020),
            ('CSE', 2021)],
           )

# creating a series with miltiindex series

In [10]:
series=pd.Series([1,2,3,4,5,6],index=multiindex)

In [11]:
series

ME   2019    1
     2020    2
     2021    3
cse  2019    4
     2020    5
     2021    6
dtype: int64

In [12]:
series['ME']

2019    1
2020    2
2021    3
dtype: int64

In [13]:
series['ME',2020]

2

# we can convert the multiindex series into the dataframe by using the unstack function.

In [14]:
#unstack()
temp=series.unstack()

In [15]:
temp

Unnamed: 0,2019,2020,2021
ME,1,2,3
cse,4,5,6


# we can also convert the dataframe into the multiindex series by using the stack function.

In [16]:
#stack()
temp.stack()

ME   2019    1
     2020    2
     2021    3
cse  2019    4
     2020    5
     2021    6
dtype: int64

# MULTIINDEXING ON THE DATAFRAME 

In [17]:
df=pd.DataFrame(
[
 [1,2],
  [3,4],
  [5,6],
    [7,8],
    [9,10],
    [11,12]
],
    index=multiindex,columns=['avg_package','students']
    
)

In [18]:
df

Unnamed: 0,Unnamed: 1,avg_package,students
ME,2019,1,2
ME,2020,3,4
ME,2021,5,6
cse,2019,7,8
cse,2020,9,10
cse,2021,11,12


In [19]:
df.loc['cse']

Unnamed: 0,avg_package,students
2019,7,8
2020,9,10
2021,11,12


In [20]:
df.loc['ME']

Unnamed: 0,avg_package,students
2019,1,2
2020,3,4
2021,5,6


In [21]:
df['avg_package']

ME   2019     1
     2020     3
     2021     5
cse  2019     7
     2020     9
     2021    11
Name: avg_package, dtype: int64

In [22]:
df2=pd.DataFrame([
    [1,2,6,7],
    [3,4,9,8],
    [5,6,2,3],
    [7,8,9,7]
],index=[2010,2011,2012,2013],
columns=pd.MultiIndex.from_product([['delhi','mumbai'],['avg_package','students']]))

In [23]:
df2

Unnamed: 0_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,avg_package,students,avg_package,students
2010,1,2,6,7
2011,3,4,9,8
2012,5,6,2,3
2013,7,8,9,7


In [24]:
df2['delhi']

Unnamed: 0,avg_package,students
2010,1,2
2011,3,4
2012,5,6
2013,7,8


In [25]:
df2['mumbai']

Unnamed: 0,avg_package,students
2010,6,7
2011,9,8
2012,2,3
2013,9,7


In [26]:
df2['delhi']['avg_package']

2010    1
2011    3
2012    5
2013    7
Name: avg_package, dtype: int64

In [27]:
df2.loc[2010]

delhi   avg_package    1
        students       2
mumbai  avg_package    6
        students       7
Name: 2010, dtype: int64

In [28]:
df

Unnamed: 0,Unnamed: 1,avg_package,students
ME,2019,1,2
ME,2020,3,4
ME,2021,5,6
cse,2019,7,8
cse,2020,9,10
cse,2021,11,12


In [29]:
df.unstack().stack()

Unnamed: 0,Unnamed: 1,avg_package,students
ME,2019,1,2
ME,2020,3,4
ME,2021,5,6
cse,2019,7,8
cse,2020,9,10
cse,2021,11,12


In [30]:
df.unstack().unstack()

avg_package  2019  ME      1
                   cse     7
             2020  ME      3
                   cse     9
             2021  ME      5
                   cse    11
students     2019  ME      2
                   cse     8
             2020  ME      4
                   cse    10
             2021  ME      6
                   cse    12
dtype: int64

In [31]:
df.unstack().stack().stack()

ME   2019  avg_package     1
           students        2
     2020  avg_package     3
           students        4
     2021  avg_package     5
           students        6
cse  2019  avg_package     7
           students        8
     2020  avg_package     9
           students       10
     2021  avg_package    11
           students       12
dtype: int64

In [32]:
df2

Unnamed: 0_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,avg_package,students,avg_package,students
2010,1,2,6,7
2011,3,4,9,8
2012,5,6,2,3
2013,7,8,9,7


In [33]:
df2.unstack()

delhi   avg_package  2010    1
                     2011    3
                     2012    5
                     2013    7
        students     2010    2
                     2011    4
                     2012    6
                     2013    8
mumbai  avg_package  2010    6
                     2011    9
                     2012    2
                     2013    9
        students     2010    7
                     2011    8
                     2012    3
                     2013    7
dtype: int64

In [34]:
df2.stack()

Unnamed: 0,Unnamed: 1,delhi,mumbai
2010,avg_package,1,6
2010,students,2,7
2011,avg_package,3,9
2011,students,4,8
2012,avg_package,5,2
2012,students,6,3
2013,avg_package,7,9
2013,students,8,7


In [35]:
df2.stack().stack()

2010  avg_package  delhi     1
                   mumbai    6
      students     delhi     2
                   mumbai    7
2011  avg_package  delhi     3
                   mumbai    9
      students     delhi     4
                   mumbai    8
2012  avg_package  delhi     5
                   mumbai    2
      students     delhi     6
                   mumbai    3
2013  avg_package  delhi     7
                   mumbai    9
      students     delhi     8
                   mumbai    7
dtype: int64

# working with multiIndex dataframes

In [36]:
df2.head()

Unnamed: 0_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,avg_package,students,avg_package,students
2010,1,2,6,7
2011,3,4,9,8
2012,5,6,2,3
2013,7,8,9,7


In [37]:
df2.shape

(4, 4)

In [38]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4 entries, 2010 to 2013
Data columns (total 4 columns):
 #   Column                 Non-Null Count  Dtype
---  ------                 --------------  -----
 0   (delhi, avg_package)   4 non-null      int64
 1   (delhi, students)      4 non-null      int64
 2   (mumbai, avg_package)  4 non-null      int64
 3   (mumbai, students)     4 non-null      int64
dtypes: int64(4)
memory usage: 332.0 bytes


In [39]:
df2.describe

<bound method NDFrame.describe of            delhi               mumbai         
     avg_package students avg_package students
2010           1        2           6        7
2011           3        4           9        8
2012           5        6           2        3
2013           7        8           9        7>

In [40]:
df2.isnull().sum()

delhi   avg_package    0
        students       0
mumbai  avg_package    0
        students       0
dtype: int64

In [41]:
df2.duplicated()

2010    False
2011    False
2012    False
2013    False
dtype: bool

In [42]:
df2

Unnamed: 0_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,avg_package,students,avg_package,students
2010,1,2,6,7
2011,3,4,9,8
2012,5,6,2,3
2013,7,8,9,7


# EXTRACTING ROWS SINGLE

In [43]:
df2

Unnamed: 0_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,avg_package,students,avg_package,students
2010,1,2,6,7
2011,3,4,9,8
2012,5,6,2,3
2013,7,8,9,7


In [44]:
#single
df2.loc[2010]

delhi   avg_package    1
        students       2
mumbai  avg_package    6
        students       7
Name: 2010, dtype: int64

In [45]:
df2['delhi']['students']

2010    2
2011    4
2012    6
2013    8
Name: students, dtype: int64

In [46]:
df

Unnamed: 0,Unnamed: 1,avg_package,students
ME,2019,1,2
ME,2020,3,4
ME,2021,5,6
cse,2019,7,8
cse,2020,9,10
cse,2021,11,12


# SORTING THE INDEX VALUES

In [47]:
#sorting perform on the both index level
df.sort_index(ascending=False)

Unnamed: 0,Unnamed: 1,avg_package,students
cse,2021,11,12
cse,2020,9,10
cse,2019,7,8
ME,2021,5,6
ME,2020,3,4
ME,2019,1,2


In [48]:
df.sort_index(ascending=[False,True])

Unnamed: 0,Unnamed: 1,avg_package,students
cse,2019,7,8
cse,2020,9,10
cse,2021,11,12
ME,2019,1,2
ME,2020,3,4
ME,2021,5,6


In [49]:
df.sort_index(level=1,ascending=[False])

Unnamed: 0,Unnamed: 1,avg_package,students
ME,2021,5,6
cse,2021,11,12
ME,2020,3,4
cse,2020,9,10
ME,2019,1,2
cse,2019,7,8


# TRANSPOSE 

In [50]:
df2

Unnamed: 0_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,avg_package,students,avg_package,students
2010,1,2,6,7
2011,3,4,9,8
2012,5,6,2,3
2013,7,8,9,7


In [51]:
df2.transpose()

Unnamed: 0,Unnamed: 1,2010,2011,2012,2013
delhi,avg_package,1,3,5,7
delhi,students,2,4,6,8
mumbai,avg_package,6,9,2,9
mumbai,students,7,8,3,7


In [52]:
#SWAPLEVEL
df2.swaplevel

<bound method DataFrame.swaplevel of            delhi               mumbai         
     avg_package students avg_package students
2010           1        2           6        7
2011           3        4           9        8
2012           5        6           2        3
2013           7        8           9        7>