In [46]:
import numpy as np
import pandas as pd

from pandas import Series,DataFrame
from numpy.random import randn

In [47]:
ser = Series(randn(6),index=[[1,1,1,2,2,2],['a','b','c','a','b','c']])

ser

1  a    0.417404
   b    1.572289
   c    1.477079
2  a   -2.950258
   b   -2.075021
   c    1.936730
dtype: float64

In [48]:
ser.index

MultiIndex([(1, 'a'),
            (1, 'b'),
            (1, 'c'),
            (2, 'a'),
            (2, 'b'),
            (2, 'c')],
           )

In [49]:
ser[1]

a    0.417404
b    1.572289
c    1.477079
dtype: float64

In [50]:
ser[2]

a   -2.950258
b   -2.075021
c    1.936730
dtype: float64

In [51]:
# Call from internal-index level:

# Here, we specify 'everything' using a colon ':' in the higher index level, 
# and specifying all 'a' in the lower index level.
ser[:,'a']

1    0.417404
2   -2.950258
dtype: float64

In [52]:
# Create DataFrames from a series with multiple levels: we do so with a method called 'unstack':

dframe = ser.unstack()

dframe

Unnamed: 0,a,b,c
1,0.417404,1.572289,1.477079
2,-2.950258,-2.075021,1.93673


In [53]:
# Apply multi-level indexing to dataframes from the beginning by constructing the dataframe with multiple levels:

dframe2 = DataFrame(np.arange(16).reshape(4,4),
                    index=[['a','a','b','b'],[1,2,1,2]],
                    columns=[['NY','NY','LA','SF'],['cold','hot','cold','hot']])

# np.arange creates a 16-point array. .reshape it as a matrix (4,4);
# Passed two index lists creating an index hierarchy for the rows, [['a','a','b','b'],[1,2,1,2]], which were matched up;
# Created an index hierarchy for the columns with [['NY','NY','LA','SF'],['cold','hot','cold','hot']]

dframe2

Unnamed: 0_level_0,Unnamed: 1_level_0,NY,NY,LA,SF
Unnamed: 0_level_1,Unnamed: 1_level_1,cold,hot,cold,hot
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [54]:
# Naming the index levels can assist with keeing track of them:

dframe2.index.names = ['INDEX_1','INDEX_2']

dframe2.columns.names = ['CITIES','TEMP']

dframe2

Unnamed: 0_level_0,CITIES,NY,NY,LA,SF
Unnamed: 0_level_1,TEMP,cold,hot,cold,hot
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [55]:
# Interchange level orders (for some reason)

dframe2.swaplevel('CITIES','TEMP',axis=1)

# 'axis=1' is used to specifically swap columns, as compared to the default 'axis=0' which represents rows.

dframe2

Unnamed: 0_level_0,CITIES,NY,NY,LA,SF
Unnamed: 0_level_1,TEMP,cold,hot,cold,hot
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [56]:
# We may also sort levels:

# dframe2.sortlevel(1)

# Returns: AttributeError: 'DataFrame' object has no attribute 'sortlevel'

'''
The attribute 'sortlevel' is now depriciated. 

Solutions: https://stackoverflow.com/questions/44123874/dataframe-object-has-no-attribute-sort

    Sort() was deprecated for DataFrames in favor of either:
    
        sort_values() to sort by column(s)
        
        sort_index() to sort by the index

'''

"\nThe attribute 'sortlevel' is now depriciated. \n\nSolutions: https://stackoverflow.com/questions/44123874/dataframe-object-has-no-attribute-sort\n\n    Sort() was deprecated for DataFrames in favor of either:\n    \n        sort_values() to sort by column(s)\n        \n        sort_index() to sort by the index\n\n"

In [57]:
dframe2.sort_values(by='INDEX_1')

Unnamed: 0_level_0,CITIES,NY,NY,LA,SF
Unnamed: 0_level_1,TEMP,cold,hot,cold,hot
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [58]:
dframe2.sort_values(by='INDEX_2')

Unnamed: 0_level_0,CITIES,NY,NY,LA,SF
Unnamed: 0_level_1,TEMP,cold,hot,cold,hot
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
b,1,8,9,10,11
a,2,4,5,6,7
b,2,12,13,14,15


In [59]:
# Perform operations on particular levels:

dframe2.sum(level='TEMP',axis=1)

'''
/var/folders/6r/1s2mh7cn0f987nkj0d9g2yjc0000gn/T/ipykernel_26716/3851920675.py:3: 
FutureWarning: Using the level keyword in DataFrame and Series aggregations is deprecated;
it will be removed in a future version.

Use groupby instead. df.sum(level=1) should use df.groupby(level=1).sum().
  dframe2.sum(level='TEMP',axis=1)
'''

  dframe2.sum(level='TEMP',axis=1)




In [60]:
dframe2.groupby(level='TEMP',axis=1).sum()

Unnamed: 0_level_0,TEMP,cold,hot
INDEX_1,INDEX_2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,2,4
a,2,10,12
b,1,18,20
b,2,26,28


In [61]:
# ******** SELF PRACTICE ********

In [70]:
dframe3 = DataFrame(randn(36).reshape(6,6),
                    index=[['a','a','a','b','b','b'],[1,2,1,2,1,2]],
                    columns=[['DC','DC','PARIS','PARIS','BERLIN','BERLIN'],['cold','hot','cold','hot','cold','hot']])

dframe3

Unnamed: 0_level_0,Unnamed: 1_level_0,DC,DC,PARIS,PARIS,BERLIN,BERLIN
Unnamed: 0_level_1,Unnamed: 1_level_1,cold,hot,cold,hot,cold,hot
a,1,1.615166,0.389211,0.674901,0.210045,-1.125679,0.172758
a,2,0.873697,-0.850463,1.153173,-1.715181,-0.253198,-1.738641
a,1,-0.881129,1.616968,-1.53603,-0.430213,0.540416,0.647078
b,2,-0.439933,0.371387,1.564706,0.53698,0.549252,0.350163
b,1,-0.42751,2.157864,1.109415,0.120461,-0.469845,0.004436
b,2,0.008027,-1.071187,1.608704,0.508797,0.370295,-0.138747


In [71]:
dframe3.index.names = ['INDEX_1','INDEX_2']

dframe3.columns.names = ['CITIES','TEMP']

dframe3

Unnamed: 0_level_0,CITIES,DC,DC,PARIS,PARIS,BERLIN,BERLIN
Unnamed: 0_level_1,TEMP,cold,hot,cold,hot,cold,hot
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
a,1,1.615166,0.389211,0.674901,0.210045,-1.125679,0.172758
a,2,0.873697,-0.850463,1.153173,-1.715181,-0.253198,-1.738641
a,1,-0.881129,1.616968,-1.53603,-0.430213,0.540416,0.647078
b,2,-0.439933,0.371387,1.564706,0.53698,0.549252,0.350163
b,1,-0.42751,2.157864,1.109415,0.120461,-0.469845,0.004436
b,2,0.008027,-1.071187,1.608704,0.508797,0.370295,-0.138747
