In [3]:
import numpy as np
import pandas as pd

from pandas import DataFrame, Series
from numpy.random import randn

In [7]:
# Build a Series with a two-level (hierarchical) index: the first list gives the
# outer labels (1, 2) and the second list the inner labels ('a', 'b', 'c').
# The values come from a locally seeded generator so that Restart & Run All
# reproduces the same numbers without touching NumPy's global random state.
rng = np.random.RandomState(0)  # fixed seed: any constant works, it only has to stay the same
ser = Series(rng.randn(6), index=[[1, 1, 1, 2, 2, 2], ['a', 'b', 'c', 'a', 'b', 'c']])
ser

1  a    0.618479
   b    0.696432
   c    1.561701
2  a    0.977929
   b   -1.102987
   c   -2.269898
dtype: float64

In [9]:
# `ser` was built from two label lists, so its index is a two-level MultiIndex.
ser.index

MultiIndex(levels=[[1, 2], ['a', 'b', 'c']],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [10]:
# 1 and 2 are the labels of the outer index level.
# Selecting an outer label returns the sub-Series keyed by the inner level.
ser.loc[1]

a    0.618479
b    0.696432
c    1.561701
dtype: float64

In [11]:
# Same selection for the other outer label.
ser.loc[2]

a    0.977929
b   -1.102987
c   -2.269898
dtype: float64

In [12]:
# Select on the inner level: `:` keeps every outer label, 'a' picks the inner one.
# The result is keyed by the outer labels only.
ser.loc[:, 'a']

1    0.618479
2    0.977929
dtype: float64

In [14]:
# A Series with a multi-level index can be pivoted into a DataFrame with the
# `unstack` method: the innermost level (level=-1, the default) becomes the
# columns and the remaining level stays as the row index.
dframe = ser.unstack(level=-1)
dframe

Unnamed: 0,a,b,c
1,0.618479,0.696432,1.561701
2,0.977929,-1.102987,-2.269898


In [17]:
# A 4x4 frame with hierarchical labels on BOTH axes.
# Each axis gets a list of two label lists: [outer labels, inner labels].
row_labels = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]]
# NOTE(review): ('NY', 'Cold') appears twice, so the column labels are not
# unique -- confirm this duplication is intended.
col_labels = [['NY', 'NY', 'LA', 'SF'], ['Cold', 'Cold', 'Hot', 'Hot']]

dframe2 = DataFrame(np.arange(16).reshape(4, 4), index=row_labels, columns=col_labels)
dframe2

Unnamed: 0_level_0,Unnamed: 1_level_0,NY,NY,LA,SF
Unnamed: 0_level_1,Unnamed: 1_level_1,Cold,Cold.1,Hot,Hot
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [18]:
# Give every index level a name so the outer and inner levels can be told
# apart (and referred to by name instead of by position).
row_level_names = ['INDEX_1', 'INDEX_2']   # outer, inner level of the rows
col_level_names = ['Cities', 'Temp']       # outer, inner level of the columns

dframe2.index.names = row_level_names
dframe2.columns.names = col_level_names

dframe2

Unnamed: 0_level_0,Cities,NY,NY,LA,SF
Unnamed: 0_level_1,Temp,Cold,Cold,Hot,Hot
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [19]:
# Exchange the two column levels so 'Temp' becomes the outer one.
# This returns a new frame; `dframe2` itself is not reassigned here.
dframe2.swaplevel(i='Cities', j='Temp', axis=1)

Unnamed: 0_level_0,Temp,Cold,Cold,Hot,Hot
Unnamed: 0_level_1,Cities,NY,NY,LA,SF
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [23]:
# Sort the rows by one level of the row index.
# level=0 is the outer level, level=1 is the inner level.
dframe2.sort_index(axis=0, level=1)

Unnamed: 0_level_0,Cities,NY,NY,LA,SF
Unnamed: 0_level_1,Temp,Cold,Cold,Hot,Hot
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
b,1,8,9,10,11
a,2,4,5,6,7
b,2,12,13,14,15


In [26]:
# Goal: one 'Cold' total and one 'Hot' total per row, i.e. add together the
# columns that share the same 'Temp' label.
#
# About the axis: 'Temp' is a level of the COLUMN labels, so the values are
# added across the columns of each row -- rows are kept, columns are collapsed.
#
# `DataFrame.sum(level=..., axis=1)` was deprecated in pandas 1.3 and removed
# in 2.0, and `groupby(axis=1)` is deprecated as well. Transposing, grouping on
# the 'Temp' level (now part of the row labels), summing and transposing back
# produces the same table on both old and current pandas versions.
# sort=False keeps the 'Temp' groups in order of first appearance.
dframe2.T.groupby(level='Temp', sort=False).sum().T

Unnamed: 0_level_0,Temp,Cold,Hot
INDEX_1,INDEX_2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,1,5
a,2,9,13
b,1,17,21
b,2,25,29
