In [1]:
# Row labels as (place, year) pairs; one tuple per entry in `populations`.
index = [
    ('Kolkata', 2000),
    ('Bangalore', 2010),
    ('Delhi', 2000),
    ('Mumbai', 2010),
    ('Japan', 2000),
    ('Texes', 2010),
]

In [2]:
# Population figures, matched by position to the (place, year) labels in `index`.
populations = [
    12121212,
    34343434,
    4545455,
    98989342,
    454545,
    565656,
]

In [3]:
import pandas as pd

In [5]:
# Naive approach: a Series labelled directly with the Python tuples in `index`.
# The labels stay plain tuples (object dtype), not a hierarchical MultiIndex.
data = pd.Series(populations, index=index)

In [6]:
# Label-based slice between two tuple labels; both endpoints are included
# in the result.
data[('Bangalore', 2010):('Japan', 2000)]

(Bangalore, 2010)    34343434
(Delhi, 2000)         4545455
(Mumbai, 2010)       98989342
(Japan, 2000)          454545
dtype: int64

In [9]:
# With tuple labels, picking out one year means unpacking every label by hand
# and keeping those whose year component is 2010.
[(place, year) for (place, year) in data.index if year == 2010]

[('Bangalore', 2010), ('Mumbai', 2010), ('Texes', 2010)]

In [8]:
# Inspect the index: a flat Index whose elements are (place, year) tuples.
data.index

Index([  ('Kolkata', 2000), ('Bangalore', 2010),     ('Delhi', 2000),
          ('Mumbai', 2010),     ('Japan', 2000),     ('Texes', 2010)],
      dtype='object')

### Pandas MultiIndex

In [11]:
# Build a hierarchical index from the list of (place, year) tuples.
# Note: this rebinds `index` from the list of tuples to a pd.MultiIndex.
index = pd.MultiIndex.from_tuples(index)

In [13]:
# Re-label the Series against the MultiIndex. The MultiIndex was built from the
# same tuples that label `data`, so each value is matched to its original label.
data = data.reindex(index)

In [14]:
# Display the Series; with a MultiIndex the place and year render as two
# separate label columns.
data

Kolkata    2000    12121212
Bangalore  2010    34343434
Delhi      2000     4545455
Mumbai     2010    98989342
Japan      2000      454545
Texes      2010      565656
dtype: int64

In [16]:
# Select every entry whose second level (year) is 2010; the result is indexed
# by the remaining first level (place).
data[:,2010]

Bangalore    34343434
Mumbai       98989342
Texes          565656
dtype: int64

In [17]:
# Select all years recorded for 'Bangalore'; the result is indexed by year.
data['Bangalore',:]

2010    34343434
dtype: int64

In [18]:
# Second data set: every place now appears for both 2000 and 2010, so the
# labels form a complete place x year grid.
index = [
    ('Kolkata', 2000),
    ('Kolkata', 2010),
    ('Delhi', 2000),
    ('Delhi', 2010),
    ('Japan', 2000),
    ('Japan', 2010),
]

In [19]:
# Population figures, matched by position to the (place, year) labels in `index`.
populations = [
    12121212,
    34343434,
    4545455,
    98989342,
    454545,
    565656,
]

In [21]:
# Rebuild the Series for the second data set, labelled with the list of
# (place, year) tuples currently bound to `index`.
data = pd.Series(populations, index=index)

In [22]:
# Convert the tuple labels into a hierarchical MultiIndex (rebinding `index`),
# then re-label the Series against it.
index = pd.MultiIndex.from_tuples(index)
data = data.reindex(index)

In [24]:
# Pivot the inner (year) level into columns, giving a DataFrame with one row
# per place. The result is only displayed; `data` is not reassigned.
data.unstack()

Unnamed: 0,2000,2010
Delhi,4545455,98989342
Japan,454545,565656
Kolkata,12121212,34343434


In [25]:
# Display `data` again: it is still the stacked Series, since the unstacked
# frame above was not assigned back.
data

Kolkata  2000    12121212
         2010    34343434
Delhi    2000     4545455
         2010    98989342
Japan    2000      454545
         2010      565656
dtype: int64

In [27]:
# Two-column frame: 'total' comes from the MultiIndexed Series `data`, and
# 'under18' is given as a plain list with one value per row.
# NOTE(review): the sample under-18 figure for ('Japan', 2000), 532323, is
# larger than that row's total (454545 in `populations`), so any derived
# fraction for that row exceeds 1 - presumably placeholder data; confirm.
pop_df = pd.DataFrame(
    {
        'total': data,
        'under18': [32323, 232323, 56565, 10909, 532323, 121212],
    }
)

In [28]:
# Display the frame: rows keyed by (place, year), columns 'total' and 'under18'.
pop_df

Unnamed: 0,Unnamed: 1,total,under18
Kolkata,2000,12121212,32323
Kolkata,2010,34343434,232323
Delhi,2000,4545455,56565
Delhi,2010,98989342,10909
Japan,2000,454545,532323
Japan,2010,565656,121212


In [29]:
# Fraction of each row's population that is under 18: element-wise division of
# the two columns, so the result keeps pop_df's row index.
f_u18 = pop_df['under18'] / pop_df['total']

In [None]:
# Pivot the inner index level of the fraction Series into columns for a
# side-by-side view.
f_u18.unstack()

### Methods of MultiIndex creation

In [32]:
import numpy as np

In [33]:
# Passing a list of two label lists as `index` makes pandas build a two-level
# MultiIndex implicitly (outer level 'a'/'b', inner level 1/2).
# The values come from a locally seeded generator so that re-running the cell
# reproduces the same numbers and the global NumPy random state is untouched.
pd.DataFrame(
    np.random.RandomState(0).rand(4, 2),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=['data1', 'data2']
)

Unnamed: 0,Unnamed: 1,data1,data2
a,1,0.688829,0.72996
a,2,0.12585,0.02862
b,1,0.729703,0.946425
b,2,0.298804,0.838065


### Ways to create a MultiIndex: from arrays, from tuples, and from a product

In [35]:
# from_arrays: one list per level, aligned position by position.
pd.MultiIndex.from_arrays(
    [
        ['a', 'a', 'b', 'b'],  # first level
        [1, 2, 1, 2],          # second level
    ]
)

MultiIndex(levels=[['a', 'b'], [1, 2]],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]])

In [36]:
# from_tuples: one tuple per row, each holding that row's label on every level.
pd.MultiIndex.from_tuples(
    [
        ('a', 1),
        ('a', 2),
        ('b', 1),
        ('b', 2),
    ]
)

MultiIndex(levels=[['a', 'b'], [1, 2]],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]])

In [39]:
# from_product: every combination of the per-level values, with the first
# level varying slowest.
midx = pd.MultiIndex.from_product(
    [
        ['a', 'b'],
        [1, 2],
    ]
)

In [41]:
# Demo frame indexed by the explicit MultiIndex `midx`.
# The values come from a locally seeded generator so that re-running the cell
# reproduces the same numbers and the global NumPy random state is untouched.
p = pd.DataFrame(
    np.random.RandomState(1).rand(4, 2),
    index=midx,
    columns=['data1', 'data2']
)

In [43]:
# Name the two index levels so they can be referred to as 'state' and 'year'.
# This assigns on the existing index object (in place).
# NOTE(review): if `p` shares its index object with `midx`, the names are set
# on `midx` as well - confirm if `midx` is reused elsewhere.
p.index.names = ['state', 'year']

In [44]:
# Display the frame; the level names now appear as headers above the index.
p

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
state,year,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0.338606,0.980432
a,2,0.767436,0.957376
b,1,0.804152,0.980883
b,2,0.642302,0.354279


### MultiIndex for columns

In [45]:
# Row index for the visit table: every (year, visit) combination.
# Note: this rebinds `index`, used earlier for the population labels.
index = pd.MultiIndex.from_product(
    [
        [2013, 2014],
        [1, 2],
    ],
    names=['year', 'visit']
)

In [46]:
# Inspect the row MultiIndex and its level names.
index

MultiIndex(levels=[[2013, 2014], [1, 2]],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
           names=['year', 'visit'])

In [47]:
# Column index: every (subject, type) combination, i.e. an 'HR' and a 'Temp'
# column for each of the three subjects.
columns = pd.MultiIndex.from_product(
    [
        ['Bob', 'Mac', 'Sue'],
        ['HR', 'Temp'],
    ],
    names=['subject', 'type']
)

In [48]:
# Inspect the column MultiIndex and its level names.
columns

MultiIndex(levels=[['Bob', 'Mac', 'Sue'], ['HR', 'Temp']],
           labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
           names=['subject', 'type'])

In [51]:
# Mock measurements: a 4 x 6 array of standard-normal draws rounded to one
# decimal place. The draws come from a locally seeded generator so that
# re-running the cell reproduces the same array and the global NumPy random
# state is untouched.
# Note: this rebinds `data`, previously the population Series, to an ndarray.
data = np.round(np.random.RandomState(2).randn(4, 6), 1)

In [55]:
# Multiply every other column (positions 0, 2, 4) by 10, in place.
# Not idempotent: running this cell again scales those columns again.
data[:, ::2] *= 10

In [57]:
# Add 30 to every value, in place.
# Not idempotent: running this cell again shifts the values again.
data += 30

In [58]:
# Display the raw 4 x 6 array before it is wrapped in a labelled DataFrame.
data

array([[ 19. ,  30.1,  35. ,  31.4,  38. ,  30. ],
       [ 54. ,  29.4,  18. ,  30.1,  38. ,  28.7],
       [ 19. ,  30.3,  22. ,  32. ,  40. ,  30.2],
       [ 30. ,  29.3,  33. ,  30.7,  37. ,  29.6]])

In [60]:
# Wrap the array in a DataFrame with hierarchical labels on both axes:
# rows keyed by (year, visit), columns keyed by (subject, type).
hd = pd.DataFrame(data, index=index, columns=columns)

In [61]:
# Display the frame with MultiIndexed rows and columns.
hd

Unnamed: 0_level_0,subject,Bob,Bob,Mac,Mac,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,19.0,30.1,35.0,31.4,38.0,30.0
2013,2,54.0,29.4,18.0,30.1,38.0,28.7
2014,1,19.0,30.3,22.0,32.0,40.0,30.2
2014,2,30.0,29.3,33.0,30.7,37.0,29.6


In [63]:
# Select the ('Bob', 'HR') column with a single tuple key rather than chaining
# hd['Bob']['HR'], which first builds the intermediate 'Bob' sub-frame and then
# indexes into it. The values are the same; the resulting Series is named by
# the full column key ('Bob', 'HR').
hd['Bob', 'HR']

year  visit
2013  1        19.0
      2        54.0
2014  1        19.0
      2        30.0
Name: HR, dtype: float64