In [1]:
import numpy as np
import pandas as pd

In [24]:
# Every (region, year) pair, regions in listed order with both years for each.
index = [(region, year)
         for region in ('California', 'New York', 'Taiwan')
         for year in (2000, 2010)]
# One population figure per entry of `index`, in the same order.
populations = [
    300000000, 3545788006,   # California 2000, 2010
    142467897, 1937424222,   # New York   2000, 2010
    20851220, 22222261,      # Taiwan     2000, 2010
]
# Series labelled by the plain Python tuples.
pop = pd.Series(data=populations, index=index)
pop

(California, 2000)     300000000
(California, 2010)    3545788006
(New York, 2000)       142467897
(New York, 2010)      1937424222
(Taiwan, 2000)          20851220
(Taiwan, 2010)          22222261
dtype: int64

In [25]:
# Label-based slice on the tuple-valued index; both endpoint labels are
# included in the result.
pop[('California', 2010):('Taiwan', 2000)]


(California, 2010)    3545788006
(New York, 2000)       142467897
(New York, 2010)      1937424222
(Taiwan, 2000)          20851220
dtype: int64

In [26]:
# Build a MultiIndex from the list of tuples.
# Note: this rebinds `index` from a plain list to a pd.MultiIndex.
index = pd.MultiIndex.from_tuples(index)
index

MultiIndex([('California', 2000),
            ('California', 2010),
            (  'New York', 2000),
            (  'New York', 2010),
            (    'Taiwan', 2000),
            (    'Taiwan', 2010)],
           )

In [27]:
# Use the MultiIndex: reindex the Series against it, so the tuple labels
# become two index levels. Note: this rebinds `pop`.
pop = pop.reindex(index)
pop

California  2000     300000000
            2010    3545788006
New York    2000     142467897
            2010    1937424222
Taiwan      2000      20851220
            2010      22222261
dtype: int64

In [28]:
# Partial indexing on the MultiIndex: every label of the first level,
# only the label 2000 on the second level.
pop[:, 2000]

California    300000000
New York      142467897
Taiwan         20851220
dtype: int64

In [29]:
# Move the last level of the MultiIndex out into columns,
# turning the Series into a DataFrame.
pop_df = pop.unstack()
pop_df

Unnamed: 0,2000,2010
California,300000000,3545788006
New York,142467897,1937424222
Taiwan,20851220,22222261


In [30]:
# stack() turns the columns of pop_df back into the innermost index level,
# giving a Series with a two-level index again.
pop_df.stack()

California  2000     300000000
            2010    3545788006
New York    2000     142467897
            2010    1937424222
Taiwan      2000      20851220
            2010      22222261
dtype: int64

In [10]:
# Three ways to build a MultiIndex.
# from_arrays: one list of labels per level, all of the same length.
pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], [1, 2, 1, 2]])

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

In [11]:
# from_tuples: one tuple per entry, holding that entry's label on each level.
pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2)])


MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

In [12]:
# from_product: Cartesian product of the per-level label lists.
pd.MultiIndex.from_product([['a', 'b'], [1, 2]])


MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

In [13]:
# Build hierarchical (multi-level) row and column indices.
# Note: this rebinds `index` to a new (year, visit) MultiIndex.
index = pd.MultiIndex.from_product([[2013, 2014], [1, 2]],
                                   names=['year', 'visit'])
columns = pd.MultiIndex.from_product([['Bob', 'Guido', 'Sue'], ['HR', 'Temp']],
                                     names=['subject', 'type'])
# Generate mock data from a locally seeded generator so the table is the same
# on every run and the global NumPy random state is left untouched.
rng = np.random.RandomState(0)
data = np.round(rng.randn(4, 6), 1)
# Columns alternate HR, Temp: widen the spread of every other column (the HR
# ones) by a factor of 10, then shift all values so they are centred on 37.
data[:, ::2] *= 10
data += 37
# Assemble the DataFrame with the hierarchical row and column labels.
health_data = pd.DataFrame(data, index=index, columns=columns)
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,50.0,37.3,44.0,36.0,24.0,35.7
2013,2,40.0,37.7,29.0,38.2,39.0,37.6
2014,1,24.0,37.4,36.0,36.2,49.0,37.4
2014,2,48.0,37.2,50.0,38.0,53.0,37.1


In [14]:
# Selecting by a top-level column label returns the sub-DataFrame holding
# that subject's columns (HR and Temp).
health_data['Guido']


Unnamed: 0_level_0,type,HR,Temp
year,visit,Unnamed: 2_level_1,Unnamed: 3_level_1
2013,1,44.0,36.0
2013,2,29.0,38.2
2014,1,36.0,36.2
2014,2,50.0,38.0


In [15]:
# Giving a label for both column levels selects a single column as a Series.
health_data['Guido', 'HR']


year  visit
2013  1        44.0
      2        29.0
2014  1        36.0
      2        50.0
Name: (Guido, HR), dtype: float64

In [16]:
# Position-based selection: the first two rows and the first two columns.
health_data.iloc[:2, :2]


Unnamed: 0_level_0,subject,Bob,Bob
Unnamed: 0_level_1,type,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2
2013,1,50.0,37.3
2013,2,40.0,37.7
