# 7 层级索引（hierarchical indexing）

In [1]:
import pandas as pd
import numpy as np

# MultiIndex is pandas' hierarchical index type. It is built here from two parallel
# arrays: the outer level ('cloth') and the inner level ('size').
index1 = pd.MultiIndex.from_arrays(
    [['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd'],
     [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]],
    names=['cloth', 'size'])

# Fixed seed so that a fresh "Restart & Run All" reproduces the same values;
# a dedicated RandomState avoids touching NumPy's global random state.
rng = np.random.RandomState(42)
ser_obj = pd.Series(rng.randn(12), index=index1)
print(ser_obj)
print(type(ser_obj))         # Series
print(type(ser_obj.index))   # the index type: MultiIndex
print(ser_obj.index)
print(ser_obj.index.levels)  # the unique labels of each index level
ser_obj.index.codes          # per-row integer positions into `levels` (rarely needed directly)


cloth  size
a      0      -0.220064
       1      -1.066040
       2       0.401037
b      0       0.617559
       1      -0.061682
       2       1.613820
c      0      -0.244883
       1      -0.005319
       2      -0.866422
d      0       0.816196
       1      -0.458512
       2      -0.196433
dtype: float64
<class 'pandas.core.series.Series'>
<class 'pandas.core.indexes.multi.MultiIndex'>
MultiIndex([('a', 0),
            ('a', 1),
            ('a', 2),
            ('b', 0),
            ('b', 1),
            ('b', 2),
            ('c', 0),
            ('c', 1),
            ('c', 2),
            ('d', 0),
            ('d', 1),
            ('d', 2)],
           names=['cloth', 'size'])
[['a', 'b', 'c', 'd'], [0, 1, 2]]


FrozenList([[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]])

In [2]:
# Display the Series through the notebook's rich repr (bare last expression, no print).
ser_obj

cloth  size
a      0      -0.220064
       1      -1.066040
       2       0.401037
b      0       0.617559
       1      -0.061682
       2       1.613820
c      0      -0.244883
       1      -0.005319
       2      -0.866422
d      0       0.816196
       1      -0.458512
       2      -0.196433
dtype: float64

In [3]:
# Ways to select data from a Series that has a hierarchical index
print('-' * 50)
print(ser_obj.loc['c'])     # all rows under outer label 'c'; the result is a Series
print('-' * 50)
print(ser_obj.loc['a', 2])  # one (outer, inner) label pair
print('-' * 50)
print(ser_obj.loc[:, 2])    # rows whose inner label is 2, across every outer label

--------------------------------------------------
size
0   -0.244883
1   -0.005319
2   -0.866422
dtype: float64
--------------------------------------------------
0.4010371494446873
--------------------------------------------------
cloth
a    0.401037
b    1.613820
c   -0.866422
d   -0.196433
dtype: float64


## 交换层级（数据分析用）

In [5]:
# swaplevel() returns a NEW Series with the two index levels exchanged.
print(ser_obj.swaplevel())
print('-'*50)
# The original Series is unchanged by the call above.
print(ser_obj)
print('-'*50)
# Rebind ser_obj to the (size, cloth) ordering used by the cells that follow.
# The target order is spelled out by level name, so re-running this cell leaves
# ser_obj in the same state; a bare `ser_obj = ser_obj.swaplevel()` would flip the
# levels back on every second run.
ser_obj = ser_obj.reorder_levels(['size', 'cloth'])
print(ser_obj)

size  cloth
0     a       -0.220064
1     a       -1.066040
2     a        0.401037
0     b        0.617559
1     b       -0.061682
2     b        1.613820
0     c       -0.244883
1     c       -0.005319
2     c       -0.866422
0     d        0.816196
1     d       -0.458512
2     d       -0.196433
dtype: float64
--------------------------------------------------
cloth  size
a      0      -0.220064
       1      -1.066040
       2       0.401037
b      0       0.617559
       1      -0.061682
       2       1.613820
c      0      -0.244883
       1      -0.005319
       2      -0.866422
d      0       0.816196
       1      -0.458512
       2      -0.196433
dtype: float64
--------------------------------------------------
size  cloth
0     a       -0.220064
1     a       -1.066040
2     a        0.401037
0     b        0.617559
1     b       -0.061682
2     b        1.613820
0     c       -0.244883
1     c       -0.005319
2     c       -0.866422
0     d        0.816196
1     d       -0.458512
2     d       -0.196433
dtype: float64

In [6]:
print(ser_obj.sort_index(level=0))  # sort by a chosen index level; level=0 means the outermost level

size  cloth
0     a       -0.220064
      b        0.617559
      c       -0.244883
      d        0.816196
1     a       -1.066040
      b       -0.061682
      c       -0.005319
      d       -0.458512
2     a        0.401037
      b        1.613820
      c       -0.866422
      d       -0.196433
dtype: float64


In [10]:
# Pivot the 'cloth' index level into the columns; 'size' remains the row index.
# unstack accepts either a level name or a level position. The name is used here so
# the result does not depend on the current order of the index levels.
df_obj = ser_obj.unstack('cloth')
print(df_obj)


cloth         a         b         c         d
size                                         
0     -0.220064  0.617559 -0.244883  0.816196
1     -1.066040 -0.061682 -0.005319 -0.458512
2      0.401037  1.613820 -0.866422 -0.196433


In [11]:
# Print the wide DataFrame, then its stacked form.
# stack() folds the column labels into the row index, producing a Series.
# The column labels always become the innermost index level, which mirrors what
# unstack did in the opposite direction.
print(df_obj, df_obj.stack(), sep='\n')

cloth         a         b         c         d
size                                         
0     -0.220064  0.617559 -0.244883  0.816196
1     -1.066040 -0.061682 -0.005319 -0.458512
2      0.401037  1.613820 -0.866422 -0.196433
size  cloth
0     a       -0.220064
      b        0.617559
      c       -0.244883
      d        0.816196
1     a       -1.066040
      b       -0.061682
      c       -0.005319
      d       -0.458512
2     a        0.401037
      b        1.613820
      c       -0.866422
      d       -0.196433
dtype: float64
