# 7 层级索引（hierarchical indexing）（机器学习，深度学习不重要）

In [15]:
import pandas as pd
import numpy as np

# A MultiIndex is pandas' hierarchical index type: every row label is a tuple
# with one entry per level (outer level 'cloth', inner level 'size').
index1 = pd.MultiIndex.from_arrays(
    [
        ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd'],
        [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
    ],
    names=['cloth', 'size'],
)

# Seeded generator: the sample values are identical after every
# "Restart Kernel -> Run All", so the outputs of later cells stay comparable.
rng = np.random.default_rng(42)
ser_obj = pd.Series(rng.standard_normal(12), index=index1)
print(ser_obj)
print(type(ser_obj))  # Series
print(type(ser_obj.index))  # the index type: MultiIndex
print(ser_obj.index)
print(ser_obj.index.levels)  # the unique labels of each index level
ser_obj.index.codes  # less important: per-row integer positions into `levels`


FrozenList([[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]])

In [16]:
# Bare expression: the notebook shows the Series as this cell's output.
ser_obj

cloth  size
a      0      -1.532821
       1       0.216744
       2       0.294479
b      0       0.721492
       1      -0.103774
       2      -0.691011
c      0       0.546125
       1       0.650341
       2      -0.621007
d      0      -1.056814
       1       0.670992
       2      -0.862172
dtype: float64

In [17]:
# How to select data through a hierarchical index.
separator = '-' * 50
selections = (
    ser_obj['c'],         # every row under outer label 'c'; the result is a Series
    ser_obj.loc['a', 2],  # one scalar: outer label 'a', inner label 2
    ser_obj[:, 2],        # inner label 2 across all outer labels
)
for selected in selections:
    print(separator)
    print(selected)

--------------------------------------------------
size
0    0.546125
1    0.650341
2   -0.621007
dtype: float64
--------------------------------------------------
0.2944793007991245
--------------------------------------------------
cloth
a    0.294479
b   -0.691011
c   -0.621007
d   -0.862172
dtype: float64


## 交换层级

In [18]:
# swaplevel() returns a new Series with the two index levels exchanged;
# the object it is called on is left untouched, as the second print shows.
print(ser_obj.swaplevel())
print('-'*50)
print(ser_obj)
print('-'*50)
# Rebind only while the index is still in its original (cloth, size) order,
# so that re-running this cell does not swap the levels back again.
if list(ser_obj.index.names) == ['cloth', 'size']:
    ser_obj = ser_obj.swaplevel()
print(ser_obj)

size  cloth
0     a       -1.532821
1     a        0.216744
2     a        0.294479
0     b        0.721492
1     b       -0.103774
2     b       -0.691011
0     c        0.546125
1     c        0.650341
2     c       -0.621007
0     d       -1.056814
1     d        0.670992
2     d       -0.862172
dtype: float64
--------------------------------------------------
cloth  size
a      0      -1.532821
       1       0.216744
       2       0.294479
b      0       0.721492
       1      -0.103774
       2      -0.691011
c      0       0.546125
       1       0.650341
       2      -0.621007
d      0      -1.056814
       1       0.670992
       2      -0.862172
dtype: float64
--------------------------------------------------
size  cloth
0     a       -1.532821
1     a        0.216744
2     a        0.294479
0     b        0.721492
1     b       -0.103774
2     b       -0.691011
0     c        0.546125
1     c        0.650341
2     c       -0.621007
0     d       -1.056814
1     d        0.670992
2     d       -0.862172
dtype: float64

In [13]:
# `level` chooses which index level to sort by; level=0 is the outermost one.
outer_sorted = ser_obj.sort_index(level=0)
print(outer_sorted)

size  cloth
0     a       -0.593011
      b       -2.037711
      c       -0.302516
      d        1.126788
1     a       -0.101094
      b       -0.448893
      c       -0.951245
      d       -0.517378
2     a        0.213685
      b       -0.763513
      c       -1.686484
      d       -2.106224
dtype: float64


In [12]:
# ser_obj itself is unchanged: the sort_index() call in the previous cell
# was only printed, never assigned back.
ser_obj

size  cloth
0     a       -0.593011
1     a       -0.101094
2     a        0.213685
0     b       -2.037711
1     b       -0.448893
2     b       -0.763513
0     c       -0.302516
1     c       -0.951245
2     c       -1.686484
0     d        1.126788
1     d       -0.517378
2     d       -2.106224
dtype: float64

In [8]:
# Move the outermost index level (level 0) into the column index.
# The argument to unstack() selects which index level is moved.
df_obj = ser_obj.unstack(level=0)
print(df_obj)


size          0         1         2
cloth                              
a     -0.029255  1.088389 -0.111176
b     -0.004056  1.929172 -0.694952
c     -0.415109 -0.623776  1.100320
d     -1.020832  0.760383 -0.144796


In [14]:
print(df_obj)
# stack() folds the column index into the row index, turning the DataFrame
# back into a Series. The column labels always become the innermost level.
# This is the inverse of unstack().
stacked = df_obj.stack()
print(stacked)

size          0         1         2
cloth                              
a     -1.872730  0.895565 -0.442679
b      0.001602 -0.541091 -0.801088
c      0.218049  0.429384 -1.262778
d     -0.713024  0.434239 -1.062186
cloth  size
a      0      -1.872730
       1       0.895565
       2      -0.442679
b      0       0.001602
       1      -0.541091
       2      -0.801088
c      0       0.218049
       1       0.429384
       2      -1.262778
d      0      -0.713024
       1       0.434239
       2      -1.062186
dtype: float64
