# 7 层级索引（hierarchical indexing）（机器学习，深度学习不重要）

In [3]:
import pandas as pd
import numpy as np

# MultiIndex is pandas' hierarchical index type: every row label is a tuple
# with one entry per level (here the levels are named 'cloth' and 'size').
SEED = 42  # fixed seed so that Restart & Run All reproduces the same values
rng = np.random.default_rng(SEED)

index1 = pd.MultiIndex.from_arrays(
    [
        ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd'],
        [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
    ],
    names=['cloth', 'size'],
)

ser_obj = pd.Series(rng.standard_normal(12), index=index1)
print(ser_obj)
print(type(ser_obj))  # Series
print(type(ser_obj.index))  # the index type: MultiIndex
print(ser_obj.index)
print(ser_obj.index.levels)  # the distinct labels held by each level
ser_obj.index.codes  # less important: per row, the position of its label within each level


cloth  size
a      0       0.169780
       1      -1.334513
       2      -0.028918
b      0      -0.473801
       1       0.341257
       2      -0.395695
c      0      -1.131568
       1      -0.399659
       2      -0.864357
d      0      -1.933152
       1      -1.277217
       2      -0.669645
dtype: float64
<class 'pandas.core.series.Series'>
<class 'pandas.core.indexes.multi.MultiIndex'>
MultiIndex([('a', 0),
            ('a', 1),
            ('a', 2),
            ('b', 0),
            ('b', 1),
            ('b', 2),
            ('c', 0),
            ('c', 1),
            ('c', 2),
            ('d', 0),
            ('d', 1),
            ('d', 2)],
           names=['cloth', 'size'])
[['a', 'b', 'c', 'd'], [0, 1, 2]]


FrozenList([[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]])

In [4]:
# A bare expression on the last line of a cell is shown as the cell's output; no print() is needed.
ser_obj

cloth  size
a      0       0.169780
       1      -1.334513
       2      -0.028918
b      0      -0.473801
       1       0.341257
       2      -0.395695
c      0      -1.131568
       1      -0.399659
       2      -0.864357
d      0      -1.933152
       1      -1.277217
       2      -0.669645
dtype: float64

In [5]:
# How to select data from a Series that has a hierarchical index
print('-' * 50)
print(ser_obj.loc['c'])  # every row under outer label 'c'; the result is a Series
print('-' * 50)
print(ser_obj.loc[('a', 2)])  # a full (outer, inner) key picks out a single value
print('-' * 50)
print(ser_obj.loc[:, 2])  # across all outer labels, the rows whose inner label is 2

--------------------------------------------------
size
0   -1.131568
1   -0.399659
2   -0.864357
dtype: float64
--------------------------------------------------
-0.02891786744311461
--------------------------------------------------
cloth
a   -0.028918
b   -0.395695
c   -0.864357
d   -0.669645
dtype: float64


## 交换层级

In [6]:
# swaplevel() returns a new Series with the two index levels exchanged;
# the Series it is called on is not modified, as the second print shows.
print(ser_obj.swaplevel())
print('-'*50)
print(ser_obj)
print('-'*50)
# Rebind ser_obj to the (size, cloth) ordering by level *name* instead of with
# another bare swaplevel(): swaplevel() toggles the order on every execution,
# so running this cell twice would silently undo the swap for the cells below.
ser_obj = ser_obj.reorder_levels(['size', 'cloth'])
print(ser_obj)

size  cloth
0     a        0.169780
1     a       -1.334513
2     a       -0.028918
0     b       -0.473801
1     b        0.341257
2     b       -0.395695
0     c       -1.131568
1     c       -0.399659
2     c       -0.864357
0     d       -1.933152
1     d       -1.277217
2     d       -0.669645
dtype: float64
--------------------------------------------------
cloth  size
a      0       0.169780
       1      -1.334513
       2      -0.028918
b      0      -0.473801
       1       0.341257
       2      -0.395695
c      0      -1.131568
       1      -0.399659
       2      -0.864357
d      0      -1.933152
       1      -1.277217
       2      -0.669645
dtype: float64
--------------------------------------------------
size  cloth
0     a        0.169780
1     a       -1.334513
2     a       -0.028918
0     b       -0.473801
1     b        0.341257
2     b       -0.395695
0     c       -1.131568
1     c       -0.399659
2     c       -0.864357
0     d       -1.933152
1     d       -1.277217
2     d       -0.669645
dtype: float64

In [7]:
# Sort a hierarchical index by one of its levels; level=0 is the outermost level.
# ascending and sort_remaining are spelled out here at their default values.
print(
    ser_obj.sort_index(level=0, ascending=True, sort_remaining=True)
)

size  cloth
0     a        0.169780
      b       -0.473801
      c       -1.131568
      d       -1.933152
1     a       -1.334513
      b        0.341257
      c       -0.399659
      d       -1.277217
2     a       -0.028918
      b       -0.395695
      c       -0.864357
      d       -0.669645
dtype: float64


In [8]:
# sort_index() in the previous cell returned a new object and was not assigned back,
# so ser_obj still has its original row order.
ser_obj

size  cloth
0     a        0.169780
1     a       -1.334513
2     a       -0.028918
0     b       -0.473801
1     b        0.341257
2     b       -0.395695
0     c       -1.131568
1     c       -0.399659
2     c       -0.864357
0     d       -1.933152
1     d       -1.277217
2     d       -0.669645
dtype: float64

In [9]:
# Pivot the innermost index level into the column index, turning the Series into a DataFrame.
# unstack's level argument chooses which index level is moved; -1 (the default) is the innermost.
df_obj = ser_obj.unstack(level=-1)
print(df_obj)


cloth         a         b         c         d
size                                         
0      0.169780 -0.473801 -1.131568 -1.933152
1     -1.334513  0.341257 -0.399659 -1.277217
2     -0.028918 -0.395695 -0.864357 -0.669645


In [10]:
# stack() folds the column labels back into the row index, so the DataFrame
# becomes a Series again. The column labels always land in the innermost row
# level, which makes stack() the inverse of the unstack() that built df_obj.
# (Stacking a transposed frame, df_obj.transpose(), would fold the other axis instead.)
print(df_obj, df_obj.stack(), sep='\n')

cloth         a         b         c         d
size                                         
0      0.169780 -0.473801 -1.131568 -1.933152
1     -1.334513  0.341257 -0.399659 -1.277217
2     -0.028918 -0.395695 -0.864357 -0.669645
size  cloth
0     a        0.169780
      b       -0.473801
      c       -1.131568
      d       -1.933152
1     a       -1.334513
      b        0.341257
      c       -0.399659
      d       -1.277217
2     a       -0.028918
      b       -0.395695
      c       -0.864357
      d       -0.669645
dtype: float64
