In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity="all"

import warnings
# NOTE(review): this silences every warning for the whole session, including
# deprecation warnings raised by pandas -- consider a narrower filter.
warnings.filterwarnings("ignore")

In [None]:
import numpy as np

import pandas as pd

# Show the pandas version in use (the recorded output below reads '1.2.1').
pd.__version__

'1.2.1'

# Pandas对象简介


## pandas的Series对象
Pandas的Series对象是一个**带索引**数据构成的一维数组
Numpy数组通过隐式定义的整数索引获取数值，Pandas的Series对象用一种显式定义的索引与数值关联

### Series是通用的Numpy数组

In [None]:
# Build a Series from a plain list; no index is passed, so a default integer index is used.
data=pd.Series([0.25,0.5,0.75,1.0])
data
# The values as a NumPy array.
data.values
# The index object (a RangeIndex in the recorded output).
data.index

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

array([0.25, 0.5 , 0.75, 1.  ])

RangeIndex(start=0, stop=4, step=1)

### Series是特殊的字典

In [None]:
# State name -> population; the dict keys become the Series index labels.
population_dict = dict([
    ('California', 38332521),
    ('Texas', 26448193),
    ('New York', 19651127),
    ('Florida', 19552860),
    ('Illinois', 12882135),
])
population = pd.Series(data=population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

### 创建Series对象

In [None]:
# Data given as a list: the index defaults to 0..n-1 (see output).
pd.Series([2,4,6])

0    2
1    4
2    6
dtype: int64

In [None]:
# A scalar is repeated for every label in the given index.
pd.Series(5,index=[100,200,300])

100    5
200    5
300    5
dtype: int64

In [None]:
# From a dict: the keys become the index, in insertion order (see output).
pd.Series({2:'a',1:'b',3:'c'})

2    a
1    b
3    c
dtype: object

In [None]:
pd.Series({2:'a',1:'b',3:'c'},index=[3,2])#the Series keeps only the key/value pairs whose keys are listed explicitly in index

## Pandas的DataFrame对象

### DataFrame是通用的Numpy数组

In [None]:

# Area per state, keyed by the same state names as `population`.
area_dict = {'California': 423967, 'Texas': 695662, 'New York': 141297,
             'Florida': 170312, 'Illinois': 149995}
area = pd.Series(area_dict)
area
# Combine two Series that share an index into a two-column DataFrame.
states=pd.DataFrame({'population':population,'area':area})
states
# Row labels.
states.index
# Column labels (also an Index object).
states.columns

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')

Index(['population', 'area'], dtype='object')

### DataFrame是特殊的字典
在numpy二维数组中data[0]返回第一行，dataframe\[col0\]返回第一列

In [None]:
# Dictionary-style access by column name returns that column as a Series.
states['population']
# Rows are selected by label through the .loc indexer.
states.loc['Texas']

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
Name: population, dtype: int64

population    26448193
area            695662
Name: Texas, dtype: int64

### 创建DataFrame对象

In [None]:
# Create from a single Series object (`population` is a Series, not a DataFrame).
pd.DataFrame(population,columns=['population'])

Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


In [None]:
# Create from a list of dicts; keys missing from a dict are shown as NaN (not a number).
data=[{'a':i,'b':2*i} for i in range(3)]
pd.DataFrame(data)
# The two dicts have different keys, so the result has NaN where a key is absent.
pd.DataFrame([{'a':1,'b':2},{'b':3,'c':4}])

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


In [None]:
# Create from a dict of Series objects; each dict key becomes a column name.
pd.DataFrame({'population':population,'area':area})

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [None]:
# Create from a two-dimensional NumPy array, with explicit column and index labels.
# A locally seeded generator keeps the displayed values identical across
# "Restart & Run All"; the original drew from the unseeded global generator.
pd.DataFrame(np.random.RandomState(0).rand(3,2),columns=['foo','bar'],index=['a','b','c'])

Unnamed: 0,foo,bar
a,0.920428,0.397977
b,0.363318,0.030885
c,0.11177,0.589904


In [None]:
# Create from a NumPy structured array; the field names ('A', 'B') become the columns.
A=np.zeros(3,dtype=[('A','i8'),('B','f8')])
pd.DataFrame(A)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0


## Pandas的Index对象
可以将Index对象看作一个**不可变数组或有序集合**
- 当看作不可变数组时，可以进行切片操作
- 当看作有序集合时，可以进行集合操作，如并集、交集、差集

In [None]:
ind=pd.Index([2,3,5,7,11])
# Treated as an immutable array: standard indexing returns a single value ...
ind[1]
# ... and slicing returns another Index.
ind[::2]
# Index also exposes the familiar NumPy array attributes.
ind.size
ind.shape
ind.ndim
ind.dtype

3

Int64Index([2, 5, 11], dtype='int64')

5

(5,)

1

dtype('int64')

In [None]:
# Treated as an ordered set.
indA=pd.Index([1,3,5,7,9])
indB=pd.Index([2,3,5,7,11])
# Explicit set methods replace the &, | and ^ operators: pandas deprecated those
# operators as set operations on Index objects, and from pandas 2.0 they act as
# element-wise logical operations instead.
indA.intersection(indB)          # elements present in both
indA.union(indB)                 # elements present in either
indA.symmetric_difference(indB)  # elements present in exactly one

  after removing the cwd from sys.path.


Int64Index([3, 5, 7], dtype='int64')

Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')

Int64Index([1, 2, 9, 11], dtype='int64')

# 数据取值与选择

## Series数据选择方法

### 将Series看作字典

In [None]:
data=pd.Series([0.25,0.5,0.75,1.0],index=['a','b','c','d'])
data
# Dictionary-style lookup by label.
data['b']
# Membership test checks the index labels.
'a' in data
data.keys()
list(data.items())
# Assigning to a new label extends the Series in place (like adding a dict key).
data['e']=1.25
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

0.5

True

Index(['a', 'b', 'c', 'd'], dtype='object')

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
dtype: float64

### 将Series看作一维数组
当使用显式索引作切片时，结果**包含**最后一个索引；使用隐式索引作切片时，结果**不包含**最后一个索引

In [None]:
# Slicing by explicit index (labels): the end label 'c' is included
data['a':'c']
# Slicing by implicit index (integer positions): the end position is excluded
data[0:2]
# Masking with a Boolean condition
data[(data>0.3)&(data<0.8)]
# Fancy indexing with a list of labels
data[['a','c']]

a    0.25
b    0.50
c    0.75
dtype: float64

a    0.25
b    0.50
dtype: float64

b    0.50
c    0.75
dtype: float64

a    0.25
c    0.75
dtype: float64

### 索引器：loc、iloc和ix

In [None]:
# An integer index makes plain [] ambiguous, which motivates the indexers below.
data=pd.Series(['a','b','c'],index=[1,3,5])
data
# Indexing a single value uses the explicit index (label 1 -> 'a')
data[1]
# Slicing uses the implicit positional index (positions 1 and 2 -> labels 3 and 5)
data[1:3]


1    a
3    b
5    c
dtype: object

'a'

3    b
5    c
dtype: object

In [None]:
# Indexers are not methods of the Series but attributes that expose a slicing interface
# The loc attribute: both indexing and slicing use the explicit index (labels)
data.loc[1]
data.loc[1:3]

'a'

1    a
3    b
dtype: object

In [None]:
# Second, the iloc attribute: both indexing and slicing use the implicit positional index

data.iloc[1]
data.iloc[1:3]
# A third indexer, ix, was a hybrid of the two and was used mainly with DataFrame
# objects (a later cell in this notebook notes it was removed as of pandas 1.0.0)

'b'

3    b
5    c
dtype: object

## DataFrame数据选择方法

### 将DataFrame看作字典

In [None]:

# Area per state.
area = pd.Series({
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
})
# Population per state, keyed by the same state names.
pop = pd.Series({
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
})
# Two-column frame; column order is area, then pop.
data = pd.DataFrame(dict(area=area, pop=pop))
data

Unnamed: 0,area,pop
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860
Illinois,149995,12882135


In [None]:
# Dictionary-style column access.
data['area']
data.area#attribute-style access returns the same column; avoid assigning to a column through this form

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [None]:
# Add a derived column by dictionary-style assignment: population divided by area.
data['density']=data['pop']/data['area']

### 将DataFrame看作二维数组 

In [None]:
# The underlying data as a two-dimensional NumPy array.
data.values

array([[4.23967000e+05, 3.83325210e+07, 9.04139261e+01],
       [6.95662000e+05, 2.64481930e+07, 3.80187404e+01],
       [1.41297000e+05, 1.96511270e+07, 1.39076746e+02],
       [1.70312000e+05, 1.95528600e+07, 1.14806121e+02],
       [1.49995000e+05, 1.28821350e+07, 8.58837628e+01]])

In [None]:
# Transpose: rows and columns are swapped.
data.T

Unnamed: 0,California,Texas,New York,Florida,Illinois
area,423967.0,695662.0,141297.0,170312.0,149995.0
pop,38332520.0,26448190.0,19651130.0,19552860.0,12882140.0
density,90.41393,38.01874,139.0767,114.8061,85.88376


In [None]:
data.values[0]#a single index into the raw array returns one row
data['area']#a single "index" into the DataFrame returns one column

array([4.23967000e+05, 3.83325210e+07, 9.04139261e+01])

In [None]:
data.iloc[:3,:2]#implicit (positional) indexing
data.loc[:'Illinois',:'pop']#explicit (label-based) indexing; end labels are included
# The ix indexer was removed as of pandas 1.0.0

Unnamed: 0,area,pop
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127


Unnamed: 0,area,pop
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860
Illinois,149995,12882135


AttributeError: 'DataFrame' object has no attribute 'ix'

### 其他取值方法

In [None]:
# Indexing with a single label selects a column, whereas slicing selects rows
data[1:3]#rows by position
data['Florida':'Illinois']#rows by index label
data[data.density>100]#rows by Boolean mask

Unnamed: 0,area,pop,density
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746


Unnamed: 0,area,pop,density
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763


Unnamed: 0,area,pop,density
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121


# Pandas数值运算
- 对于一元运算，输出结果保留索引和列标签
- 对于二元运算，pandas在传递通用函数时会自动对齐索引

## 通用函数：保留索引

In [None]:
# Seeded generator shared by the random examples in the cells that follow.
rng=np.random.RandomState(42)
# Four random integers drawn from 0-9.
ser=pd.Series(rng.randint(0,10,4))
ser

0    6
1    3
2    7
3    4
dtype: int64

In [None]:
# 3x4 frame of random integers from the same generator, with columns A-D.
df=pd.DataFrame(rng.randint(0,10,(3,4)),columns=['A','B','C','D'])
df

Unnamed: 0,A,B,C,D
0,6,9,2,6
1,7,4,3,7
2,7,2,5,4


In [None]:
# A NumPy ufunc applied to a Series returns a Series with the index preserved.
np.exp(ser)

0     403.428793
1      20.085537
2    1096.633158
3      54.598150
dtype: float64

In [None]:
# Same for a DataFrame: the index and column labels are preserved in the result.
np.sin(df*np.pi/4)

Unnamed: 0,A,B,C,D
0,-1.0,0.7071068,1.0,-1.0
1,-0.707107,1.224647e-16,0.707107,-0.7071068
2,-0.707107,1.0,-0.707107,1.224647e-16


## 通用函数：对齐索引

### Series索引对齐

In [None]:

# Two named Series whose indexes only partly overlap.
area = pd.Series(
    {'Alaska': 1723337, 'Texas': 695662, 'California': 423967},
    name='area',
)
population = pd.Series(
    {'California': 38332521, 'Texas': 26448193, 'New York': 19651127},
    name='population',
)
# Division aligns on index labels; labels present on only one side give NaN.
population / area

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

In [None]:
# Union of the two indexes.
# Uses the explicit set method: the `|` operator on Index objects is deprecated as a
# set operation and acts as an element-wise logical OR from pandas 2.0 on.
area.index.union(population.index)

Index(['Alaska', 'California', 'New York', 'Texas'], dtype='object')

In [None]:
A = pd.Series([2, 4, 6], index=[0, 1, 2])
B = pd.Series([1, 3, 5], index=[1, 2, 3])
# Labels found in only one operand (0 and 3) yield NaN.
A + B
# fill_value=0 substitutes 0 for the missing operand, so no NaN appears.
A.add(B,fill_value=0)

0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64

### DataFrame索引对齐

In [None]:
# 2x2 frame with columns A, B (rebinds the name A used for a Series above).
A = pd.DataFrame(rng.randint(0, 20, (2, 2)),
                 columns=list('AB'))
A

Unnamed: 0,A,B
0,1,11
1,5,1


In [None]:
# 3x3 frame whose columns are in a different order (B, A, C) and include an extra C.
B = pd.DataFrame(rng.randint(0, 10, (3, 3)),
                 columns=list('BAC'))
B

Unnamed: 0,B,A,C
0,4,0,9
1,5,8,0
2,9,2,6


In [None]:
# Alignment happens on both index and columns regardless of their order;
# positions present in only one frame become NaN.
A+B

Unnamed: 0,A,B,C
0,1.0,15.0,
1,13.0,6.0,
2,,,


In [None]:
# Mean of all values in A (stack() first collapses the frame into a single Series).
fill=A.stack().mean()
# Use that mean in place of entries missing from one of the operands.
A.add(B,fill_value=fill)

Unnamed: 0,A,B,C
0,1.0,15.0,13.5
1,13.0,6.0,4.5
2,6.5,13.5,10.5



Python运算符与Pandas方法映射关系

|python运算符| pandas方法|
|---|---|
|+|	add()|
|-|	sub(), subtract()|
|*|	mul(), multiply()|
|/|	truediv(), div(), divide()|
|//|	floordiv()|
|%|	mod()|
|**|	pow()|

## 通用函数：DataFrame与Series的运算

In [None]:
# NOTE: rebinds A (a DataFrame in earlier cells) to a plain 3x4 NumPy integer array.
A = rng.randint(10, size=(3, 4))
A

array([[3, 8, 2, 4],
       [2, 6, 4, 8],
       [6, 1, 3, 8]])

In [None]:
# NumPy broadcasting: the first row is subtracted from every row.
A-A[0]

array([[ 0,  0,  0,  0],
       [-1, -2,  2,  4],
       [ 3, -7,  1,  4]])

In [None]:
# NOTE: rebinds df. Subtracting a row from the frame operates row-wise by default.
df = pd.DataFrame(A, columns=list('QRST'))
df - df.iloc[0]

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,-1,-2,2,4
2,3,-7,1,4


In [None]:
# axis=0 matches on the row index, so column R is subtracted from every column.
df.subtract(df['R'],axis=0)

Unnamed: 0,Q,R,S,T
0,-5,0,-6,-4
1,-4,0,-2,2
2,5,0,2,7


# 处理缺失值

## 选择处理缺失值的方法
- 用覆盖全局的掩码表示缺失值
- 用标签值表示缺失值

## Pandas的缺失值
pandas采用标签方法（nan或None）表示缺失值

### None:Python对象类型的缺失值
由于None是一个Python对象，所以不能作为任何Numpy/Pandas数组类型的缺失值，只能用于'object'数组类型，即由Python对象构成的数组

### Nan：数值类型的缺失值
Nan是一种按照IEEE浮点数标准设计、在任何系统中都兼容的特殊浮点数
无论和Nan进行何种操作，最终结果都是Nan

In [None]:
# Arithmetic with NaN yields NaN.
1+np.nan

nan

In [None]:
vals2=np.array([1,np.nan,3,4])
# Ordinary aggregates propagate NaN: each of these returns nan.
vals2.sum()
vals2.min()
vals2.max()

nan

nan

nan

In [None]:
# The nan* variants ignore the missing value.
np.nansum(vals2)
np.nanmin(vals2)
np.nanmax(vals2)

8.0

1.0

4.0

### Pandas中Nan与None的差异
当出现缺失值时，Pandas会把没有标签值（哨兵值）的数据类型自动向上转型，例如整型数组中出现NA时会被转换为浮点型
Pandas会自动将None转换为Nan

## 处理缺失值
- isnull()
- notnull()
- dropna()
- fillna()

### 发现缺失值
isnull()和notnull(),返回布尔类型的掩码数据

In [None]:
# Mixed content with two kinds of missing value: np.nan and None.
data=pd.Series([1,np.nan,'hello',None])
# Boolean mask that is True where a value is missing (both NaN and None count).
data.isnull()
# Use the inverse mask to keep only the non-missing entries.
data[data.notnull()]

0    False
1     True
2    False
3     True
dtype: bool

0        1
2    hello
dtype: object

### 剔除缺失值
使用dropna()剔除缺失值（fillna()用于填充缺失值，见下一节）

In [None]:
data
# Returns a new Series without the missing entries.
data.dropna()


0        1
1      NaN
2    hello
3     None
dtype: object

0        1
2    hello
dtype: object

In [None]:
# A single value cannot be dropped from a DataFrame: only whole rows or whole columns
# that contain a missing value can be removed.
df = pd.DataFrame([[1,      np.nan, 2],
                   [2,      3,      5],
                   [np.nan, 4,      6]])
df
# Default: drop every row that contains a missing value.
df.dropna()
# Drop every column that contains a missing value instead.
df.dropna(axis='columns')

Unnamed: 0,0,1,2
0,1.0,,2
1,2.0,3.0,5
2,,4.0,6


Unnamed: 0,0,1,2
1,2.0,3.0,5


Unnamed: 0,2
0,2
1,5
2,6


In [None]:
# The how and thresh parameters control how many missing values a row or column may have before it is dropped

In [None]:
# Add a column made up entirely of missing values (mutates df in place).
df[3]=np.nan
df
df.dropna(axis='columns',how='all')#how='all' drops only columns that are entirely missing; how='any' drops a row/column as soon as it contains one missing value

Unnamed: 0,0,1,2,3
0,1.0,,2,
1,2.0,3.0,5,
2,,4.0,6,


Unnamed: 0,0,1,2
0,1.0,,2
1,2.0,3.0,5
2,,4.0,6


In [None]:
df.dropna(axis='rows',thresh=3)#thresh sets the minimum number of non-missing values a row or column needs in order to be kept

Unnamed: 0,0,1,2,3
1,2.0,3.0,5,


### 填充缺失值

In [None]:
# Numeric Series with two gaps; the recorded output shows None displayed as NaN (dtype float64).
data = pd.Series([1, np.nan, 2, None, 3], index=list('abcde'))
data

a    1.0
b    NaN
c    2.0
d    NaN
e    3.0
dtype: float64

In [None]:
# Replace every missing entry with a single constant.
data.fillna(0)

a    1.0
b    0.0
c    2.0
d    0.0
e    3.0
dtype: float64

In [None]:
# Forward-fill: propagate the previous valid value into each gap.
# ffill() is the dedicated method; fillna(method='ffill') is deprecated in pandas >= 2.1.
data.ffill()

a    1.0
b    1.0
c    2.0
d    2.0
e    3.0
dtype: float64

In [None]:
# Back-fill: propagate the next valid value backwards into each gap.
# bfill() is the dedicated method; fillna(method='bfill') is deprecated in pandas >= 2.1.
data.bfill()

a    1.0
b    2.0
c    2.0
d    3.0
e    3.0
dtype: float64

In [None]:
# Forward-fill along each row (axis=1). A missing value in the first column has no
# previous value to copy, so it stays NaN.
# ffill() replaces fillna(method='ffill'), which is deprecated in pandas >= 2.1.
df.ffill(axis=1)

Unnamed: 0,0,1,2,3
0,1.0,1.0,2.0,2.0
1,2.0,3.0,5.0,5.0
2,,4.0,6.0,6.0


# 层级索引
早期版本的pandas提供了Panel和Panel4D对象来处理三维和四维数据（二者在新版pandas中已被移除，其中Panel于0.25.0版移除）
在实践中，更直观的形式是通过层级索引(hierarchical indexing)或称为多级索引(multi-indexing)配合多个有不同等级的一级索引一起使用

## 多级索引

### 笨办法

In [None]:
# One (state, year) tuple per observation, with the state as the outer key.
index = [(state, year)
         for state in ('California', 'New York', 'Texas')
         for year in (2000, 2010)]
# Population counts, listed in the same order as the tuples above.
populations = [
    33871648, 37253956,
    18976457, 19378102,
    20851820, 25145561,
]
pop = pd.Series(data=populations, index=index)
pop

(California, 2000)    33871648
(California, 2010)    37253956
(New York, 2000)      18976457
(New York, 2010)      19378102
(Texas, 2000)         20851820
(Texas, 2010)         25145561
dtype: int64

In [None]:
# With tuple keys the Series can still be sliced between two (state, year) keys.
pop[('California',2010):('Texas',2000)]

(California, 2010)    37253956
(New York, 2000)      18976457
(New York, 2010)      19378102
(Texas, 2000)         20851820
dtype: int64

In [None]:
# Selecting every 2010 entry means looping over the index tuples by hand.
pop[[i for i in pop.index if i[1]==2010]]

(California, 2010)    37253956
(New York, 2010)      19378102
(Texas, 2010)         25145561
dtype: int64

### 好办法：Pandas多级索引

In [None]:
# Build a MultiIndex from the list of tuples (rebinds the name `index`).
index = pd.MultiIndex.from_tuples(index)
index

MultiIndex([('California', 2000),
            ('California', 2010),
            (  'New York', 2000),
            (  'New York', 2010),
            (     'Texas', 2000),
            (     'Texas', 2010)],
           )

In [None]:
# Re-index the Series with the MultiIndex to get the hierarchical representation.
pop=pop.reindex(index)
pop

California  2000    33871648
            2010    37253956
New York    2000    18976457
            2010    19378102
Texas       2000    20851820
            2010    25145561
dtype: int64

In [None]:
# All states for year 2010: slice the first level, index the second.
pop[:,2010]

California    37253956
New York      19378102
Texas         25145561
dtype: int64

### 高维数据的多级索引

In [None]:
# unstack() moves the inner index level (year) into the columns, giving a DataFrame.
pop_df=pop.unstack()
pop
pop_df

California  2000    33871648
            2010    37253956
New York    2000    18976457
            2010    19378102
Texas       2000    20851820
            2010    25145561
dtype: int64

Unnamed: 0,2000,2010
California,33871648,37253956
New York,18976457,19378102
Texas,20851820,25145561


In [None]:
# stack() is the opposite operation: the columns go back into an inner index level.
pop_df.stack()

California  2000    33871648
            2010    37253956
New York    2000    18976457
            2010    19378102
Texas       2000    20851820
            2010    25145561
dtype: int64

## 多级索引的创建方法

### 隐式创建 
- 将index参数设置为至少二维的数组
- 将元组作为键的字典

In [None]:
# Passing a list of two index arrays creates a two-level MultiIndex implicitly.
# A locally seeded generator keeps the displayed values identical across
# "Restart & Run All"; the original drew from the unseeded global generator.
df=pd.DataFrame(np.random.RandomState(0).rand(4,2),index=[['a','a','b','b'],[1,2,1,2]],columns=['data1','data2'])
df

Unnamed: 0,Unnamed: 1,data1,data2
a,1,0.353225,0.704443
a,2,0.638047,0.823289
b,1,0.827483,0.307069
b,2,0.150272,0.339869


In [None]:
# A dict keyed by (state, year) tuples; pandas builds a MultiIndex from such keys.
data = dict([
    (('California', 2000), 33871648),
    (('California', 2010), 37253956),
    (('Texas', 2000), 20851820),
    (('Texas', 2010), 25145561),
    (('New York', 2000), 18976457),
    (('New York', 2010), 19378102),
])
pd.Series(data)

California  2000    33871648
            2010    37253956
Texas       2000    20851820
            2010    25145561
New York    2000    18976457
            2010    19378102
dtype: int64

### 显式的创建多级索引

In [None]:
# Four equivalent constructions (the recorded outputs are identical):
# from parallel arrays, one per level
pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], [1, 2, 1, 2]])
# from a list of tuples, one per entry
pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2)])
# from the Cartesian product of the level values
pd.MultiIndex.from_product([['a', 'b'], [1, 2]])
pd.MultiIndex(levels=[['a', 'b'], [1, 2]],codes=[[0, 0, 1, 1], [0, 1, 0, 1]])#newer pandas versions renamed the labels argument to codes

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

In [None]:
x=pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], [1, 2, 1, 2]])
# The distinct values available on each level.
x.levels
# For each entry, the position of its value within the corresponding level.
x.codes


FrozenList([['a', 'b'], [1, 2]])

FrozenList([[0, 0, 1, 1], [0, 1, 0, 1]])

### 多级索引的等级名称

In [None]:
# Name the two index levels; the names appear as a header in the display.
pop.index.names=['state','year']
pop

state       year
California  2000    33871648
            2010    37253956
New York    2000    18976457
            2010    19378102
Texas       2000    20851820
            2010    25145561
dtype: int64

### 多级列索引

In [None]:
# Hierarchical row index (year, visit) and hierarchical column index (subject, type).
index = pd.MultiIndex.from_product([[2013, 2014], [1, 2]],
                                   names=['year', 'visit'])
columns = pd.MultiIndex.from_product([['Bob', 'Guido', 'Sue'], ['HR', 'Temp']],
                                     names=['subject', 'type'])

# Mock some data. A locally seeded generator keeps the table identical across
# "Restart & Run All"; the original drew from the unseeded global generator.
data = np.round(np.random.RandomState(0).randn(4, 6), 1)
data[:, ::2] *= 10  # every other column (the HR readings) gets a 10x wider spread
data += 37          # shift all readings so they are centred on 37

# Create the DataFrame.
health_data = pd.DataFrame(data, index=index, columns=columns)
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,46.0,37.8,52.0,37.3,28.0,36.2
2013,2,30.0,37.5,28.0,35.9,48.0,37.2
2014,1,25.0,37.1,37.0,37.2,52.0,37.3
2014,2,38.0,37.6,23.0,39.4,33.0,37.8


## 多级索引的取值和切片

### Series多级索引

In [None]:
pop
# Indexing with one label per level returns a single element.
pop['California',2000]
# Boolean masks work the same way as with a flat index.
pop[pop>22000000]

state       year
California  2000    33871648
            2010    37253956
New York    2000    18976457
            2010    19378102
Texas       2000    20851820
            2010    25145561
dtype: int64

33871648

state       year
California  2000    33871648
            2010    37253956
Texas       2010    25145561
dtype: int64

### Dataframe多级索引

###

In [None]:
health_data
# Column access with one label per column level returns that column as a Series.
health_data['Guido','HR']
# Positional indexing works as on a flat frame: first two rows, first two columns.
health_data.iloc[:2,:2]

Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,46.0,37.8,52.0,37.3,28.0,36.2
2013,2,30.0,37.5,28.0,35.9,48.0,37.2
2014,1,25.0,37.1,37.0,37.2,52.0,37.3
2014,2,38.0,37.6,23.0,39.4,33.0,37.8


year  visit
2013  1        52.0
      2        28.0
2014  1        37.0
      2        23.0
Name: (Guido, HR), dtype: float64

Unnamed: 0_level_0,subject,Bob,Bob
Unnamed: 0_level_1,type,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2
2013,1,46.0,37.8
2013,2,30.0,37.5


In [None]:
# IndexSlice allows slice syntax inside the per-level tuples passed to .loc.
idx=pd.IndexSlice
# Rows: every year, visit 1. Columns: every subject, type 'HR'.
health_data.loc[idx[:,1],idx[:,'HR']]

Unnamed: 0_level_0,subject,Bob,Guido,Sue
Unnamed: 0_level_1,type,HR,HR,HR
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2013,1,46.0,52.0,28.0
2014,1,25.0,37.0,52.0


## 多级索引行列转换

### 有序的索引和无序的索引
如果MultiIndex不是有序的索引，那么大多数切片操作都会失败

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=3b7540ca-7a16-49df-8552-111430ed5259' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>