# 选择数据

首先生成数据以供使用：

In [21]:
import numpy as np
import pandas as pd

# Six consecutive month-end timestamps starting in January 2020 (see the
# output below); reused later as the row index of the example DataFrame.
# NOTE(review): newer pandas releases (2.2+) reportedly deprecate the 'M'
# alias in favour of 'ME' — confirm against the pandas version in use.
dates = pd.date_range('20200101', periods=6, freq='M')
# Bare expression so the notebook displays the resulting index.
dates

DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30'],
              dtype='datetime64[ns]', freq='M')

In [22]:
# Seed NumPy's global generator so the example frame is identical on every
# Restart & Run All; without it each run produces different values.
# The table displayed below this cell came from an earlier unseeded run, so
# its numbers will change once the notebook is re-executed with the seed.
np.random.seed(42)

# 6x4 frame of uniform [0, 1) samples: one row per timestamp in `dates`,
# columns labelled A-D.
df = pd.DataFrame(np.random.rand(6, 4), index=dates, columns=list('ABCD'))
# Bare expression so the notebook renders the frame.
df

Unnamed: 0,A,B,C,D
2020-01-31,0.545802,0.010205,0.176416,0.402558
2020-02-29,0.815496,0.838292,0.706108,0.002344
2020-03-31,0.707482,0.947923,0.264894,0.134783
2020-04-30,0.812991,0.122319,0.032697,0.339973
2020-05-31,0.621796,0.909555,0.484812,0.768949
2020-06-30,0.449415,0.031591,0.622136,0.109499


## 列选取

可以通过 df['A'] 选择单列 A，得到一个 Series 对象，等同于 df.A；传入列名列表（如 df[['A', 'D']]）则可以选择多列，得到一个 DataFrame：

In [29]:
# Bracket indexing with a single column label; the output below is a
# Series named A indexed by the dates.
df['A']

2020-01-31    0.545802
2020-02-29    0.815496
2020-03-31    0.707482
2020-04-30    0.812991
2020-05-31    0.621796
2020-06-30    0.449415
Freq: M, Name: A, dtype: float64

In [32]:
# A list of column labels selects those columns; the output below keeps
# columns A and D as a table.
df[['A', 'D']]

Unnamed: 0,A,D
2020-01-31,0.545802,0.402558
2020-02-29,0.815496,0.002344
2020-03-31,0.707482,0.134783
2020-04-30,0.812991,0.339973
2020-05-31,0.621796,0.768949
2020-06-30,0.449415,0.109499


In [34]:
# list('BC') evaluates to ['B', 'C'], so this selects columns B and C.
df[list('BC')]

Unnamed: 0,B,C
2020-01-31,0.010205,0.176416
2020-02-29,0.838292,0.706108
2020-03-31,0.947923,0.264894
2020-04-30,0.122319,0.032697
2020-05-31,0.909555,0.484812
2020-06-30,0.031591,0.622136


## 行选取

可以使用切片操作选取行，既可以按位置切片，也可以按索引标签（如日期字符串）切片：

In [25]:
# An integer slice inside [] picks rows by position; the output below shows
# the first three rows (the stop position 3 is excluded).
df[0:3]

Unnamed: 0,A,B,C,D
2020-01-31,0.545802,0.010205,0.176416,0.402558
2020-02-29,0.815496,0.838292,0.706108,0.002344
2020-03-31,0.707482,0.947923,0.264894,0.134783


In [26]:
# Date strings as slice bounds select rows by index label; the bounds do not
# have to be present in the index (the output below runs from 2020-02-29 to
# 2020-06-30).
df['20200201':'20200701']

Unnamed: 0,A,B,C,D
2020-02-29,0.815496,0.838292,0.706108,0.002344
2020-03-31,0.707482,0.947923,0.264894,0.134783
2020-04-30,0.812991,0.122319,0.032697,0.339973
2020-05-31,0.621796,0.909555,0.484812,0.768949
2020-06-30,0.449415,0.031591,0.622136,0.109499


## 按标签获取 df.loc

df.loc 按标签选取数据，参数必须是标签值，不能使用位置序号：

In [35]:
# dates[0] is the first index label (2020-01-31); the result is that row,
# shown below with the column names as its index.
df.loc[dates[0]]

A    0.545802
B    0.010205
C    0.176416
D    0.402558
Name: 2020-01-31 00:00:00, dtype: float64

In [37]:
# ':' keeps every row; the list selects columns A and C by label.
df.loc[:, ['A', 'C']]

Unnamed: 0,A,C
2020-01-31,0.545802,0.176416
2020-02-29,0.815496,0.706108
2020-03-31,0.707482,0.264894
2020-04-30,0.812991,0.032697
2020-05-31,0.621796,0.484812
2020-06-30,0.449415,0.622136


In [41]:
# Row label slice combined with a column list; in the output below the rows
# 2020-02-29 through 2020-04-30 are the ones falling inside the date bounds.
df.loc['20200201': '20200501', ['B', 'C']]

Unnamed: 0,B,C
2020-02-29,0.838292,0.706108
2020-03-31,0.947923,0.264894
2020-04-30,0.122319,0.032697


In [42]:
# One row label plus one column label; the output below is a single scalar.
df.loc['20200229', 'C']

0.7061081726568768

获取单个标量值时，也可以使用 df.at，结果相同：

In [44]:
# .at with one row label and one column label; the output below is the
# scalar stored in column C for 2020-02-29.
df.at['20200229', 'C']

0.7061081726568768

## 按位置获取 df.iloc

df.iloc 按整数位置（从 0 开始）选取数据：

In [48]:
# Integer position 3 is the fourth row (2020-04-30 in the output below).
df.iloc[3]

A    0.812991
B    0.122319
C    0.032697
D    0.339973
Name: 2020-04-30 00:00:00, dtype: float64

In [50]:
# All rows, column positions 2 and 3 — columns C and D in the output below.
df.iloc[:, range(2, 4)]

Unnamed: 0,C,D
2020-01-31,0.176416,0.402558
2020-02-29,0.706108,0.002344
2020-03-31,0.264894,0.134783
2020-04-30,0.032697,0.339973
2020-05-31,0.484812,0.768949
2020-06-30,0.622136,0.109499


In [54]:
# Row position 3 and column position 3: the 2020-04-30 row, column D.
df.iloc[3, 3]

0.3399728599783527

等同于按标签的 df.loc 或按位置的 df.iat：

In [55]:
# Label-based lookup: dates[3] is the fourth index label (2020-04-30) and
# 'D' is the column label.
df.loc[dates[3], 'D']

0.3399728599783527

In [56]:
# .iat takes integer row/column positions; the output below is the scalar at
# row position 3, column position 3.
df.iat[3, 3]

0.3399728599783527