# Chap03 索引

In [1]:
import numpy as np
import pandas as pd

## 索引器
### **表**的列索引
1. 取出单列：通过`df[列名]`，返回值为Series
   - 如果列名不包含空格，也可以通过`df.列名`取出
2. 取出多列：通过`df[多个列名组成的列表]`，返回值为DataFrame
### **序列**的行索引
1. 取出单个索引的对应元素：`s[item]`。如果该索引标签只对应一个值，返回标量；如果对应多个值（索引有重复），返回一个Series
2. 取出多个索引的对应元素：`s[items的列表]`
3. 切片`s[n1:n2:n3]`：用索引标签切片时，两个端点都会被包含在结果中，并且要求端点标签在索引中唯一；如果前后端点的值重复出现，需要先经过排序`sort_index`才能使用切片
### loc与iloc索引器
1. 基于元素的loc索引器：`loc[*,*]`
   - 第一个\*代表行的选择，第二个\*代表列的选择。如果第二个\*省略，则只筛选行
   - \*的位置有五类合法的对象：单个元素、元素列表、元素切片、布尔列表以及函数
   - 这里的函数必须以前面的四种合法形式之一为返回值，并且函数的输入值为DataFrame本身；也支持**lambda**表达式
   - 函数无法返回如start:end:step的切片形式，返回切片时要用slice对象进行包装
   - Series也可以使用loc索引
2. 基于位置的iloc索引器：`iloc[*,*]`
   - iloc的使用与loc完全类似，只不过是针对位置进行筛选
   - \*的位置有五类合法对象：整数、整数列表、整数切片、布尔列表以及函数
   - Series也可以使用iloc索引
### **注意**
1. 不要使用链式赋值：在对表或者序列赋值时，应当在使用一层索引器后直接进行赋值操作。这是因为多次索引后的赋值是赋在临时返回的copy副本上，并没有真正修改原对象中的元素
   ```python
   df_chain = pd.DataFrame([[0,0],[1,0],[-1,0]], columns = list('AB'))
   df_chain[df_chain.A!=0].B=1
   # 先用方括号做布尔筛选（返回临时副本），再用点号取列并赋值：修改的是副本，df_chain本身不变
   # 正确写法：df_chain.loc[df_chain.A!=0, 'B'] = 1
   ```


In [2]:
# Load only the six columns used in this chapter, then preview the first rows.
df = pd.read_csv(
    './data/learn_pandas.csv',
    usecols=['School', 'Grade', 'Name', 'Gender', 'Weight', 'Transfer'],
)
df.head()

Unnamed: 0,School,Grade,Name,Gender,Weight,Transfer
0,Shanghai Jiao Tong University,Freshman,Gaopeng Yang,Female,46.0,N
1,Peking University,Freshman,Changqiang You,Male,70.0,N
2,Shanghai Jiao Tong University,Senior,Mei Sun,Male,89.0,N
3,Fudan University,Sophomore,Xiaojuan Sun,Female,41.0,N
4,Fudan University,Sophomore,Gaojuan You,Male,74.0,N


In [3]:
# Use the Name column as the row index so the loc examples below can select rows by name label.
df_demo = df.set_index('Name')
df_demo.head()

Unnamed: 0_level_0,School,Grade,Gender,Weight,Transfer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Gaopeng Yang,Shanghai Jiao Tong University,Freshman,Female,46.0,N
Changqiang You,Peking University,Freshman,Male,70.0,N
Mei Sun,Shanghai Jiao Tong University,Senior,Male,89.0,N
Xiaojuan Sun,Fudan University,Sophomore,Female,41.0,N
Gaojuan You,Fudan University,Sophomore,Male,74.0,N


In [7]:
# Row selector: boolean Series (Weight > 70); column selector: a single label, so the result is a Series.
df_demo.loc[df_demo.Weight>70,'School'].head()

Name
Mei Sun          Shanghai Jiao Tong University
Gaojuan You                   Fudan University
Xiaopeng Zhou    Shanghai Jiao Tong University
Xiaofeng Sun               Tsinghua University
Qiang Zheng      Shanghai Jiao Tong University
Name: School, dtype: object

In [8]:
# Keep rows whose Grade is Freshman or Senior; the column selector is omitted, so all columns are returned.
df_demo.loc[df_demo.Grade.isin(['Freshman','Senior'])].head()

Unnamed: 0_level_0,School,Grade,Gender,Weight,Transfer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Gaopeng Yang,Shanghai Jiao Tong University,Freshman,Female,46.0,N
Changqiang You,Peking University,Freshman,Male,70.0,N
Mei Sun,Shanghai Jiao Tong University,Senior,Male,89.0,N
Xiaoli Qian,Tsinghua University,Freshman,Female,51.0,N
Qiang Chu,Shanghai Jiao Tong University,Freshman,Female,52.0,N


In [11]:
# Compound conditions: | is "or", & is "and", ~ is negation.
# df.dtypes == 'object' is a boolean Series over the columns; negating it in the
# column position of loc keeps only the non-object columns.
condition = df.dtypes=='object'
df.loc[:,~condition]

Unnamed: 0,Weight
0,46.0
1,70.0
2,89.0
3,41.0
4,74.0
...,...
195,46.0
196,50.0
197,45.0
198,71.0


In [12]:
# Example where the loc selector (*) is a function.
def condition(x):
    """Build the boolean row mask used by the loc function-selector example.

    A row is selected when either of the following holds:
      * School is 'Fudan University', Grade is 'Senior' and Weight > 70
      * School is 'Peking University', Grade is not 'Senior' and Weight > 80

    Parameters
    ----------
    x : DataFrame
        Frame exposing ``School``, ``Grade`` and ``Weight`` columns.

    Returns
    -------
    Series
        Boolean mask aligned with the index of ``x``.
    """
    # Both branches test the same Grade predicate, so compute it once.
    is_senior = x.Grade == 'Senior'
    fudan_senior_over_70 = (x.School == 'Fudan University') & is_senior & (x.Weight > 70)
    peking_non_senior_over_80 = (x.School == 'Peking University') & (~is_senior) & (x.Weight > 80)
    return fudan_senior_over_70 | peking_non_senior_over_80
# Pass the function object itself: loc calls it with df_demo and filters rows by the returned boolean mask.
df_demo.loc[condition]

Unnamed: 0_level_0,School,Grade,Gender,Weight,Transfer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Qiang Han,Peking University,Freshman,Male,87.0,N
Chengpeng Zhou,Fudan University,Senior,Male,81.0,N
Changpeng Zhao,Peking University,Freshman,Male,83.0,N
Chengpeng Qian,Fudan University,Senior,Male,73.0,Y


In [13]:
# Each lambda ignores its argument and returns a single label, so this is a row-label / column-label lookup that yields a scalar.
df_demo.loc[lambda x:'Quan Zhao', lambda x:'Gender']

'Female'

In [14]:
# A function cannot return the start:end literal syntax, so the label slice is wrapped in a slice object.
# Both endpoint labels appear in the result.
df_demo.loc[lambda x: slice('Gaojuan You', 'Gaoqiang Qian')]

Unnamed: 0_level_0,School,Grade,Gender,Weight,Transfer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Gaojuan You,Fudan University,Sophomore,Male,74.0,N
Xiaoli Qian,Tsinghua University,Freshman,Female,51.0,N
Qiang Chu,Shanghai Jiao Tong University,Freshman,Female,52.0,N
Gaoqiang Qian,Tsinghua University,Junior,Female,50.0,N


In [15]:
# Integer positions: second row, second column (positions are 0-based).
df_demo.iloc[1,1]

'Freshman'

In [16]:
# Lists of integer positions: first two rows and first two columns.
df_demo.iloc[[0,1],[0,1]]

Unnamed: 0_level_0,School,Grade
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Gaopeng Yang,Shanghai Jiao Tong University,Freshman
Changqiang You,Peking University,Freshman


In [17]:
# Integer slices: the end position is excluded, so this returns rows 1-3 and columns 2-3.
df_demo.iloc[1:4, 2:4]

Unnamed: 0_level_0,Gender,Weight
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Changqiang You,Male,70.0
Mei Sun,Male,89.0
Xiaojuan Sun,Female,41.0


In [18]:
# Function selector for iloc: the lambda returns a slice object covering positions 1 to 3.
df_demo.iloc[lambda x: slice(1,4)]

Unnamed: 0_level_0,School,Grade,Gender,Weight,Transfer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Changqiang You,Peking University,Freshman,Male,70.0,N
Mei Sun,Shanghai Jiao Tong University,Senior,Male,89.0,N
Xiaojuan Sun,Fudan University,Sophomore,Female,41.0,N


In [22]:
# When using a boolean mask with iloc, a boolean Series cannot be passed directly; pass the Series' .values instead.
# For boolean filtering, loc should therefore be preferred.
df_demo.iloc[(df_demo.Weight>80).values].head()

Unnamed: 0_level_0,School,Grade,Gender,Weight,Transfer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Mei Sun,Shanghai Jiao Tong University,Senior,Male,89.0,N
Qiang Zheng,Shanghai Jiao Tong University,Senior,Male,87.0,N
Qiang Han,Peking University,Freshman,Male,87.0,N
Chengpeng Zhou,Fudan University,Senior,Male,81.0,N
Feng Han,Shanghai Jiao Tong University,Sophomore,Male,82.0,N
