In [1]:
import numpy as np
import pandas as pd

## 一、索引器
## 1.表的列索引

In [2]:
# Load the sample table, keeping only the six columns named in `usecols`.
df = pd.read_csv('learn_pandas.csv',usecols=['School','Grade','Name','Gender','Weight','Transfer'])
df.head()   # usecols: only the columns in that list are read

Unnamed: 0,School,Grade,Name,Gender,Weight,Transfer
0,Shanghai Jiao Tong University,Freshman,Gaopeng Yang,Female,46.0,N
1,Peking University,Freshman,Changqiang You,Male,70.0,N
2,Shanghai Jiao Tong University,Senior,Mei Sun,Male,89.0,N
3,Fudan University,Sophomore,Xiaojuan Sun,Female,41.0,N
4,Fudan University,Sophomore,Gaojuan You,Male,74.0,N


In [4]:
# A single column name inside [] selects that column as a Series.
df['Name'].head()

0      Gaopeng Yang
1    Changqiang You
2           Mei Sun
3      Xiaojuan Sun
4       Gaojuan You
Name: Name, dtype: object

In [5]:
# A list of column names inside [] selects those columns as a DataFrame.
df[['Name','Gender']].head()

Unnamed: 0,Name,Gender
0,Gaopeng Yang,Female
1,Changqiang You,Male
2,Mei Sun,Male
3,Xiaojuan Sun,Female
4,Gaojuan You,Male


In [6]:
# Attribute-style column access; the output matches df['Name'].head().
df.Name.head()

0      Gaopeng Yang
1    Changqiang You
2           Mei Sun
3      Xiaojuan Sun
4       Gaojuan You
Name: Name, dtype: object

## 2.序列的行索引
#### 【a】以字符串为索引的Series

In [7]:
# Series with a string index that contains a repeated label ('a' occurs four times).
s = pd.Series([1,2,3,4,5,6],index = ['a','b','a','a','a','c'])
s

a    1
b    2
a    3
a    4
a    5
c    6
dtype: int64

In [8]:
# A label that occurs several times returns a Series with every match.
s['a']

a    1
a    3
a    4
a    5
dtype: int64

In [9]:
# A label that occurs once returns the scalar value itself.
s['b']

2

In [11]:
# A list of labels returns the corresponding entries as a Series.
s[['b','c']]

b    2
c    6
dtype: int64

In [15]:
# Label slice with step -2: walk backwards from 'c' to 'b', taking every
# second element; both endpoint labels are included in the result.
s['c':'b':-2]

c    6
a    4
b    2
dtype: int64

#### 【b】以整数为索引的Series

In [16]:
# Rebind `s` to a Series whose integer index is unordered and repeats the label 1.
s = pd.Series(['a','b','c','d','e','f'],index = [1,3,1,2,5,4])
s

1    a
3    b
1    c
2    d
5    e
4    f
dtype: object

In [17]:
# Same values with no explicit index; the output shows the default 0..5 integer index.
d = pd.Series(['a','b','c','d','e','f'])
d

0    a
1    b
2    c
3    d
4    e
5    f
dtype: object

In [18]:
# An integer key is matched against the index labels here: both entries labelled 1 come back.
s[1]

1    a
1    c
dtype: object

In [19]:
# A list of integers is also looked up by label (labels 2 and 3), in the order given.
s[[2,3]]

2    d
3    b
dtype: object

In [22]:
# An integer slice is positional, not label-based: this picks positions 1 and 3.
s[1:-2:2]

3    b
2    d
dtype: object

In [23]:
# Positional slice: positions 1 through 3 (the stop position is excluded).
s[1:-2]

3    b
1    c
2    d
dtype: object

## 3.loc索引器

In [3]:
df_demo = df.set_index('Name')  # use the Name column as the row index
df_demo.head()

Unnamed: 0_level_0,School,Grade,Gender,Weight,Transfer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Gaopeng Yang,Shanghai Jiao Tong University,Freshman,Female,46.0,N
Changqiang You,Peking University,Freshman,Male,70.0,N
Mei Sun,Shanghai Jiao Tong University,Senior,Male,89.0,N
Xiaojuan Sun,Fudan University,Sophomore,Female,41.0,N
Gaojuan You,Fudan University,Sophomore,Male,74.0,N


#### 【a】*为单个元素

In [5]:
df_demo.loc['Qiang Sun']   # several rows share this name, so a DataFrame is returned

Unnamed: 0_level_0,School,Grade,Gender,Weight,Transfer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Qiang Sun,Tsinghua University,Junior,Female,53.0,N
Qiang Sun,Tsinghua University,Sophomore,Female,40.0,N
Qiang Sun,Shanghai Jiao Tong University,Junior,Female,,N


In [6]:
df_demo.loc['Quan Zhao']  # the name is unique, so the row is returned as a Series

School      Shanghai Jiao Tong University
Grade                              Junior
Gender                             Female
Weight                                 53
Transfer                                N
Name: Quan Zhao, dtype: object

In [7]:
df_demo.loc['Qiang Sun','School'] # returns a Series: one School value per matching row

Name
Qiang Sun              Tsinghua University
Qiang Sun              Tsinghua University
Qiang Sun    Shanghai Jiao Tong University
Name: School, dtype: object

In [8]:
df_demo.loc['Quan Zhao','School']  # returns a single scalar value

'Shanghai Jiao Tong University'

#### 【b】*为元素列表

In [9]:
# Lists on both axes: pick the listed row labels and the listed columns.
df_demo.loc[['Qiang Sun','Quan Zhao'],['School','Gender']]

Unnamed: 0_level_0,School,Gender
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Qiang Sun,Tsinghua University,Female
Qiang Sun,Tsinghua University,Female
Qiang Sun,Shanghai Jiao Tong University,Female
Quan Zhao,Shanghai Jiao Tong University,Female


#### 【c】*为切片

In [10]:
# Label slices on both axes; the output includes both endpoints
# (the 'Gaoqiang Qian' row and the 'Gender' column).
df_demo.loc['Gaojuan You':'Gaoqiang Qian','School':'Gender']

Unnamed: 0_level_0,School,Grade,Gender
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Gaojuan You,Fudan University,Sophomore,Male
Xiaoli Qian,Tsinghua University,Freshman,Female
Qiang Chu,Shanghai Jiao Tong University,Freshman,Female
Gaoqiang Qian,Tsinghua University,Junior,Female


In [18]:
df_loc_slice_demo = df_demo.copy() # work on a copy so df_demo itself is not modified
df_loc_slice_demo.index = range(df_demo.shape[0],0,-1) # descending integer labels n..1 (range excludes the stop value 0)
df_loc_slice_demo.loc[5:1] # label slice: rows labelled 5 down to 1, both ends included

Unnamed: 0,School,Grade,Gender,Weight,Transfer
5,Fudan University,Junior,Female,46.0,N
4,Tsinghua University,Senior,Female,50.0,N
3,Shanghai Jiao Tong University,Senior,Female,45.0,N
2,Shanghai Jiao Tong University,Senior,Male,71.0,N
1,Tsinghua University,Sophomore,Male,51.0,N


In [16]:
df_loc_slice_demo.loc[5:0]   # the end label 0 is not in the index; the largest matching subset is returned

Unnamed: 0,School,Grade,Gender,Weight,Transfer
5,Fudan University,Junior,Female,46.0,N
4,Tsinghua University,Senior,Female,50.0,N
3,Shanghai Jiao Tong University,Senior,Female,45.0,N
2,Shanghai Jiao Tong University,Senior,Male,71.0,N
1,Tsinghua University,Sophomore,Male,51.0,N


#### 【d】*为布尔列表

In [19]:
df_demo.loc[df_demo.Weight>70].head()   # df_demo.Weight>70 is a boolean mask; keeps the rows whose Weight exceeds 70

Unnamed: 0_level_0,School,Grade,Gender,Weight,Transfer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Mei Sun,Shanghai Jiao Tong University,Senior,Male,89.0,N
Gaojuan You,Fudan University,Sophomore,Male,74.0,N
Xiaopeng Zhou,Shanghai Jiao Tong University,Freshman,Male,74.0,N
Xiaofeng Sun,Tsinghua University,Senior,Male,71.0,N
Qiang Zheng,Shanghai Jiao Tong University,Senior,Male,87.0,N


In [20]:
df_demo.loc[df_demo.Grade.isin(['Freshman','Senior'])].head() # isin gives a membership mask; simple, but a pattern worth remembering

Unnamed: 0_level_0,School,Grade,Gender,Weight,Transfer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Gaopeng Yang,Shanghai Jiao Tong University,Freshman,Female,46.0,N
Changqiang You,Peking University,Freshman,Male,70.0,N
Mei Sun,Shanghai Jiao Tong University,Senior,Male,89.0,N
Xiaoli Qian,Tsinghua University,Freshman,Female,51.0,N
Qiang Chu,Shanghai Jiao Tong University,Freshman,Female,52.0,N


In [21]:
# Group 1: Fudan University seniors with Weight above 70.
condition_1_1 = df_demo.School =='Fudan University'
condition_1_2 = df_demo.Grade == 'Senior'
condition_1_3 = df_demo.Weight>70
condition_1 = condition_1_1 & condition_1_2 & condition_1_3
# Group 2: Peking University students who are NOT seniors, with Weight above 80.
condition_2_1 = df_demo.School == 'Peking University'
condition_2_2 = df_demo.Grade == 'Senior'
condition_2_3 = df_demo.Weight > 80
condition_2 = condition_2_1 & (~condition_2_2) & condition_2_3
# Keep the rows that fall into either group.
df_demo.loc[condition_1 | condition_2]

Unnamed: 0_level_0,School,Grade,Gender,Weight,Transfer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Qiang Han,Peking University,Freshman,Male,87.0,N
Chengpeng Zhou,Fudan University,Senior,Male,81.0,N
Changpeng Zhao,Peking University,Freshman,Male,83.0,N
Chengpeng Qian,Fudan University,Senior,Male,73.0,Y


#### 【e】*为函数

In [24]:
def condition(x):
    """Build a boolean row mask for two groups of students.

    A row is selected when it is either
      * a Fudan University senior with Weight above 70, or
      * a Peking University non-senior with Weight above 80.

    Parameters
    ----------
    x : object exposing ``School``, ``Grade`` and ``Weight`` attributes
        that support element-wise comparison (e.g. a pandas DataFrame).

    Returns
    -------
    The element-wise OR of the two group masks.
    """
    is_senior = x.Grade == 'Senior'

    # Group 1: Fudan seniors heavier than 70.
    fudan_group = (x.School == 'Fudan University') & is_senior & (x.Weight > 70)

    # Group 2: Peking non-seniors heavier than 80.
    peking_group = (x.School == 'Peking University') & (~is_senior) & (x.Weight > 80)

    return fudan_group | peking_group
# Pass the function itself (not a call) to .loc; the displayed rows match
# the ones selected by the explicit boolean masks built from df_demo.
df_demo.loc[condition]

Unnamed: 0_level_0,School,Grade,Gender,Weight,Transfer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Qiang Han,Peking University,Freshman,Male,87.0,N
Chengpeng Zhou,Fudan University,Senior,Male,81.0,N
Changpeng Zhao,Peking University,Freshman,Male,83.0,N
Chengpeng Qian,Fudan University,Senior,Male,73.0,Y
