In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

### 3.2.1 Series对象
Series是类似数组的一维数据结构，由index索引数组和value值数组组成。
创建Series方法如下:
`Series([data, index, ...])` 无index则默认从0开始

In [2]:
# Build the Series `height` for five players: the values are the players'
# heights and the index labels are their jersey numbers (kept as strings).
jersey_numbers = ['13', '14', '7', '2', '9']
player_heights = [187, 190, 185, 178, 185]
height = Series(data=player_heights, index=jersey_numbers)
height

13    187
14    190
7     185
2     178
9     185
dtype: int64

In [3]:
# A Series resembles a dict: the elements of the index and values arrays that
# share a position can be viewed as key-value pairs.
# Build a Series from a dict; the dict keys become the index labels.
jersey_to_height = {'13': 187, '14': 190, '7': 185, '2': 178, '9': 185}
height1 = Series(jersey_to_height)
height1

13    187
14    190
7     185
2     178
9     185
dtype: int64

### 3.2.2 Series数据访问
基于索引名: `obj[index(List)]`  
基于位置: `obj[loc(List)]` `obj[a:b, c,d]`  
条件筛选: `obj[cond]`
思考：如果index本身为数字类型,则索引名位置均为数字，哪种访问会被覆盖?

#### 查询身高

In [4]:
# Label-based access: look up a single value by its index label.
height['13']

187

In [5]:
# Label-based access with a list of labels: returns a Series whose rows
# follow the order of the requested labels.
height[['13','2','7']]

13    187
2     178
7     185
dtype: int64

In [6]:
# Integer slice on a string-labelled index is positional: it selects the
# elements at positions 1 and 2 (the end position is excluded).
height[1:3]

14    190
7     185
dtype: int64

In [7]:
# Conditional filtering: build a boolean mask from the underlying value array
# and keep only the players whose height is at least 186.
at_least_186 = height.values >= 186
height[at_least_186]

13    187
14    190
dtype: int64

#### 修改身高

In [8]:
# Overwrite the value stored under label '13', then read it back to confirm.
height['13']=188
height['13']

188

In [9]:
# Slice assignment: the scalar 160 is written to the elements at
# positions 1 and 2; `height` is modified in place.
height[1:3]=160
height

13    188
14    160
7     160
2     178
9     185
dtype: int64

#### 增加球员
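要一次添加多个新数据，可将新增数据单独创建为一个Series对象，然后用 `pd.concat([原对象, 新对象])` 产生新的Series对象，原对象不会被修改（旧版本中的 `原对象.append()` 已在 pandas 2.0 中移除）。单个元素也可以通过给新的索引标签赋值来添加，例如 `obj['新标签'] = 值`，这种方式会直接修改原对象。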

In [10]:
# Add players: put the new players in their own Series and concatenate it
# with `height`. pd.concat returns a new Series and leaves `height` unchanged.
# Series.append() was deprecated in pandas 1.4 and removed in pandas 2.0, so
# pd.concat is used here to keep the cell runnable on current pandas.
a = Series([190,187], ['23','5'])
newheight = pd.concat([height, a])
newheight

13    188
14    160
7     160
2     178
9     185
23    190
5     187
dtype: int64

In [11]:
# Display the original Series: it still has only the five original players,
# because the previous cell produced a new object instead of modifying it.
height

13    188
14    160
7     160
2     178
9     185
dtype: int64

#### 删除球员
Series的drop()函数不删除原始对象的数据

In [12]:
# Remove players by index label. drop() returns a new Series without the
# listed labels; `height` itself keeps all of its rows.
labels_to_remove = ['13', '9']
newheight = height.drop(labels_to_remove)
newheight

14    160
7     160
2     178
dtype: int64

#### 更改球衣号码(索引)

In [13]:
# Relabel the Series in place: replace the string jersey numbers with the
# integer labels 1 through 5 (one new label per existing element).
height.index = list(range(1, 6))
height

1    188
2    160
3    160
4    178
5    185
dtype: int64

**当索引是数字类型时，基于位置的访问需要使用iloc[]**

In [14]:
# Rebuild `height` with integer jersey numbers as index labels.
jersey_numbers = [13, 14, 7, 2, 9]
height = Series([187, 190, 185, 178, 185], index=jersey_numbers)
print(height)
# With an integer index, a list inside [] is interpreted as index labels ...
print(height[[14, 7]])
# ... so access by position has to go through iloc.
print(height.iloc[0])

13    187
14    190
7     185
2     178
9     185
dtype: int64
14    190
7     185
dtype: int64
187


### 3.2.3 DataFrame对象
DataFrame是类似表格的二维数据结构，包括values值、index行索引和columns列索引三部分。
值由ndarray的二维数组对象构成,行、列索引则保存为ndarray一维数组
创建DataFrame方法如下:
`DataFrame(data, index=[...], columns=[...])`

In [15]:
# Build the `students` table: one inner list per student (row), with row
# labels 1-3 and the columns age / height / weight.
column_names = ['age', 'height', 'weight']
row_labels = [1, 2, 3]
data = [
    [19, 170, 168],
    [20, 165, 65],
    [18, 175, 65],
]
students = DataFrame(data=data, index=row_labels, columns=column_names)
students

Unnamed: 0,age,height,weight
1,19,170,168
2,20,165,65
3,18,175,65


### 3.2.4 DataFrame数据访问
类似ndarray二维数组，可通过值的位置序号获取，也可通过索引访问

#### 学生信息查询

In [16]:
# Label-based access to a single cell: row label 1, column label 'age'.
students.loc[1,'age']

19

In [17]:
# Label-based access with lists: rows labelled 1 and 3, restricted to the
# 'height' and 'weight' columns.
students.loc[[1,3],['height','weight']]

Unnamed: 0,height,weight
1,170,168
3,175,65


In [18]:
# Position-based access: rows at positions 0 and 2, columns at positions
# 0 and 1 (positions start at 0 regardless of the index labels).
students.iloc[[0,2],[0,1]]

Unnamed: 0,age,height
1,19,170
3,18,175


In [19]:
# A bare ':' in the row slot selects every row; only the listed columns are kept.
students.loc[:,['height','weight']]

Unnamed: 0,height,weight
1,170,168
2,165,65
3,175,65


In [20]:
# A list of column labels inside [] selects those columns for all rows,
# giving the same result as the .loc[:, [...]] form.
students[['height','weight']]

Unnamed: 0,height,weight
1,170,168
2,165,65
3,175,65


In [21]:
# Position-based slicing: rows from position 1 to the end, columns at
# positions 0 and 1 (the end of a positional slice is excluded).
students.iloc[1:, 0:2]

Unnamed: 0,age,height
2,20,165
3,18,175


In [22]:
# An integer slice inside [] selects rows by position (here positions 1 and 2),
# not by index label, and keeps every column.
students[1:3]

Unnamed: 0,age,height,weight
2,20,165,65
3,18,175,65


In [23]:
# Conditional selection: build a boolean Series marking the students whose
# height is at least 168, show it, then use it as the row selector in .loc.
height_column = students['height']
mask = height_column >= 168
print(mask)
students.loc[mask, ['height', 'weight']]

1     True
2    False
3     True
Name: height, dtype: bool


Unnamed: 0,height,weight
1,170,168
3,175,65


#### 增加学生信息
DataFrame对象可以用 `df[列名] = 值` 直接添加新列，但不能用同样的写法增加新行。
当新增的列索引标签不存在时，添加新列；若存在则修改列值。
增加行通常通过两个对象的合并（如 `pd.concat`）实现；单行也可以用 `df.loc[新行标签] = 值列表` 添加。

In [24]:
# Assigning to a column label that does not exist yet adds a new column;
# the list supplies one value per row.
students['expense']=[1500,1600,1200]

#### 修改学生信息

In [25]:
# Assigning to an existing column label overwrites it; the scalar 1000 is
# applied to every row.
students['expense']=1000
students

Unnamed: 0,age,height,weight,expense
1,19,170,168,1000
2,20,165,65,1000
3,18,175,65,1000


In [26]:
# Overwrite the whole row labelled 1 (age, height, weight, expense in column
# order). Its expense becomes 20, which the next cell's "< 500" filter picks up.
students.loc[1, :]=[21,188,70,20]

In [27]:
# Conditional update: for every row whose expense is below 500, set the
# expense to 1200 (a single .loc call avoids chained indexing).
low_expense = students['expense'] < 500
students.loc[low_expense, 'expense'] = 1200
students

Unnamed: 0,age,height,weight,expense
1,21,188,70,1200
2,20,165,65,1000
3,18,175,65,1000


#### 删除学生信息
drop()通过参数axis指明按照行或列删除,且不修改原始对象的数据

In [28]:
# Returns a copy without the row labelled 1; `students` itself is not modified.
students.drop(1, axis=0) # axis=0 means rows

Unnamed: 0,age,height,weight,expense
2,20,165,65,1000
3,18,175,65,1000


In [29]:
# Returns a copy without the 'expense' column; `students` itself is not modified.
students.drop('expense', axis=1) # axis=1 means columns

Unnamed: 0,age,height,weight
1,21,188,70
2,20,165,65
3,18,175,65


In [30]:
# A list of labels drops several rows at once (here the rows labelled 1 and 2).
students.drop([1,2], axis=0) # axis=0 means rows

Unnamed: 0,age,height,weight,expense
3,18,175,65,1000


In [31]:
# To delete directly from the original object, pass inplace=True: the call
# then modifies `students` itself instead of returning a new DataFrame.
# NOTE(review): re-running this cell will fail because the columns are already
# gone; reassigning `students = students.drop(...)` is the usual alternative.
students.drop(['age','weight'], axis=1, inplace=True)