In [1]:
import numpy as np
import pandas as pd

## 序列

In [2]:
# Build a float Series holding 0.0..3.0, labelled 'a' through 'd'.
obj = pd.Series(data=np.arange(4.0), index=list('abcd'))
print(obj)

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64


In [3]:
# Look up a single element through the Series' own index label.
label = 'b'
print(obj[label])

1.0


In [4]:
# Look up a single element by its integer position.
# `obj[1]` on a string-labelled Series relied on the positional fallback of `[]`,
# which pandas has deprecated; `.iloc` is the explicit positional accessor and
# returns the same element (position 1 -> 1.0).
print(obj.iloc[1])

1.0


In [5]:
# Same values as before, but this time labelled with the integers 1..4.
obj2 = pd.Series(data=np.arange(4.0), index=np.arange(1, 5))
print(obj2)

1    0.0
2    1.0
3    2.0
4    3.0
dtype: float64


<font color="red" size=5>注意：当 Series 的索引本身是整数时，obj2[1] 按索引标签取值，而不是按默认的位置序号取值；需要按位置取值时请使用 iloc</font>

In [6]:
# The index is made of integers, so the key 1 is matched as a label (not as position 1).
key = 1
print(obj2[key])

0.0


In [7]:
# Positional slice: takes positions 2 and 3 (the stop position 4 is excluded).
print(obj.iloc[2:4])

c    2.0
d    3.0
dtype: float64


In [8]:
# Select several elements at once with a list of labels; the result follows the list's order.
wanted_labels = ['b', 'a', 'd']
print(obj[wanted_labels])

b    1.0
a    0.0
d    3.0
dtype: float64


In [9]:
# Boolean indexing: keep only the elements whose value is below 2.
below_two = obj < 2
print(obj[below_two])

a    0.0
b    1.0
dtype: float64


<font color="red" size=6>Series 按索引标签切片时为闭区间（起止标签都包含在结果中）；按整数位置切片时仍为左闭右开</font>

In [10]:
# Slice by label: both endpoints, 'a' and 'c', are part of the result.
print(obj.loc['a':'c'])

a    0.0
b    1.0
c    2.0
dtype: float64


## DataFrame

In [11]:
# 4x4 integer frame holding 0..15 (filled row by row) with labelled rows and columns.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
print(data)

          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15


In [12]:
# Select a single column by its name; the result is a Series named after the column.
column_name = 'two'
print(data[column_name])

Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int32


In [13]:
# Select rows by position; the slice is half-open, so 1:2 yields only the row at position 1.
print(data.iloc[1:2])

          one  two  three  four
Colorado    4    5      6     7


In [14]:
# Filter rows on the values of one column: keep the rows whose 'three' exceeds 5.
three_above_five = data['three'] > 5
print(data[three_above_five])

          one  two  three  four
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15


## 使用loc, iloc选择数据
+ loc（按轴标签选择），iloc（按整数位置选择）

In [15]:
# loc: label-based selection -- row 'Colorado', columns 'two' and 'three'.
row_label = 'Colorado'
column_labels = ['two', 'three']
res = data.loc[row_label, column_labels]
print(res)

two      5
three    6
Name: Colorado, dtype: int32


In [16]:
# iloc: position-based selection -- row at position 1, columns at positions 1 and 2.
row_pos, column_pos = 1, [1, 2]
res = data.iloc[row_pos, column_pos]
print(res)

two      5
three    6
Name: Colorado, dtype: int32


DataFrame的其他索引选项见P143

## 通过map修改一列中的一些值

In [17]:
# Small integer frame (columns 'a', 'b', 'c') used by the map examples that follow.
data = pd.DataFrame(
    np.array([
        [1, 2, 3],
        [1, 5, 6],
        [1, 8, 9],
        [2, 5, 1],
        [3, 7, 3],
    ]),
    columns=list('abc'),
)
data

Unnamed: 0,a,b,c
0,1,2,3
1,1,5,6
2,1,8,9
3,2,5,1
4,3,7,3


In [18]:
# Pass a dict to map: every value of column 'a' is replaced by the dict entry stored under it.
# NOTE: values that are not keys of the dict become NaN, so this cell should be run only once
# per freshly built frame (a second run would look up 10/20/30, which are not keys).
recode = {1: 10, 2: 20, 3: 30}
data['a'] = data['a'].map(recode)
data

Unnamed: 0,a,b,c
0,10,2,3
1,10,5,6
2,10,8,9
3,20,5,1
4,30,7,3


In [19]:
# Pass an anonymous function to map: it is called on every value of column 'b' (here doubling it).
data['b'] = data['b'].map(lambda value: 2 * value)
data

Unnamed: 0,a,b,c
0,10,4,3
1,10,10,6
2,10,16,9
3,20,10,1
4,30,14,3


### 从DataFrame中取某一行某一列的数据
<font color="red">注意：只有当筛选所依据的这一列中目标值不重复（即筛选结果恰好只有一行）时，才能取到唯一的数据</font>

In [20]:
# Select the row(s) whose 'b' value equals 4.
cut = data[data['b'] == 4]
# A scalar lookup is only meaningful when exactly one row matched: fail loudly otherwise,
# instead of silently taking the first of several matches or hitting an opaque
# IndexError when nothing matched.
if len(cut) != 1:
    raise ValueError(f"expected exactly one row with b == 4, found {len(cut)}")
# Pick the column first and the row second, so the value keeps the dtype of column 'a'
# (extracting the whole row first can upcast values when the columns have different dtypes).
result = cut['a'].iloc[0]
print(result)

10
