# SciPy
## 官网
<a href='https://scipy.org/'>https://scipy.org/</a>
## 特征
- 基于Python的软件生态系统
- 开源免费
- 主要为数学、科学和工程服务
## 包括
- Numpy
- SciPy library
- Matplotlib
- IPython
- Sympy
- pandas
## SciPy中的数据结构
- ndarray（N维数组）
- Series（带索引的一维数组，类似定长的有序字典）
- DataFrame（数据框）

# numpy

In [3]:
import numpy as np
from scipy import linalg

In [11]:
np.ones((3, 4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [14]:
np.zeros((4, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [8]:
np.arange(1, 5, 0.5)

array([1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5])

In [9]:
np.random.random((2, 2))

array([[0.35368919, 0.58049302],
       [0.19730932, 0.20716912]])

In [10]:
np.linspace(1, 2, 10, endpoint=False)

array([1. , 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9])

In [15]:
np.fromfunction(lambda i, j: (i + 1) * (j + 1), (9, 9))

array([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 2.,  4.,  6.,  8., 10., 12., 14., 16., 18.],
       [ 3.,  6.,  9., 12., 15., 18., 21., 24., 27.],
       [ 4.,  8., 12., 16., 20., 24., 28., 32., 36.],
       [ 5., 10., 15., 20., 25., 30., 35., 40., 45.],
       [ 6., 12., 18., 24., 30., 36., 42., 48., 54.],
       [ 7., 14., 21., 28., 35., 42., 49., 56., 63.],
       [ 8., 16., 24., 32., 40., 48., 56., 64., 72.],
       [ 9., 18., 27., 36., 45., 54., 63., 72., 81.]])

In [16]:
arr = np.array([[1, 2], [3, 4]])

In [17]:
# 计算行列式
linalg.det(arr)

-2.0

In [18]:
# Number of array dimensions (axes). NumPy has historically called this the
# array's "rank"; it is not the linear-algebra matrix rank
# (np.linalg.matrix_rank computes that).
arr.ndim

2

In [19]:
# Shape of the array: a tuple holding the size along each dimension.
arr.shape

(2, 2)

In [20]:
# 元素个数
arr.size

4

In [21]:
x = np.array([(1, 2, 3), (4, 5, 6)])

In [22]:
x

array([[1, 2, 3],
       [4, 5, 6]])

In [23]:
x[1]

array([4, 5, 6])

In [25]:
x[0:1]

array([[1, 2, 3]])

In [26]:
x[:, [0, 1]]

array([[1, 2],
       [4, 5]])

In [27]:
x[1, [0, 1]]

array([4, 5])

In [28]:
for row in x:
    print(row)

[1 2 3]
[4 5 6]


In [30]:
# 选择第一列和第三列数据
x[:, ::2]

array([[1, 3],
       [4, 6]])

In [31]:
# 交换行
x[::-1]

array([[4, 5, 6],
       [1, 2, 3]])

In [32]:
# 交换列
x[:, ::-1]

array([[3, 2, 1],
       [6, 5, 4]])

In [33]:
x

array([[1, 2, 3],
       [4, 5, 6]])

In [34]:
y = np.arange(1, 17).reshape(4, 4)

In [35]:
y

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16]])

In [36]:
y.reshape(2, -1)

array([[ 1,  2,  3,  4,  5,  6,  7,  8],
       [ 9, 10, 11, 12, 13, 14, 15, 16]])

In [37]:
y.reshape(-1, 1)

array([[ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15],
       [16]])

In [38]:
# 数组的拼接
a = np.array([1, 3, 7])
b = np.array([3, 5, 8])

In [40]:
# 垂直方向拼接
np.vstack((a, b))

array([[1, 3, 7],
       [3, 5, 8]])

In [41]:
# 水平方向拼接
np.hstack((a, b))

array([1, 3, 7, 3, 5, 8])

In [42]:
# 广播
a = np.array([1, 2, 3])
b = np.array([[1, 2, 3], [4, 5, 6]])
a + b

array([[2, 4, 6],
       [5, 7, 9]])

In [43]:
10 + b

array([[11, 12, 13],
       [14, 15, 16]])

In [44]:
# ndarray的运算

In [45]:
b

array([[1, 2, 3],
       [4, 5, 6]])

In [46]:
b.sum()

21

In [47]:
# 按列求和
b.sum(axis=0)

array([5, 7, 9])

In [48]:
# 按行求和
b.sum(axis=1)

array([ 6, 15])

In [51]:
b.min()  #return value

1

In [52]:
b.argmax()  #return index

5

In [53]:
b.mean()

3.5

In [54]:
# 方差
b.var()

2.9166666666666665

In [55]:
# 标准差
b.std()

1.707825127659933

## ndarray的专门应用——线性代数
- dot           矩阵内积
- linalg.det    行列式
- linalg.inv    逆矩阵
- linalg.solve  多元一次方程组求根
- linalg.eig    求特征值和特征向量

In [56]:
# ndarray的ufunc函数
# 它是一种能对数组的每个元素进行操作的函数
# numpy内置的许多ufunc函数都是在C语言级别实现的，计算速度非常快

In [58]:
import time
import math
import numpy as np

In [79]:
# Baseline for the speed comparison: compute sin(t)**2 for every element with
# a Python-level loop over the math module, overwriting x in place.
x = np.arange(0, 100000, 0.01)
# Start the clock only after the array is built. process_time() reports CPU
# time of the current process, not wall-clock time.
t_m1 = time.process_time()
# The timed region includes iterating the array and the per-element
# assignment, not just the math.sin / math.pow calls.
for i, t in enumerate(x):
    x[i] = math.pow(math.sin(t), 2)
t_m2 = time.process_time()
# t_m1 and t_m2 are read again by the later cell that computes the ratio.
print("Running time of math:", t_m2 - t_m1)

Running time of math: 3.65625


In [81]:
# NumPy version of the comparison: the same input range, with sin and the
# square applied to the whole array through ufuncs instead of a Python loop.
y = np.arange(0, 100000, 0.01)
# As in the math cell, array construction is kept outside the timed region.
t_n1 = time.process_time()
y = np.power(np.sin(y), 2)
t_n2 = time.process_time()
# t_n1 and t_n2 are read again by the later cell that computes the ratio.
print("Running time of numpy:", t_n2 - t_n1)

Running time of numpy: 0.25


In [82]:
# Speed-up of the NumPy ufunc version over the math loop.
# The run recorded below gives about 14.6x; the exact ratio varies per run.
(t_m2 - t_m1) / (t_n2 - t_n1)

14.625

# Series
## 基本特征
- 类似一维数组的对象
- 由数据和索引组成

In [91]:
import pandas as pd
import numpy as np

In [84]:
aSer = pd.Series([1, 2.5, 'hello'])

In [85]:
aSer

0        1
1      2.5
2    hello
dtype: object

In [86]:
# 定义的同时指定索引
bSer = pd.Series(['apple', 'peach', 'lemon'], index=[1, 2, 3])

In [87]:
bSer

1    apple
2    peach
3    lemon
dtype: object

In [88]:
bSer.index

Int64Index([1, 2, 3], dtype='int64')

In [89]:
bSer.values

array(['apple', 'peach', 'lemon'], dtype=object)

In [92]:
cSer = pd.Series([3, 5, 7], index=['x', 'y', 'z'])

In [93]:
cSer

x    3
y    5
z    7
dtype: int64

In [94]:
cSer * 2

x     6
y    10
z    14
dtype: int64

In [95]:
# Raise e (the base of the natural logarithm) to the power of each element;
# the result keeps the Series index.
np.exp(cSer)

x      20.085537
y     148.413159
z    1096.633158
dtype: float64

In [141]:
# 生成边界值为1内部值为0的二维数组
X = np.ones((5, 5))

In [145]:
X[1:-1, 1:-1] = 0

In [146]:
X

array([[1., 1., 1., 1., 1.],
       [1., 0., 0., 0., 1.],
       [1., 0., 0., 0., 1.],
       [1., 0., 0., 0., 1.],
       [1., 1., 1., 1., 1.]])

In [149]:
Y = np.full((5, 4), np.pi)

In [150]:
Y

array([[3.14159265, 3.14159265, 3.14159265, 3.14159265],
       [3.14159265, 3.14159265, 3.14159265, 3.14159265],
       [3.14159265, 3.14159265, 3.14159265, 3.14159265],
       [3.14159265, 3.14159265, 3.14159265, 3.14159265],
       [3.14159265, 3.14159265, 3.14159265, 3.14159265]])

In [152]:
np.full_like(Y, 4)

array([[4., 4., 4., 4.],
       [4., 4., 4., 4.],
       [4., 4., 4., 4.],
       [4., 4., 4., 4.],
       [4., 4., 4., 4.]])

In [155]:
# 如何创建一个单位数组
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [156]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [157]:
np.eye(5, k=1)

array([[0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0.]])

In [158]:
np.eye(5, k=-2)

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.]])

In [159]:
np.random.normal(0, 5, 100)

array([  1.47336667,   1.17066118,  -4.97667096,   6.40478348,
         4.39025923,   9.65439943,   6.73512225,  10.64249231,
         0.5775333 ,   9.550564  ,  -3.51488084,   8.04472325,
        -3.19288913,  -3.55859018,   1.42533455,   5.4175658 ,
         6.36862003,   4.95256141,  -0.09875305,   3.13474767,
         4.86940072,  -3.78952517,  -0.85558648,  -3.72735837,
         1.0517577 ,  -6.51980868,   2.81313743,   7.84782192,
         4.93883835,  -4.05675472,  -0.12147592,   3.39530644,
         6.84843046,  -2.30497406,  -7.57646156,  -6.75593844,
        12.98883817,  -4.73691193,   3.78134723,  -4.14730192,
        -0.83343165,   1.33120618,   4.07787771,   3.73322504,
        -6.264769  ,   6.2004356 ,   4.93315578,   2.61526557,
         2.7572439 ,   2.63192228,   3.44028212,   3.04523728,
         7.83501936,  -4.90368217,   4.04339044,   6.57764442,
         5.12855998,  -1.01968081,  -0.86519808,  -2.70667424,
        -0.45921414,  -5.22273776,   2.8934427 , -11.25

In [160]:
np.random.uniform(-5, 5, 100)

array([-1.55795512,  4.39217991, -3.19184825, -3.8880171 , -3.97841984,
        3.15122298,  4.13750021, -4.2996853 , -3.49935563, -1.51308651,
        2.1751786 , -2.10314232, -1.10139594,  4.83621725, -4.66751418,
        3.58207658,  1.20910848, -4.62267819, -2.09970491,  1.8352397 ,
       -1.27320509, -1.45073267, -4.7818354 ,  4.15527308, -1.88216739,
       -4.21609246,  4.76308204, -0.29052024, -3.58177452, -3.94143393,
        0.5671481 , -2.54105636, -0.31679373,  4.56221951, -3.10449413,
        0.00665411,  0.29707315,  4.70060747,  0.91483707, -3.80443879,
        1.27595124,  3.45725865,  2.95152108,  2.75046886, -2.85920764,
       -1.43080792, -2.62188122,  3.78623151,  3.23250328,  1.58690567,
       -4.08237802, -1.45244235,  1.26727424,  3.73740422, -4.48212489,
        4.32544318,  2.68789959, -0.05209099, -3.03660653, -1.46040904,
        3.20228528, -2.6705636 , -2.59800764,  2.42070296,  3.58922663,
       -1.3652033 , -3.09987617,  3.73056312,  3.77109051, -2.41

In [162]:
scores = np.array([[98, 100, 89], [88, 79, 99]])

In [164]:
# 矢量化运算和广播
scores_mean = scores.mean(axis=1, keepdims=True)

In [166]:
scores - scores_mean

array([[ 2.33333333,  4.33333333, -6.66666667],
       [-0.66666667, -9.66666667, 10.33333333]])

## Series的数据对齐
在运算中自动对齐不同索引的数据

In [108]:
data = {'AXP': 86.40, 'CSCO': 122.64, 'BA': 99.44}

In [101]:
sindex = ['AXP', 'CSCO', 'BA', 'AAPL']

In [109]:
dSer = pd.Series(data, index=sindex)

In [110]:
dSer

AXP      86.40
CSCO    122.64
BA       99.44
AAPL       NaN
dtype: float64

In [104]:
pd.isnull(dSer)

AXP     False
CSCO    False
BA      False
AAPL     True
dtype: bool

In [111]:
eSer = pd.Series({'AXP': 86.40, 'CSCO': 122.64, 'CVX': 23.78})

In [116]:
eSer

AXP      86.40
CSCO    122.64
CVX      23.78
dtype: float64

In [117]:
dSer + eSer

AAPL       NaN
AXP     172.80
BA         NaN
CSCO    245.28
CVX        NaN
dtype: float64

# DataFrame
## 基本特征
- 一个表格型的数据结构
- 含有一组有序的列（类似于index）
- 大致可看成共享同一个index的Series集合

In [118]:
data = {'name': ['ZhangSan', 'LiSi', 'WangWu'], 'pay': [4000, 5000, 6000]}

In [119]:
frame = pd.DataFrame(data)

In [120]:
frame

Unnamed: 0,name,pay
0,ZhangSan,4000
1,LiSi,5000
2,WangWu,6000


In [121]:
# NOTE: np.array() over tuples that mix str and int yields one string-typed
# array, so the pay values are stored as text rather than numbers. This is
# why the 'pay' column shows dtype: object in the outputs that follow.
data = np.array([('ZhangSan', 4000), ('LiSi', 5000), ('WangWu', 6000)])

In [122]:
frame = pd.DataFrame(data, index=range(1, 4), columns=['name', 'pay'])

In [123]:
frame

Unnamed: 0,name,pay
1,ZhangSan,4000
2,LiSi,5000
3,WangWu,6000


In [125]:
frame['name']

1    ZhangSan
2        LiSi
3      WangWu
Name: name, dtype: object

In [126]:
frame.pay

1    4000
2    5000
3    6000
Name: pay, dtype: object

In [127]:
# Positional selection: rows at positions 0 and 1, column at position 1 ("pay").
# iloc ignores the index labels, which is why the result is labelled 1 and 2.
frame.iloc[:2, 1]

1    4000
2    5000
Name: pay, dtype: object

In [128]:
# DataFrame对象的修改和删除
frame['name'] = 'admin'

In [129]:
frame

Unnamed: 0,name,pay
1,admin,4000
2,admin,5000
3,admin,6000


In [130]:
del frame['pay']

In [131]:
frame

Unnamed: 0,name
1,admin
2,admin
3,admin


In [133]:
frame = pd.DataFrame({'name': ['ZhangSan', 'LiSi', 'WangWu'], 'pay': [4000, 5000, 6000]})

In [134]:
frame

Unnamed: 0,name,pay
0,ZhangSan,4000
1,LiSi,5000
2,WangWu,6000


In [136]:
# DataFrame对象成员找出最低工资和高工资人群信息
frame['pay'].min()

4000

In [137]:
frame[frame.pay >= 5000]

Unnamed: 0,name,pay
1,LiSi,5000
2,WangWu,6000
