# numpy基础

```
NumPy is the fundamental package for scientific computing with Python. 

It contains among other things:

- a powerful N-dimensional array object
- sophisticated (broadcasting) functions
- tools for integrating C/C++ and Fortran code
- useful linear algebra, Fourier transform, and random number capabilities

Besides its obvious scientific uses, NumPy can also be used as an efficient multi-dimensional container of generic data.

Arbitrary data-types can be defined. 

This allows NumPy to seamlessly and speedily integrate with a wide variety of databases.
```

废话不多说，numpy的重要性不言而喻，直接看代码。

In [1]:
import numpy as np

In [2]:
data = np.arange(12).reshape(3,4)

In [3]:
data

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

- 数组的算术运算默认是逐元素（element-wise）进行的

In [4]:
data * 10

array([[  0,  10,  20,  30],
       [ 40,  50,  60,  70],
       [ 80,  90, 100, 110]])

In [5]:
data.shape

(3, 4)

In [6]:
data.dtype

dtype('int64')

In [7]:
data.ndim

2

- 布尔型数组用于索引

In [8]:
data[data<5] = 0
data

array([[ 0,  0,  0,  0],
       [ 0,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [9]:
data.T

array([[ 0,  0,  8],
       [ 0,  5,  9],
       [ 0,  6, 10],
       [ 0,  7, 11]])

In [10]:
np.dot(data.T,data)

array([[ 64,  72,  80,  88],
       [ 72, 106, 120, 134],
       [ 80, 120, 136, 152],
       [ 88, 134, 152, 170]])

## 一元ufunc：

In [11]:
np.sqrt(data)

array([[ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  2.23606798,  2.44948974,  2.64575131],
       [ 2.82842712,  3.        ,  3.16227766,  3.31662479]])

In [12]:
np.exp(data)

array([[  1.00000000e+00,   1.00000000e+00,   1.00000000e+00,
          1.00000000e+00],
       [  1.00000000e+00,   1.48413159e+02,   4.03428793e+02,
          1.09663316e+03],
       [  2.98095799e+03,   8.10308393e+03,   2.20264658e+04,
          5.98741417e+04]])

## 二元ufunc

In [13]:
x = np.random.randn(8)
x

array([ 1.19911478, -0.87448992,  0.32508143, -0.29231038,  0.78727289,
       -0.57976679, -0.65701734, -0.03742968])

In [14]:
y = np.random.randn(8)
y

array([ 0.83002571, -0.11149519, -0.26294768, -0.18584947,  0.04441277,
       -1.46969178, -0.84550823, -1.481617  ])

In [15]:
z = np.power(x,y)
z

RuntimeWarning: invalid value encountered in power

（x 中的负数取非整数次幂在实数范围内没有定义，因此对应位置的结果为 nan。）


array([ 1.16266997,         nan,  1.34375634,         nan,  0.98943356,
               nan,         nan,         nan])

## 条件逻辑 np.where（三元表达式 x if cond else y 的矢量化版本）

In [16]:
x = np.array([1.1,1.2,1.3,1.4,1.5])
y = np.array([2.1,2.2,2.3,2.4,2.5])
cond = np.array([True,False,True,True,False])

In [17]:
result = np.where(cond,x,y)
result

array([ 1.1,  2.2,  1.3,  1.4,  2.5])

## 数学和统计方法

In [18]:
data = np.arange(12).reshape(3,4)
data

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [19]:
data.mean()

5.5

In [20]:
data.mean(axis=0)

array([ 4.,  5.,  6.,  7.])

In [21]:
data.mean(axis=1)

array([ 1.5,  5.5,  9.5])

In [22]:
data.sum(axis=1)

array([ 6, 22, 38])

## 累加

In [23]:
data.cumsum(0)

array([[ 0,  1,  2,  3],
       [ 4,  6,  8, 10],
       [12, 15, 18, 21]])

## 累乘

In [24]:
data.cumprod(0)

array([[  0,   1,   2,   3],
       [  0,   5,  12,  21],
       [  0,  45, 120, 231]])

## 用于布尔型数组的方法

In [25]:
arr = np.random.randn(100)
(arr>0).sum() #正值的数量

57

any 和 all ：

- any 用于测试数组中是否存在一个或多个 True；
- all 用于检测数组中的所有值是否都是 True。

In [26]:
bools = np.array([False,False,True,False])
bools.any()

True

In [27]:
bools.all()

False

In [28]:
arr = np.random.randn(5,3)
arr

array([[ 0.3300801 , -0.1068226 , -0.45340813],
       [-1.70614802, -1.15734883, -0.45054381],
       [ 1.04189531, -0.40779194, -0.95224459],
       [-0.88946027,  1.61607895,  0.53116522],
       [ 0.35602213,  0.61704588, -0.63842772]])

In [29]:
arr.sort(1)
arr

array([[-0.45340813, -0.1068226 ,  0.3300801 ],
       [-1.70614802, -1.15734883, -0.45054381],
       [-0.95224459, -0.40779194,  1.04189531],
       [-0.88946027,  0.53116522,  1.61607895],
       [-0.63842772,  0.35602213,  0.61704588]])

## 线性代数

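常用的线性代数函数（位于 numpy 和 numpy.linalg 中）：

- `np.dot`：矩阵乘法
- `np.trace`：计算对角线元素之和
- `np.linalg.det`：计算矩阵的行列式
- `np.linalg.eig`：计算方阵的特征值和特征向量
- `np.linalg.inv`：计算方阵的逆
- `np.linalg.pinv`：计算矩阵的 Moore-Penrose 伪逆
- `np.linalg.qr`：计算 QR 分解
- `np.linalg.svd`：计算奇异值分解（SVD）
- `np.linalg.solve`：解线性方程组 Ax = b，其中 A 为方阵
- `np.linalg.lstsq`：计算 Ax = b 的最小二乘解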

## 唯一化

找出数组中的唯一值，并且返回已经排序的结果。

In [30]:
names = np.array(['Bob','Bob','Cer',"Vivien","Cer"])
np.unique(names)

array(['Bob', 'Cer', 'Vivien'], 
      dtype='|S6')

## 文件读写

In [31]:
np.save("names",names)

In [32]:
np.load("names.npy")

array(['Bob', 'Bob', 'Cer', 'Vivien', 'Cer'], 
      dtype='|S6')

## 使用 np.c\_[] 和 np.r\_[] 分别添加列和行

In [33]:
a = np.array([[1,2,3],[4,5,6],[7,8,9]])
b = np.ones(3)
c = np.arange(10,19).reshape(3,3)

In [34]:
np.c_[a,b] #添加列

array([[ 1.,  2.,  3.,  1.],
       [ 4.,  5.,  6.,  1.],
       [ 7.,  8.,  9.,  1.]])

In [35]:
np.r_[a,b] #添加行（错误）

ValueError: all the input arrays must have same number of dimensions

In [36]:
np.r_[a,b.reshape(1,3)] #添加行（正确），维度要一致，形状要对

array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.],
       [ 7.,  8.,  9.],
       [ 1.,  1.,  1.]])

In [37]:
np.c_[a,b.reshape(3,1)]

array([[ 1.,  2.,  3.,  1.],
       [ 4.,  5.,  6.,  1.],
       [ 7.,  8.,  9.,  1.]])

## 使用 np.insert 更简洁

In [38]:
np.insert(a, 0, values=b, axis=1)

array([[1, 1, 2, 3],
       [1, 4, 5, 6],
       [1, 7, 8, 9]])

In [39]:
np.insert(a, 1, values=b, axis=1)

array([[1, 1, 2, 3],
       [4, 1, 5, 6],
       [7, 1, 8, 9]])

In [40]:
np.insert(a, 0, values=b, axis=0)

array([[1, 1, 1],
       [1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])