# 1 install

> numpy + mkl(intel math kernel library)

# 2 start

## 2.1 numpy核心－多维数组

In [7]:
import datetime as dt
import numpy as np


def py_sum(n):
    """Return a list whose i-th item is i**2 + i**3 for i in range(n).

    Pure-Python counterpart of np_sum: the squares and the cubes are built
    as two separate lists and then added pairwise.
    """
    squares = [value ** 2 for value in range(n)]
    cubes = [value ** 3 for value in range(n)]
    return [square + cube for square, cube in zip(squares, cubes)]


def np_sum(n):
    """Return an ndarray whose i-th item is i**2 + i**3 for i in range(n).

    Vectorised counterpart of py_sum: both powers are computed element-wise
    on an index array and then added.
    """
    indices = np.arange(n)
    squares = indices ** 2
    cubes = indices ** 3
    return squares + cubes


def main():
    """Time py_sum against np_sum for n = 100000 and print both durations.

    Each duration is the elapsed wall-clock time of one call, printed as a
    whole number of microseconds after the label 'py-->' or 'np-->'.
    """
    one_microsecond = dt.timedelta(microseconds=1)

    start1 = dt.datetime.now()
    py_sum(100000)
    end1 = dt.datetime.now()
    # Floor-divide the whole timedelta instead of reading `.microseconds`:
    # that attribute is only the sub-second field, so it would under-report
    # any run that lasts one second or longer.
    print('py-->', (end1 - start1) // one_microsecond)

    start2 = dt.datetime.now()
    np_sum(100000)
    end2 = dt.datetime.now()
    print('np-->', (end2 - start2) // one_microsecond)


# Run the timing comparison as soon as this cell executes.
main()


py--> 102560
np--> 1614


## 2.2 numpy基础

### 2.2.1 数组对象

- `numpy`的数组是`numpy.ndarray`类型的对象，其中包括**实际数据**，以及描述这些数据的**元数据**，大部分针对数组的操作实际上仅仅是对其**元数据**的操作，以此提升性能．
- `numpy`数组的元素必须是同质的，即**类型**完全一致．
- `numpy`数组对象的`dtype`属性表示元素的**数据类型**．
- `numpy`数组对象的`shape`属性表示数组的**维度**,return tuple．


> `arange([start, ]stop[, step,], dtype=None)`

In [12]:
# One-dimensional array: the integers from 1 up to (but not including) 5.
one_dim = np.arange(1, 5)
print(one_dim)        # element values
print(type(one_dim))  # class of the array object
print(one_dim.dtype)  # element data type
print(one_dim.shape)  # dimensions, as a tuple

[1 2 3 4]
<class 'numpy.ndarray'>
int64
(4,)


In [17]:
# Two-dimensional array: two 1-D ranges of equal length stacked as rows.
two_dim = np.array(
    [np.arange(1, 5),
    np.arange(6, 10)]
)
print(two_dim)        # element values
print(type(two_dim))  # class of the array object
print(two_dim.dtype)  # element data type
print(two_dim.shape)  # dimensions, as a tuple

[[1 2 3 4]
 [6 7 8 9]]
<class 'numpy.ndarray'>
int64
(2, 4)


In [25]:
# Three-dimensional array.
# Alternative 1 (kept for reference): nest 1-D ranges two levels deep.
# three_dim = np.array([
#     [
#         np.arange(1, 5),
#         np.arange(6, 10),
#         np.arange(11, 15)
#     ],
#     [
#         np.arange(16, 20),
#         np.arange(21, 25),
#         np.arange(26, 30)
#     ]
# ])

# Alternative 2 (kept for reference): create the floats directly, then reshape.
# three_dim = np.arange(1,25, dtype=float).reshape(2, 3, 4)

# Version actually run: 24 integers reshaped to (2, 3, 4), then cast to float.
three_dim = np.arange(1,25).reshape(2, 3, 4).astype(float)
print(three_dim)        # element values
print(type(three_dim))  # class of the array object
print(three_dim.dtype)  # element data type
print(three_dim.shape)  # dimensions, as a tuple

[[[  1.   2.   3.   4.]
  [  5.   6.   7.   8.]
  [  9.  10.  11.  12.]]

 [[ 13.  14.  15.  16.]
  [ 17.  18.  19.  20.]
  [ 21.  22.  23.  24.]]]
<class 'numpy.ndarray'>
float64
(2, 3, 4)


### 2.2.2 下标访问

- dim[x, y, z, ...]

In [29]:
# Read every element of a multi-dimensional array through explicit indices.

three_dim = np.arange(1,25).reshape(2, 3, 4).astype(float)
sh = three_dim.shape
# One loop per axis: i indexes axis 0, j indexes axis 1, k indexes axis 2.
for i in range(sh[0]):
    for j in range(sh[1]):
        for k in range(sh[2]):
            # Pad each value to a minimum width of 4 and stay on the same line.
            print('{:4}'.format(three_dim[i, j, k]), end=' ')
        # End the current row.
        print()
    # Blank line between consecutive axis-0 slices.
    print()

 1.0  2.0  3.0  4.0 
 5.0  6.0  7.0  8.0 
 9.0 10.0 11.0 12.0 

13.0 14.0 15.0 16.0 
17.0 18.0 19.0 20.0 
21.0 22.0 23.0 24.0 



### 2.2.3 数据类型

1. python内置类型
2. numpy内置类型
    - `bool_`
    - `int8/int16/int32/int64      **eg: int16:[-32768, 32767]**`
    - `uint8/uint16/uint32/uint64  **eg:  uint16:[0, 65535]**`
    - `float16/float32/float64`
    - `complex64/complex128`
    - `str_`
3. 自定义类型（numpy.dtype）
    - `t = numpy.dtype(T)`
        - **T**:
            1. python或numpy的内置类型
                - `t = numpy.dtype(int)` **or** `t = numpy.dtype(numpy.int32)`
            2. 类型字符串
                - `t = numpy.dtype('int')` **or** `t = numpy.dtype('int32')`
            3. 类型字符编码（dtype.str）
                - `t = numpy.dtype('>(2,3)4i4')`
                    - `>     : 大端字节序`
                    - `(2, 3): 维度`
                    - `4     : 分量数`
                    - `i     : 分量类型`
                    - `4     : 分量字节数`
            4. (变长类型, 长度)
                - `t = numpy.dtype((numpy.str_, 14))`
            5. (定长类型, 维度)
                - `t = numpy.dtype((int, 5))`
                - `t = numpy.dtype(((int, 3), 2))`
            6. 逗号分隔的多个类型字符串
                - `t = numpy.dtype('U14, i4')`
            7. [(名称，类型，维度),(),...]
                - `t = numpy.dtype([('name', numpy.str_, 14), ('scores', numpy.int32, 3)])`
    - byteorder
        - `=` : native
        - `<` : little-endian
        - `>` : big-endian
        - `|` : not applicable
    - kind
        - b : boolean
        - i : signed integer
        - u : unsigned integer
        - f : floating-point
        - c : complex floating-point
        - m : timedelta
        - M : datetime
        - O : object
        - S : (byte-)string
        - U : Unicode
        - V : void
4. 切片
5. 改变维度（元数据，描述性数据）
    - 视图变维
        - `reshape()`, `ravel()`
    - 复制变维
        - `flatten() # 会复制数据`
    - 就地变维
        - `a.shape = (x, x) # 直接对属性赋值`
        - `a.resize((x, y))`
    - 视图转置
        - `a.transpose()`
6. 组合
    - 垂直组合
        - `v = numpy.vstack((a, b))`
    - 水平组合
        - `v = numpy.hstack((a, b))`
        - `v = numpy.dstack((a, b)) # 深度组合（沿第三个轴堆叠，不属于水平组合）`
    - 行组合(对于一维操作)
        - `r = numpy.row_stack((a, b))`
    - 列组合(一维)
        - `r = numpy.column_stack((a, b))`
7. 分割
    - 垂直分割
        - `a, b = numpy.vsplit(v, 2)`
    - 水平分割
        - `a, b, c = numpy.hsplit(v, 3)`
    - 深度分割
        - `a, b = numpy.dsplit(v, 2)`
8. ndarray
    - dtype : 元素类型
    - shape : 维度
    - ndim  : 维数
    - size  : 元素数
    - itemsize : 每个元素的字节数
    - nbytes : 数组的总字节数
    - T : 转置视图
    - real : 复数数组的实部视图
    - imag : 复数数组的虚部视图
    - flat : 扁平迭代器
    - tolist : 转成python列表

In [46]:
# Structured array: each record is a (unicode string, 4-byte integer) pair.
six = np.array([('abc', 123),('def', 456)], dtype='U14, i4')
print(six)
print(six.shape)
print(six.dtype)
# Plain 2-row, 3-column integer array used to demonstrate reshaping.
ss = np.array([[1,2,3],[4,5,6]])
# Bare expression: it is not the last expression of the cell, so the
# recorded output does not show it.
ss.shape
# Reshape the six elements into a single dimension.
s1 = ss.reshape(6)
s1

[('abc', 123) ('def', 456)]
(2,)
[('f0', '<U14'), ('f1', '<i4')]


array([1, 2, 3, 4, 5, 6])

# 3 numpy的通用函数

## 3.1 读取csv

    numpy.loadtxt(fname, dtype=<class 'float'>, comments='#',
                  delimiter=None, converters=None, skiprows=0,
                  usecols=None, unpack=False, ndmin=0)
- fname : file, str, or pathlib.Path.
- dtype : data-type of the resulting array;the number of columns used must match the number of fields in the data-type.
- comments : str or sequence, optional. used to indicate the start of a comment.
- delimiter : str, optional. the string used to separate values, by default, this is any whitespace.
- converters : dict, optional. a dictionary mapping column number to a function that will convert that column to a float. eg, converters={index: str2num}
- skiprows : int, optional. skip the first 'skiprows' lines; default 0;
- usecols : int or sequence, optional. which columns to read, with 0 being the first.
- unpack : bool, optional. if true, the returned array is transposed, so we can use `x, y, z = loadtxt(...)`.
- ndmin : int, optional. the returned array will have at least 'ndmin' dimensions.
------------
**returns**
- out : ndarray

## 3.2 算数平均值

    np.mean(a, axis=None, dtype=None, out=None, keepdims=<class 'numpy._globals._NoValue'> )

## 3.3 加权平均值

    average(a, axis=None, weights=None, returned=False) 

## 3.4 最大值，最小值

    numpy.max(arr1)
    numpy.min(arr1)
    numpy.maximum(arr1, arr2) # 逐元素比较两个数组，取对应元素中的较大值
    numpy.minimum(arr1, arr2) # 逐元素比较两个数组，取对应元素中的较小值

## 3.5 中位数

    median(a)
$$\frac{a[(size - 1)//2] + a[size//2]}{2}$$
    

## 3.6 统计指标

1. 样本　$$s = [s_1, s_2,\ldots, s_n]$$
2. 均值　$$m = \frac{(s_1 + s_2 + \ldots + s_n)}{n}$$
3. 离差　$$d = [d_1, d_2,\ldots, d_n], d_i = s_i - m$$
4. 离差方: $$q = [q_1, q_2,\ldots, q_n], q_i = d_i^2$$
5. (总体)方差: $$p = \frac{q_1 + q_2 + \ldots + q_n}{n}$$
6. (总体)标准差: $$std = \sqrt{p}$$
7. 样本方差: $$p = \frac{q_1 + q_2 + \ldots + q_n}{n-1}$$
8. 样本标准差: $$std = \sqrt{p}$$
-------------
    1. numpy.var(a) # 总体方差
    2. numpy.std(a) # 总体标准差
    3. numpy.diff(a) # a相邻元素差值　

## 3.7 移动平均值和数组卷积

    convolve(a, v, mode='full')
        Returns the discrete , linear convolution of two one-dimensional sequences.
        parameters
        ----
        mode : {'full', 'valid', 'same'}, optional
        
        note:
        ----
        权重值从后往前取

## 3.8 平均真实波幅（ATR）

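取最近N(20)个交易日的最高价，最低价和前日收盘价，先计算每日的真实波幅TR，再取N日TR的平均值：
$$TR = \max(high - low,\ |high - close_{prev}|,\ |low - close_{prev}|)$$
$$ATR = \frac{TR_1 + TR_2 + \ldots + TR_N}{N}$$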


## 3.9 布林带

    布林带分为上中下三个轨道，
    中轨： 移动平均线
    上轨： 中轨+ 标准差 * 2
    下轨： 中轨 - 标准差 * 2

## 3.10 线性模型
1. 线性预测．`numpy.linalg.lstsq(a, b)`
2. 趋势线　day04/trendline.py

## 3.11 ndarray对象的方法

> ## numpy.clip(a, a_min, a_max, out)
    return an array whose values are limited to [min, max]
    one of min or max must be given
    return ndarray

> ## numpy.compress(condition, a, axis=None, out=None)
    return selected slices of an array along given axis.


In [10]:
# No axis is given, so the running product is taken over the flattened input.
np.cumprod([[1, 4],[5, 6]])

array([  1,   4,  20, 120])

> ## numpy.prod(a, axis=None, dtype=None, out=None, keepdims=<...>)
    return the product of array elements over a given axis.
    eg:
        np.prod([[1,4],[5,6]]) ---> 1 * 4 * 5 * 6
> ## numpy.cumprod(a, axis=None, dtype=None, out=None)
    return the cumulative product of elements along a given axis
    return ndarray.
    eg:
        np.cumprod([[1, 4],[5, 6]])  --> [1, 4, 20, 120]

# 4 numpy的便捷函数

## 4.1 相关性
> 协方差

数组a,b
$$a = (a_1, a_2,\ldots,a_n)$$
$$b = (b_1, b_2,\ldots,b_n)$$

平均值ave
$$ave(a) = \frac{a_1 + a_2 + \ldots + a_n}{n}$$
$$ave(b) = \frac{b_1 + b_2 + \ldots + b_n}{n}$$

离差dev
$$dev(a) = a - \bar a$$
$$dev(b) = b - \bar b$$

方差var
$$var(a) = ave(dev(a) * dev(a)) = \frac{(a_1 - \bar a)^2 + \ldots + (a_n - \bar a)^2}{n}$$
$$var(b) = ave(dev(b) * dev(b)) = \frac{(b_1 - \bar b)^2 + \ldots + (b_n - \bar b)^2}{n}$$

标准差std
$$std(a) = \sqrt{var(a)}$$
$$std(b) = \sqrt{var(b)}$$

协方差cov
$$cov(a, b) = ave(dev(a) * dev(b)) = \frac{(a_1 - \bar a)(b_1 - \bar b) + \ldots + (a_n - \bar a)(b_n - \bar b)}{n}$$

协方差矩阵
$$\left[ \begin{array}{cc}
\frac{cov(a, a)}{std(a)std(a)}&\frac{cov(a, b)}{std(a)std(b)}\\
\frac{cov(b, a)}{std(b)std(a)}&\frac{cov(b, b)}{std(b)std(b)}
\end{array}\right]$$

    主对角线为１，辅对角线为相关系数

协方差相关系数　
$$coco(a, b) = \frac{cov(a, b)}{std(a)std(b)}$$
$$coco(a, b) = \frac{(a_1 - \bar a)(b_1 - \bar b) + \ldots + (a_n - \bar a)(b_n - \bar b)}{\sqrt{(a_1 - \bar a)^2 + \ldots + (a_n - \bar a)^2}\sqrt{(b_1 - \bar b)^2 + \ldots + (b_n - \bar b)^2}}$$



> ## numpy.cov(a, b) 
    Estimate a covariance matrix, given data and weights.
    协方差矩阵的分子部分．
    采用的样本方差．

> ## numpy.corrcoef(a, b)
    return the correlation coefficient matrix of the variables.（相关系数矩阵）
    
> corr.py

## 4.2 多项式拟合
    任何可微的函数都可以用一个n次多项式来拟合，而比n次幂更高阶的部分作为无穷小量而被忽略不计．
    numpy.polyfit()
    

## 4.3 符号数组
    numpy.sign()返回参数数组中的每个元素的符号，分别用+1,0,-1表示．
    

## 4.4 矢量化
    numpy.vectorize()函数可以将一个针对单个数值的处理函数变成针对数组的处理函数．　
    eg:
        def myfunc(a, b):
            return a - b if a > b else a + b
        vfunc = np.vectorize(myfunc)
        vfunc([1, 2, 3, 4], 2)
        array([3, 4, 1, 2])
 　 

## 4.5 数据平滑
---
### 4.5.1 numpy.hanning()
---

# 5 矩阵和通用函数
---


    np.matrix()
    np.mat()
    np.bmat()
    np.add.
    

In [16]:
import numpy as np
# NOTE(review): `a` is not used by any other statement in this cell.
a = np.array([1,4,3,2])
# Print the built-in documentation of np.vectorize.
help(np.vectorize)

Help on class vectorize in module numpy.lib.function_base:

class vectorize(builtins.object)
 |  vectorize(pyfunc, otypes=None, doc=None, excluded=None, cache=False,
 |            signature=None)
 |  
 |  Generalized function class.
 |  
 |  Define a vectorized function which takes a nested sequence of objects or
 |  numpy arrays as inputs and returns an single or tuple of numpy array as
 |  output. The vectorized function evaluates `pyfunc` over successive tuples
 |  of the input arrays like the python map function, except it uses the
 |  broadcasting rules of numpy.
 |  
 |  The data type of the output of `vectorized` is determined by calling
 |  the function with the first element of the input.  This can be avoided
 |  by specifying the `otypes` argument.
 |  
 |  Parameters
 |  ----------
 |  pyfunc : callable
 |      A python function or method.
 |  otypes : str or list of dtypes, optional
 |      The output data type. It must be specified as either a string of
 |      typecode ch

# 整理箱

    np.take()
    np.where()
    np.argmax()
    np.argmin()
    np.apply_along_axis()