In [2]:
"""
导入基本库
"""
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

In [12]:
a = np.concatenate(([3], [0]*5, np.arange(-1, 1.002, 2/9.0)))
a = np.c_[[1,4],[3,4]]
print(a)

[[1 3]
 [4 4]]


In [17]:
x = [1,2,3,4,5]
y = [5,6,7,8,9,10]
print(np.meshgrid(x,y))
print(np.mgrid[1:6:2, 2:4:3j])

[array([[1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5]]), array([[ 5,  5,  5,  5,  5],
       [ 6,  6,  6,  6,  6],
       [ 7,  7,  7,  7,  7],
       [ 8,  8,  8,  8,  8],
       [ 9,  9,  9,  9,  9],
       [10, 10, 10, 10, 10]])]
[[[1. 1. 1.]
  [3. 3. 3.]
  [5. 5. 5.]]

 [[2. 3. 4.]
  [2. 3. 4.]
  [2. 3. 4.]]]


## 稀疏矩阵
### 如何选择不同的稀疏矩阵
不同的稀疏矩阵适用的操作不同。如果想创建一个新的稀疏矩阵，lil_matrix，dok_matrix 和 coo_matrix 会比高效，但是它们不适合做矩阵运算。如果想做矩阵运算，例如矩阵乘法、求逆等，应该用 CSC 或者 CSR 类型的稀疏矩阵。由于在内存中存储顺序的差异，csc_matrix 矩阵更适合取列切片，而 csr_matrix 矩阵更适合用来取行切片。


### coo_matrix
coo_matrix 是最直观的稀疏矩阵定义方式。它的全称是 coordinate sparse matrix，从名字可以看出，它通过非零元素在矩阵中的坐标存储稀疏矩阵。 

In [44]:
from scipy import sparse

# 创建方法
row = np.array([0,0,1,1,2,2,3,3])
col = np.array([0,0,1,2,3,4,5,6])
data = np.array([1,4,1,1,1,1,1,1])
coo = sparse.coo_matrix((data,(row,col)))
mat_dense = coo.toarray()
print("稠密矩阵:\n", mat_dense)

# 稠密矩阵与稀疏矩阵、稀疏矩阵之间均可互相转换
coo_matrix = sparse.coo_matrix(mat_dense)
print("coo_matrix:\n",coo_matrix)
lil_matrix = sparse.lil_matrix(coo_matrix)
print("lil_matrix:\n", lil_matrix)

稠密矩阵:
 [[5 0 0 0 0 0 0]
 [0 1 1 0 0 0 0]
 [0 0 0 1 1 0 0]
 [0 0 0 0 0 1 1]]
coo_matrix:
   (0, 0)	5
  (1, 1)	1
  (1, 2)	1
  (2, 3)	1
  (2, 4)	1
  (3, 5)	1
  (3, 6)	1
lil_matrix:
   (0, 0)	5
  (1, 1)	1
  (1, 2)	1
  (2, 3)	1
  (2, 4)	1
  (3, 5)	1
  (3, 6)	1


### lil_matrix
il_matrix 的全称是 row-based linked list sparse matrix 。它包含两个要素：rows 和 data

Sparse matrices can be used in arithmetic operations: they support addition, subtraction, multiplication, division, and matrix power.

**Advantages of the LIL format**
- supports flexible slicing
- changes to the matrix sparsity structure are efficient

**Disadvantages of the LIL format**
- arithmetic operations LIL + LIL are slow (consider CSR or CSC)
- slow column slicing (consider CSC)
- slow matrix vector products (consider CSR or CSC)

**Intended Usage**
- LIL is a convenient format for constructing sparse matrices
- once a matrix has been constructed, convert to CSR or CSC format for fast arithmetic and matrix vector operations
- consider using the COO format when constructing large matrices

**Data Structure**
- An array (self.rows) of rows, each of which is a sorted list of column indices of non-zero elements.
- The corresponding nonzero values are stored in similar fashion in self.data.


In [48]:
# 只能由其他矩阵转换而来
lil_matrix = sparse.lil_matrix(mat_dense)

# rows 是一个列表，元素个数为矩阵行数，其中每个元素都是一个列表，代表对应行非零元素的列下标
lil_matrix.rows
# data 是一个列表，元素个数为矩阵行数，其中每个元素都是一个列表，代表对应行从左到右非零元素的值
lil_matrix.data

array([list([5]), list([1, 1]), list([1, 1]), list([1, 1])], dtype=object)

### csc_matrix
全称为 Compressed Sparse Column matrix。数据按列存，所以适合做列切片。

Sparse matrices can be used in arithmetic operations: they support addition, subtraction, multiplication, division, and matrix power.

**Advantages of the CSC format**
- efficient arithmetic operations CSC + CSC, CSC * CSC, etc.
- efficient column slicing
- fast matrix vector products (CSR, BSR may be faster)

**Disadvantages of the CSC format**
- slow row slicing operations (consider CSR)
- changes to the sparsity structure are expensive (consider LIL or DOK)

**mat_csc 也有三个要素：data，indices 和 indptr**

indptr[i] 和 indptr[i+1] 表示的是第 i 列非零元素在 mat_csc.data 中存储的起止位置，也就是说：mat_csc.data[indptr[i]: indptr[i+1]] 将把矩阵中第 i 列所有非零元素取出来。然而光是取出来还不行，需要把它们安放到不同的行，而 mat_csc.indices 则是存储了这些非零元素对应的行下标：mat_csc.indices[indptr[i]:indptr[i+1]]。

In [52]:
from scipy.sparse import csc_matrix

# 初始化方法
# 初始化空的 csc_matrix
csc_matrix((3, 4), dtype=np.int8).toarray()

# 1. csc_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
row = np.array([0, 2, 2, 0, 1, 2])
col = np.array([0, 0, 1, 2, 2, 2])
data = np.array([1, 2, 3, 4, 5, 6])
csc_matrix((data, (row, col)), shape=(3, 3)).toarray()

# 2. csc_matrix((data, indices, indptr), [shape=(M, N)])
indptr = np.array([0, 2, 3, 6])
indices = np.array([0, 2, 2, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])
csc_matrix((data, indices, indptr), shape=(3, 3)).toarray()

array([[1, 0, 4],
       [0, 0, 5],
       [2, 3, 6]])

### csr_matrix
与 csc_matrix 类似。区别：
- csc_matrix 数据按行存，适合做行切片
- indices 中元素表示位于哪一列