Numpy and pandas

sparse matrix

In [1]:
# load libraries
import numpy as np
from scipy import sparse

# create a matrix
matrix = np.array([[0, 0],
                  [0, 1],
                  [3, 0]])

# create compressed sparse row (CSR) matrix
matrix_sparse = sparse.csr_matrix(matrix)

In [2]:
# view sparse matrix
print(matrix_sparse)

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 2 stored elements and shape (3, 2)>
  Coords	Values
  (1, 1)	1
  (2, 0)	3


In [3]:
# create larger matrix
matrix_large = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                         [3, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

# create compressed sparse row (CSR) matrix
matrix_large_sparse = sparse.csr_matrix(matrix_large)

# view original sparse matrix
print(matrix_sparse)

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 2 stored elements and shape (3, 2)>
  Coords	Values
  (1, 1)	1
  (2, 0)	3


In [4]:
# load library
import numpy as np

# create row vector
vector = np.array([1, 2, 3, 4, 5, 6])

# create matrix
matrix = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]])

# select the third element of vector
vector[2]

np.int64(3)

In [5]:
# Select all elements of a vector
vector[:]

# select everything up to and including the third element
vector[:3]

# select the last element
vector[-1]

# select all rows and the second column
matrix[:,1:2]

array([[2],
       [5],
       [8]])

Numpy's vecorize

In [6]:
# load library
import numpy as np

# create matrix
matrix = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]])

# create function that adds 1000 to something
add_1000 = lambda i: i + 1000

# create vectorized function
vectorized_add_1000 = np.vectorize(add_1000)

# apply function to all elementsin matrix
vectorized_add_1000(matrix)

array([[1001, 1002, 1003],
       [1004, 1005, 1006],
       [1007, 1008, 1009]])

In [7]:
# find maximum element in each column
np.max(matrix, axis=0)

array([7, 8, 9])

In [8]:
# find maximum element in each row
np.max(matrix, axis=1)

array([3, 6, 9])

In [9]:
# load library
import numpy as np

# 创建一个普通的稠密矩阵（大部分为0）
matrix = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]])

# return variance
np.var(matrix)

np.float64(6.666666666666667)

In [10]:
# 转换为CSR稀疏矩阵
matrix_sparse = sparse.csr_matrix(matrix)
print(matrix_sparse)

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 9 stored elements and shape (3, 3)>
  Coords	Values
  (0, 0)	1
  (0, 1)	2
  (0, 2)	3
  (1, 0)	4
  (1, 1)	5
  (1, 2)	6
  (2, 0)	7
  (2, 1)	8
  (2, 2)	9


In [11]:
# return standard deviation 标准差
np.std(matrix)

np.float64(2.581988897471611)

(column, row)

In [12]:
matrix.reshape(1, -1)  #把原数组“拉平”为一行，列数自动计算

array([[1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [14]:
matrix.reshape(9)  # 把原来的 4x3 矩阵，变成了一个 1 维、长度为 12 的数组

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

Transposing a matrix

In [None]:
# load library
import numpy as np

# create matrix
matrix = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]])

# transpose matrix
matrix.T

In [None]:
# transpose vector 一维数组（shape: (6,)）
np.array([1, 2, 3, 4, 5, 6]).T

In [None]:
# transpose row vector  二维数组，一行六列（shape: (1, 6)）
np.array([[1, 2, 3, 4, 5, 6]]).T

In [15]:
# load library
import numpy as np

# create matrix
matrix = np.array([[1, 1, 1],
                   [1, 1, 10],
                   [1, 1, 15]])

# return matrix rank
np.linalg.matrix_rank(matrix)

np.int64(2)

线性无关：如果一行（或一列）不能用其它行（或列）的线性组合表示，那么它就是“独立”的。
秩的定义：矩阵的秩就是最大线性无关行数（或列数），行秩=列秩

高斯消元法
（行变换，把矩阵变成行最简形式），最后有多少个非零行

A=  [1, 2, 3]
    [2, 4, 6]
    [3, 8, 9]

det(A)=aei+bfg+cdh−ceg−bdi−afh=0
矩阵不可逆，它的三行（或三列）是线性相关的（比如第2行是第1行的2倍）

In [16]:
# return diagonal one above the main diagonal
matrix.diagonal(offset=1)

array([ 1, 10])

主对角线：指的是从左上到右下（如 [0,0], [1,1], [2,2]...）的那条线。

offset=1：表示“上偏移1”，即取主对角线上方的那条对角线上的元素。
offset=0 表示主对角线
offset=1 表示主对角线上面一条
offset=-1 表示主对角线下面一条

In [17]:
sum(matrix.diagonal())

np.int64(17)

Eigenvalues and Eigenvectors
#### Problem
You need to find the eigenvalues and eigenvectors of a square matrix.

#### Solution
Use NumPy's linalg.eig:

In [18]:
# load library
import numpy as np

# create matrix
matrix = np.array([[1, -1, 3],
                   [1, 1, 6],
                   [3, 8, 9]])

# calculate eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(matrix)

# view eigenvalues 计算并输出一个3x3矩阵的特征值
eigenvalues

array([13.55075847,  0.74003145, -3.29078992])

特征值：满足 Av=λv的标量 
λ特征向量：满足 Av=λv 的非零向量 v
A 是你的矩阵，v 是特征向量，λ 是特征值。

Dot products

In [19]:
# load library
import numpy as np

# create two vectors
vector_a = np.array([1, 2, 3])
vector_b = np.array([4, 5, 6])

# calculate dot product
np.dot(vector_a, vector_b)

np.int64(32)

In [20]:
# calculate dot product
vector_a @ vector_b

np.int64(32)

Add and substract matrics

a @ b 这个新运算符来计算点积

In [21]:
# load library
import numpy as np

# create matrix
matrix = np.array([[1, 4],
                  [2, 5]])

# calculate inverse of matrix
np.linalg.inv(matrix)

array([[-1.66666667,  1.33333333],
       [ 0.66666667, -0.33333333]])

$A * A^{-1} = I$

(mean, standard deviation, number)

In [None]:
# draw three numbers from a normal distribution with mean 0.0
# and standard deviation of 1.0
np.random.normal(0.0, 1.0, 3)

load a jason

In [22]:
# load library
import pandas as pd

# create url
url = 'http://api.nobelprize.org/v1/prize.json'
# load data
df = pd.read_json(url)

# view first two rows
df.head(2)

HTTPError: HTTP Error 403: Forbidden