### Brief introduction
Multi-dimensional array library
1-D, 2-D, 3-D array....
![image.png](attachment:image.png)

In [2]:
import numpy as np
import matplotlib.pylab as plt
# If the import fails, install NumPy from a terminal: pip install numpy

### The Basics

In [3]:
# Initialize a one-dimensional array: pass a Python list inside the call's parentheses
a = np.array([1,2,3], dtype='int32')
# A smaller dtype (e.g. 'int16') can be chosen when the values are small, so the array uses less memory
print(a)

[1 2 3]


In [4]:
# Without print(), the notebook displays the repr of the cell's last expression
a

array([1, 2, 3], dtype=int32)

In [5]:
# Two-dimensional: separate the inner lists with ","; nesting lists one level deeper gives a 3-dimensional array
b = np.array([[9.0,8.0,7.0],[6.0,5.0,4.0]])
print(b)

[[9. 8. 7.]
 [6. 5. 4.]]


In [6]:
# Get Dimension, ndim = number of dimensions
a.ndim 

1

In [7]:
b.ndim

2

In [8]:
# Get Shape
ashape = a.shape
# a is one-dimensional with 3 elements
bshape = b.shape 
# b is two-dimensional with shape 2*3 (2 rows * 3 columns)
print(ashape)
print(bshape)

(3,)
(2, 3)


In [9]:
# Get Type: dtype = data type, which determines how much memory each element takes up
b.dtype
# b was built from float literals without an explicit dtype, so its dtype is float64
# NOTE: the default integer dtype is platform-dependent; a is int32 above only because it was requested explicitly
# A float64 element (8 bytes) is bigger than an int32 element (4 bytes)

dtype('float64')

In [12]:
# Get Size (bytes per element)
b.itemsize
# For a 32-bit dtype such as int32 the item size is 4, because that is 4 bytes (4*8=32 bits)
# For b (float64) it is 8 (8*8=64 bits)

8

In [13]:
# Get total size
a.nbytes
# a.nbytes = a.size * a.itemsize

12

In [14]:
# Get number of elements
a.size

3

### Accessing/Changing specific elements, rows, columns, etc

In [15]:
# initialize a two by seven array 
a = np.array([[1,2,3,4,5,6,7],[8,9,10,11,12,13,14]])
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]


In [16]:
# Get a specific element [row, column]
a[1, 5]
# Python indexing starts at 0, so the first row is index 0 and the second row is index 1

13

In [17]:
a[0,-1]
# Index -1 is the last column, which holds 7 in the first row

7

In [27]:
# Get a specific row: just like slicing a list, use a bare ':' for the column index
a[0, :]

array([1, 2, 3, 4, 5, 6, 7])

In [16]:
# Get a specific column
a[:, 2]

array([ 3, 10])

In [28]:
# Getting a little more fancy [startindex:endindex:stepsize]
# First row, from the second number up to 6 (the end index is exclusive, so 7 is not included), step size 2
a[0, 1:-1:2]

array([2, 4, 6])

In [36]:
# Overwrite a single element, then a whole column (one value per row)
a[1,5] = 20
a[:,2] = [4,5]
print(a)

[[ 1  2  4  4  5  6  7]
 [ 8  9  5 11 12 20 14]]


**3-D example**

In [48]:
b = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(b)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [49]:
# Get specific element (work outside in)
b[0,1,1]

4

In [51]:
# replace 
b[:,1,:] = [[9,9],[8,8]]

In [52]:
b

array([[[1, 2],
        [9, 9]],

       [[5, 6],
        [8, 8]]])

### Initializing Different Types of Arrays

In [18]:
# All 0s matrix: use the built-in function np.zeros
# Specify a shape: an int gives a 1-D array (here a vector of 5 elements), a tuple gives rows and columns
a = np.zeros(5)
b = np.zeros((2,3))
print(a)
print(b)

[0. 0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]]


In [19]:
# All 1s matrix
np.ones((4,2,2), dtype='int32')

array([[[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]]], dtype=int32)

In [20]:
# Any other number: (2,2) is the shape (2 by 2) and 99 is the fill value, giving an all-99 matrix
np.full((2,2), 99)

array([[99, 99],
       [99, 99]])

In [22]:
a = np.array([[1,2,3,4,5,6,7],[8,9,10,11,12,13,14]])
np.full(a.shape, 4)
# Gives an array with the same shape as a, filled with 4

array([[4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4]])

In [23]:
# Any other number (full_like): for this a, gives the same result as np.full(a.shape, 4)
# (full_like takes both the shape and the dtype from a)
np.full_like(a, 4)

array([[4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4]])

In [56]:
# Random decimal numbers
# Specify the shape
np.random.rand(4,2)

array([[0.07805642, 0.53385716],
       [0.02494273, 0.99955252],
       [0.48588042, 0.91247437],
       [0.27779213, 0.16597751]])

In [61]:
# Random Integer values
# randint(startvalue, endvalue, size): endvalue is exclusive; when only one bound is given, startvalue defaults to 0
np.random.randint(-4,8, size=(3,3)) 

array([[ 6,  3, -1],
       [ 7, -2, -4],
       [ 2,  2,  1]])

In [24]:
# The identity matrix
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [27]:
np.eye(5) == np.identity(5)

array([[ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])

In [28]:
# Repeat an array
arr = np.array([[1,2,3]])
r1 = np.repeat(arr,3, axis=0)
r2 = np.repeat(arr,3, axis=1)
print(r1)
print(r2)

[[1 2 3]
 [1 2 3]
 [1 2 3]]
[[1 1 1 2 2 2 3 3 3]]


In [63]:
output = np.ones((5,5))
print(output)

z = np.zeros((3,3))
z[1,1] = 9
print(z)

output[1:-1,1:-1] = z # replace
print(output)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
[[0. 0. 0.]
 [0. 9. 0.]
 [0. 0. 0.]]
[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


In [64]:
a = np.array([1,2,3])
b = a
b[0] = 100
print(a)
# Assigning b = a does not copy: both names refer to the same array, so changes made through b also show up in a

[100   2   3]


##### Be careful when copying arrays!!!

In [29]:
a = np.array([1,2,3])
b = a.copy()
b[0] = 100

print(a)
# copy() duplicates the contents of a, so modifying b leaves a unchanged

[1 2 3]


### Mathematics

In [67]:
# Element wise
a = np.array([1,2,3,4])
print(a)

[1 2 3 4]


In [68]:
a + 2

array([3, 4, 5, 6])

In [69]:
a - 2

array([-1,  0,  1,  2])

In [70]:
a * 2

array([2, 4, 6, 8])

In [71]:
a / 2

array([0.5, 1. , 1.5, 2. ])

In [72]:
b = np.array([1,0,1,0])
a + b

array([2, 2, 4, 4])

In [113]:
a ** 2

array([ 1,  4,  9, 16], dtype=int32)

In [116]:
# Take the cosine
np.cos(a)



array([ 0.54030231, -0.41614684, -0.9899925 , -0.65364362])

In [117]:
# For a lot more (https://numpy.org/doc/stable/reference/routines.math.html)

##### Linear Algebra

In [73]:
a = np.ones((2,3))
print(a)

b = np.full((3,2), 2)
print(b)
# Matrix multiplication function: a (2,3) array times a (3,2) array gives a (2,2) result
np.matmul(a,b)

[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]


array([[6., 6.],
       [6., 6.]])

In [75]:
# Find the determinant of a matrix
c = np.identity(3)
print(c)
np.linalg.det(c)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


1.0

In [133]:
## Reference docs (https://numpy.org/doc/stable/reference/routines.linalg.html)

# Determinant
# Trace
# Singular Value Decomposition
# Eigenvalues
# Matrix Norm
# Inverse
# Etc...

##### Statistics

In [77]:
stats = np.array([[1,2,3],[4,5,6]])
stats

array([[1, 2, 3],
       [4, 5, 6]])

In [79]:
# Minimum value over all elements
np.min(stats)

1

In [80]:
# The min of the first row and second row
np.min(stats, axis = 1)

array([1, 4])

In [83]:
# The max of each column (axis=0): first, second and third column
np.max(stats, axis=0)

array([4, 5, 6])

In [84]:
# Sum all elements in the matrix
np.sum(stats)

21

In [143]:
# Sum all elements in each column
np.sum(stats, axis=0)

array([5, 7, 9])

### Reorganizing Arrays

In [86]:
before = np.array([[1,2,3,4],[5,6,7,8]])
print(before)
print(before.shape)

[[1 2 3 4]
 [5 6 7 8]]
(2, 4)


In [87]:
after = before.reshape((1,8))
print(after)
# Note: the total number of elements must not change

[[1 2 3 4 5 6 7 8]]


In [158]:
# Vertically stacking vectors (each vector becomes one row)
v1 = np.array([1,2,3,4])
v2 = np.array([5,6,7,8])

np.vstack([v1,v2,v1,v2])

array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [1, 2, 3, 4],
       [5, 6, 7, 8]])

In [164]:
# Horizontal stack (arrays are joined side by side)
h1 = np.ones((2,4))
h2 = np.zeros((2,2))

np.hstack((h1,h2))
# Either a tuple () or a list [] of arrays works

array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

### Miscellaneous
##### Load Data from File

In [89]:
# Turn the data in the file into a NumPy array
# delimiter is the separator: the values in the file are separated by commas
filedata = np.genfromtxt('data.txt', delimiter=',')
filedata = filedata.astype('int32')
# astype copies all the data into the dtype you specify
print(filedata) 

[[  1  13  21  11 196  75   4   3  34   6   7   8   0   1   2   3   4   5]
 [  3  42  12  33 766  75   4  55   6   4   3   4   5   6   7   0  11  12]
 [  1  22  33  11 999  11   2   1  78   0   1   2   9   8   7   1  76  88]]


##### Boolean Masking and Advanced Indexing

In [196]:
# True where the value is NOT strictly between 50 and 100
(~((filedata > 50) & (filedata < 100)))

array([[ True,  True,  True,  True,  True, False,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True, False,  True, False,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True, False, False]])

In [90]:
filedata > 50
# F or T based on specific location

array([[False, False, False, False,  True,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [91]:
filedata[filedata > 50]
# grab only the elements that > 50

array([196,  75, 766,  75,  55, 999,  78,  76,  88], dtype=int32)

In [93]:
# You can index with a list in Numpy
ex = np.array([1,2,3,4,5,6,7,8])
ex[[1,4,6]]

array([2, 5, 7])

In [95]:
# For each column (axis=0): True if any value in that column is > 50
np.any(filedata > 50, axis = 0)

array([False, False, False, False,  True,  True, False,  True,  True,
       False, False, False, False, False, False, False,  True,  True])

In [96]:
np.any(filedata > 50, axis = 1)

array([ True,  True,  True])

In [97]:
# For each column (axis=0): True only if every value in that column is > 50.
# np.all requires the condition to hold for all rows, whereas np.any needs just one.
np.all(filedata > 50, axis = 0)

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False])

In [101]:
((filedata > 50) & (filedata < 100))

array([[False, False, False, False, False,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [102]:
# NOT (value > 50 and value < 100): True for values outside the open interval (50, 100)
(~((filedata > 50) & (filedata < 100)))

array([[ True,  True,  True,  True,  True, False,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True, False,  True, False,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True, False, False]])

# Exercise

In [104]:
a = np.array([[1,2,3,4,5],[6,7,8,9,10],[11,12,13,14,15],[16,17,18,19,20],[21,22,23,24,25]])

In [105]:
a

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [106]:
a[3,4]
# The simplest kind of indexing: a single [row, column] element

20

In [108]:
a[2:4,0:2]
# Selects rows 2-3 and columns 0-1

array([[11, 12],
       [16, 17]])

In [110]:
a[[0,1,2,3],[1,2,3,4]]
# Indexing with two lists pairs them up, giving the elements at [0,1], [1,2], [2,3] and [3,4]

array([ 2,  8, 14, 20])

In [111]:
a[[0,-2,-1], 3:]

array([[ 4,  5],
       [19, 20],
       [24, 25]])