In [1]:
# Installations and import 
# pip install numpy 
import numpy as np 

Numpy Basics

In [2]:
# Build a 1-D array and report its basic attributes
a = np.array([1, 2, 3])
basics = (
    ("Array", a),
    ("Number of dimensions", a.ndim),
    ("Shape", a.shape),
    ("Data type", a.dtype),
    ("Item size in bytes", a.itemsize),
)
for label, value in basics:
    print(f"{label} = {value}")

Array = [1 2 3]
Number of dimensions = 1
Shape = (3,)
Data type = int64
Item size in bytes = 8


In [3]:
# Mix ints and floats in one array: NumPy stores every element with a single
# common dtype, so the integers are upcast (float64 here)
rows = [
    [1, 2, 3],
    [3.2, 4, 5],
    [1, 3, 5.2],
]
a = np.array(rows)
print(a)
print(f"Data type = {a.dtype}")

[[1.  2.  3. ]
 [3.2 4.  5. ]
 [1.  3.  5.2]]
Data type = float64


In [4]:
# Indexing and slicing: a 3x3 integer matrix used by the following cells
a = np.array([
    [1, 2, 3],
    [3, 4, 5],
    [1, 3, 5],
])
a

array([[1, 2, 3],
       [3, 4, 5],
       [1, 3, 5]])

In [5]:
# Read a single element with a[row, column]; both indices are zero-based
print(a[0,2])  # row 0, column 2
print(a[1,2])  # row 1, column 2

3
5


In [6]:
# Select a whole row or column: a bare ":" keeps every index along that axis
print(f"Numbers from 1st row = {a[0, :]}")  # row 0, all columns
print(f"Numbers from 2nd column = {a[:, 1]}")  # all rows, column 1

Numbers from 1st row = [1 2 3]
Numbers from 2nd column = [2 4 3]


In [7]:
# Get a sub-matrix: slices are start:stop with stop excluded,
# so 1:3 picks indices 1 and 2 along both the rows and the columns
b = a[1:3,1:3]
b

array([[4, 5],
       [3, 5]])

In [8]:
# Pick specific (not necessarily adjacent) columns by passing a list of indices:
# here all rows, columns 0 and 2
b = a[:,[0,2]]
b

array([[1, 3],
       [3, 5],
       [1, 5]])

In [9]:
# Boolean masking: comparing an array with a scalar gives a same-shaped array
# of True/False; indexing with it keeps only the True positions (flattened to 1-D)
a = np.array([
    [4, 1, 6],
    [5, 2, 5],
    [7, 10, 11],
])
greater_than_four = a > 4
b = a[greater_than_four]
b

array([ 6,  5,  5,  7, 10, 11])

In [10]:
# Combine two conditions element-wise with &; each comparison needs its own
# parentheses because & binds more tightly than > and <
in_range = (a > 2) & (a < 10)
b = a[in_range]
b

array([4, 6, 5, 5, 7])

Initialize different types of arrays

In [11]:
# 3 rows x 2 columns filled with 0.0 (float by default)
zeros = np.zeros(shape=(3, 2))
zeros

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [12]:
# 3x3 matrix filled with 1.0 (float by default)
ones = np.ones(shape=(3, 3))
ones

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [13]:
# 3x4 matrix where every element is the given fill value (2)
twos = np.full(shape=(3, 4), fill_value=2)
twos

array([[2, 2, 2, 2],
       [2, 2, 2, 2],
       [2, 2, 2, 2]])

In [14]:
# Identity matrix: ones on the main diagonal, zeros elsewhere
size = 4  # produces a 4x4 matrix
identity = np.identity(size)
identity

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [15]:
# Create random matrices with values between 0 and 1;
# each argument to rand is the size of one dimension
# NOTE(review): no seed is set in the visible cells, so the printed numbers
# presumably differ on every run -- confirm whether reproducibility matters here
random_3x2_matrix = np.random.rand(3,2)
print("Random 3x2 matrix")
print(random_3x2_matrix)
random_3x2x2_matrix = np.random.rand(3,2,2)
print("\nRandom 3x2x2 matrix")
print(random_3x2x2_matrix)

Random 3x2 matrix
[[0.83271249 0.58218417]
 [0.33441797 0.05632928]
 [0.48616002 0.67543143]]

Random 3x2x2 matrix
[[[0.59835648 0.52728203]
  [0.87888195 0.51509995]]

 [[0.67363927 0.19227186]
  [0.9656397  0.2858845 ]]

 [[0.09592197 0.76381005]
  [0.18346077 0.62304725]]]


In [16]:
# Create an evenly spaced array: 15 values from 1 to 10, both endpoints included
a = np.linspace(1, 10, num=15)
a

array([ 1.        ,  1.64285714,  2.28571429,  2.92857143,  3.57142857,
        4.21428571,  4.85714286,  5.5       ,  6.14285714,  6.78571429,
        7.42857143,  8.07142857,  8.71428571,  9.35714286, 10.        ])

In [17]:
# Create an array from 2 up to (but not including) 10, in steps of 2
a = np.arange(2, 10, 2)
a

array([2, 4, 6, 8])

Reorganize arrays

In [18]:
# Start from a 2x3 array (2 rows, 3 columns)
a = np.array([
    [1, 2, 3],
    [3, 1, 2],
])
print(a)
print(f"Shape = {a.shape}")

[[1 2 3]
 [3 1 2]]
Shape = (2, 3)


In [19]:
# Reshape to 3 rows x 2 columns: the element count (6) is unchanged and the
# values are laid out again row by row
b = a.reshape(3, 2)
b

array([[1, 2],
       [3, 3],
       [1, 2]])

In [20]:
# BE CAREFUL: reshape must keep the total number of elements unchanged.
# `a` is 2x3 (6 elements), so it cannot be reshaped to 3x3 (9 elements).
# The error is caught and printed so that "Run All" continues past this demo.
try:
    b = a.reshape((3,3))
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: cannot reshape array of size 6 into shape (3,3)

In [21]:
# Two 2x3 arrays that the following cells stack together
a = np.array([[1, 2, 3],
              [2, 3, 4]])
b = np.array([[2, 3, 4],
              [5, 1, 6]])
for arr in (a, b):
    print(arr)

[[1 2 3]
 [2 3 4]]
[[2 3 4]
 [5 1 6]]


In [22]:
# Vertical stack: b's rows are placed below a's rows (2x3 and 2x3 -> 4x3)
vertical_stack = np.vstack((a, b))
vertical_stack

array([[1, 2, 3],
       [2, 3, 4],
       [2, 3, 4],
       [5, 1, 6]])

In [23]:
# Horizontal stack: b's columns are placed to the right of a's columns (2x3 and 2x3 -> 2x6)
horizontal_stack = np.hstack((a,b))
horizontal_stack

array([[1, 2, 3, 2, 3, 4],
       [2, 3, 4, 5, 1, 6]])

In [24]:
# BE CAREFUL: we can only stack arrays with compatible sizes.
# vstack needs every input to have the same number of columns; here a has 3 and b has 2.
# The error is caught and printed so that "Run All" continues past this demo.
a = np.array([[1,2,3],[1,2,3]])
b = np.array([[1,1]])
try:
    vertical_stack = np.vstack((a,b))
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 3 and the array at index 1 has size 2

Mathematics

In [25]:
# Statistics: each reduction below is computed over all elements of the array
a = np.array([
    [1, 2, 3],
    [2, 3, 1],
])
reductions = (
    ("Min", np.min),
    ("Max", np.max),
    ("Sum", np.sum),
    ("Mean", np.mean),
    ("Variance", np.var),
    ("Standard deviation", np.std),
)
for label, reduce_fn in reductions:
    print(f"{label} = {reduce_fn(a)}")

Min = 1
Max = 3
Sum = 12
Mean = 2.0
Variance = 0.6666666666666666
Standard deviation = 0.816496580927726


In [26]:
# Broadcasting: a scalar operand is applied to every element of the array
a = np.array([
    [1, 2, 3],
    [2, 3, 1],
])
b = a + 1
print("Add 1 to all elements in a")
print(b)

Add 1 to all elements in a
[[2 3 4]
 [3 4 2]]


In [27]:
# Scalar subtraction: 2 is subtracted from every element of a
difference = a - 2
b = difference
print("Subtract 2 to all elements in a")
print(b)

Subtract 2 to all elements in a
[[-1  0  1]
 [ 0  1 -1]]


In [28]:
# Scalar multiplication: every element of a is multiplied by 4
scaled = a * 4
b = scaled
print("Multiple 4 to all elements in a")
print(b)

Multiple 4 to all elements in a
[[ 4  8 12]
 [ 8 12  4]]


In [29]:
# Broadcasting across arrays of different shapes: the single row of b (1x3)
# is added to each of the three rows of a (3x3)
a = np.array([
    [1, 2, 3],
    [2, 3, 1],
    [4, 2, 3],
])
b = np.array([[2, 3, 4]])
a + b

array([[3, 5, 7],
       [4, 6, 5],
       [6, 5, 7]])

Linear Algebra

In [30]:
# Element-wise multiplication (not a matrix product): `*` multiplies a[i] by b[i]
a = np.array([1,2,3])
b = np.array([3,4,2])
a * b

array([3, 8, 6])

In [31]:
# Dot product: the sum of the element-wise products of a and b
print(f"a.b = {a.dot(b)}") 

a.b = 17


In [32]:
# Matrix multiplication with np.matmul (this is the matrix product,
# not the vector cross product)
a = np.array([
    [1, 2, 3],
    [4, 1, 2],
    [1, 2, 3],
])  # 3x3
b = np.array([
    [3, 4],
    [2, 1],
    [5, 1],
])  # 3x2

# Note: the number of columns of a must equal the number of rows of b;
# a (3x3) times b (3x2) gives a 3x2 result
product = np.matmul(a, b)
print("axb =")
print(product)

axb =
[[22  9]
 [24 19]
 [22  9]]


In [33]:
# Determinant of a 2x2 matrix: 1*4 - 2*3 = -2; the result is a float,
# so a tiny rounding error in the last digits is expected
a = np.array([
    [1, 2],
    [3, 4],
])
determinant = np.linalg.det(a)
determinant

-2.0000000000000004

In [34]:
# Inverse matrix: b is the inverse of a, so a x b should equal the identity
# matrix up to floating-point rounding
a = np.array([
    [1, 2],
    [3, 4],
])
b = np.linalg.inv(a)
print("b = ")
print(b)
identity_check = np.matmul(a, b)
print("axb=")
print(identity_check)

b = 
[[-2.   1. ]
 [ 1.5 -0.5]]
axb=
[[1.0000000e+00 0.0000000e+00]
 [8.8817842e-16 1.0000000e+00]]


In [35]:
# Build a 3x3 matrix holding 0..8, used to demonstrate diagonal extraction
x = np.arange(9).reshape(3, 3)
x

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [36]:
# np.diag extracts one diagonal of x: k=0 (the default) is the main diagonal,
# k=1 is the diagonal just above it and k=-1 the one just below it
print(f"0th diagonal = {np.diag(x)}")
print(f"1st diagonal = {np.diag(x, k=1)}")
print(f"-1th diagonal = {np.diag(x, k=-1)}")

0th diagonal = [0 4 8]
1st diagonal = [1 5]
-1th diagonal = [3 7]


Load data from a file

In [38]:
# Parse a comma-separated text file into an array; the relative path is
# resolved against the kernel's current working directory
csv_path = "numbers.csv"
a = np.genfromtxt(csv_path, delimiter=",")
a

array([[1.5, 2.1, 5.4, 6.1],
       [1.2, 3.1, 5.1, 5.1],
       [1. , 5.1, 5.1, 3.2]])

Speed test with Python lists

In [39]:
import time

# Compare adding 1 to one million numbers with an explicit Python loop
# versus a single vectorized NumPy operation.
NUM_ITEMS = 1_000_000
a = list(range(NUM_ITEMS))
b = np.arange(NUM_ITEMS)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations; time.time() is wall-clock time and can jump
# if the system clock is adjusted.
start = time.perf_counter()
for i in range(len(a)):
    a[i] += 1
end = time.perf_counter()
print(f"Time to process Python list = {end - start}s")

start = time.perf_counter()
b = b + 1  # one vectorized operation over the whole array
end = time.perf_counter()
print(f"Time to process Numpy array = {end - start}s")


Time to process Python list = 0.10660815238952637s
Time to process Numpy array = 0.0013051033020019531s
