# NumPy for Scientific Computing


Hello again, GDSC members, and welcome to the third session of our "Python for Machine Learning" workshop series. 🥳🥳

In [1]:
import numpy as np

In [2]:
def show_info(ndarray):
    """Print a one-line summary of an array: its number of dimensions and its shape.

    Parameters
    ----------
    ndarray : object exposing ``ndim`` and ``shape`` attributes (e.g. a NumPy array).
    """
    n_dims, dims = ndarray.ndim, ndarray.shape
    print(f'a {n_dims}-D array of shape {dims}')

## Create an ndarray

In [3]:
# 1-D array: a flat Python list becomes a vector with a single axis.
A = np.array([1, 2, 3, 4])

# Values, one blank separator line, then the dimension/shape summary.
print(A, end='\n\n')
show_info(A)

[1 2 3 4]

a 1-D array of shape (4,)


In [21]:
# 2-D array: every inner list becomes one row of the matrix.
rows = [[1, 2], [3, 4], [5, 6]]
B = np.array(rows)

# Values, one blank separator line, then the dimension/shape summary.
print(B, end='\n\n')
show_info(B)

[[1 2]
 [3 4]
 [5 6]]

a 2-D array of shape (3, 2)


In [22]:
# 3-D array: a list of 2-D blocks, each block being a list of rows.
first_block = [[1, 2, 3], [4, 5, 6]]
second_block = [[7, 8, 9], [10, 11, 12]]
C = np.array([first_block, second_block])

# Values, one blank separator line, then the dimension/shape summary.
print(C, end='\n\n')
show_info(C)

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]

a 3-D array of shape (2, 2, 3)


## Other ndarray Constructors

In [31]:
# A 3x2 array whose entries are all floating-point zeros.
zeros = np.zeros((3, 2))

print(zeros, end='\n\n')
show_info(zeros)

[[0. 0.]
 [0. 0.]
 [0. 0.]]
a 2-D array of shape (3, 2)


In [32]:
# A 2x3 array whose entries are all floating-point ones.
ones = np.ones((2, 3))

print(ones, end='\n\n')
show_info(ones)

[[1. 1. 1.]
 [1. 1. 1.]]
a 2-D array of shape (2, 3)


In [33]:
# The 4x4 identity matrix: ones on the main diagonal, zeros elsewhere.
eye = np.eye(N=4)

print(eye, end='\n\n')
show_info(eye)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
a 2-D array of shape (4, 4)


In [27]:
# 3x3 samples from the standard normal distribution.
# NOTE: no seed is set here, so the printed values differ from run to run.
sample_shape = (3, 3)
normal_dist = np.random.randn(*sample_shape)

print(normal_dist, end='\n\n')
show_info(normal_dist)

[[-1.78158225 -1.42816247 -0.0532788 ]
 [ 0.61217805 -0.72653542 -0.50191042]
 [ 0.41304839  2.19472331 -1.75379614]]
a 2-D array of shape (3, 3)


## 1D array Constructors

In [34]:
# 50 evenly spaced samples from -10 to 10, both endpoints included.
array_1 = np.linspace(start=-10, stop=10, num=50)

print(array_1, end='\n\n')
show_info(array_1)

[-10.          -9.59183673  -9.18367347  -8.7755102   -8.36734694
  -7.95918367  -7.55102041  -7.14285714  -6.73469388  -6.32653061
  -5.91836735  -5.51020408  -5.10204082  -4.69387755  -4.28571429
  -3.87755102  -3.46938776  -3.06122449  -2.65306122  -2.24489796
  -1.83673469  -1.42857143  -1.02040816  -0.6122449   -0.20408163
   0.20408163   0.6122449    1.02040816   1.42857143   1.83673469
   2.24489796   2.65306122   3.06122449   3.46938776   3.87755102
   4.28571429   4.69387755   5.10204082   5.51020408   5.91836735
   6.32653061   6.73469388   7.14285714   7.55102041   7.95918367
   8.36734694   8.7755102    9.18367347   9.59183673  10.        ]

a 1-D array of shape (50,)


In [35]:
# Integers from -10 up to (but not including) 10, in steps of 1.
array_2 = np.arange(start=-10, stop=10, step=1)

print(array_2, end='\n\n')
show_info(array_2)

[-10  -9  -8  -7  -6  -5  -4  -3  -2  -1   0   1   2   3   4   5   6   7
   8   9]

a 1-D array of shape (20,)


## dtypes

In [38]:
# The same four numbers stored with two explicit dtypes, plus a boolean
# array whose dtype NumPy infers from the Python bools.
numbers = [1, 2, 3, 4]
float_array = np.array(numbers, dtype=np.float32)
int_array = np.array(numbers, dtype=np.int32)
boolean_array = np.array([True, False, False, False])

# Report each array with its dtype; sections are separated by one blank line.
typed_arrays = (
    ('Float array', float_array),
    ('Int array', int_array),
    ('Boolean array', boolean_array),
)
for position, (title, typed_array) in enumerate(typed_arrays):
    if position > 0:
        print()
    print(title)
    print(typed_array)
    print(f'Array of type {typed_array.dtype}')

Float array
[1. 2. 3. 4.]
Array of type float32

Int array
[1 2 3 4]
Array of type int32

Boolean array
[ True False False False]
Array of type bool


## hstack and vstack


In [40]:
# A is 3x4, B is 2x4 (same column count as A), C is 3x2 (same row count as A).
A = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
B = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
C = np.array([[1, 2], [3, 4], [5, 6]])

# hstack glues arrays side by side (row counts must match);
# vstack glues them on top of each other (column counts must match).
hstack_array = np.hstack([A, C])
vstack_array = np.vstack([A, B])

# Show the three inputs, each followed by a blank line.
for title, matrix in (('Matrix A', A), ('Matrix B', B), ('Matrix C', C)):
    print(title)
    print(matrix, end='\n\n')

# Show both stacked results with their shapes.
print('Horizontal Stack (A and C)')
print(hstack_array)
show_info(hstack_array)
print()
print('Vertical Stack (A and B)')
print(vstack_array)
show_info(vstack_array)

Matrix A
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

Matrix B
[[1 2 3 4]
 [5 6 7 8]]

Matrix C
[[1 2]
 [3 4]
 [5 6]]

Horizontal Stack (A and C)
[[ 1  2  3  4  1  2]
 [ 5  6  7  8  3  4]
 [ 9 10 11 12  5  6]]
a 2-D array of shape (3, 6)

Vertical Stack (A and B)
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [ 1  2  3  4]
 [ 5  6  7  8]]
a 2-D array of shape (5, 4)


## ndarray.reshape

In [41]:
# A 2x6 matrix holding the numbers 1..12.
array_1 = np.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]])

# Same 12 elements, laid out row by row as 3x4.
target_shape = (3, 4)
array_2 = np.reshape(array_1, target_shape)

print('Array 1')
print(array_1)
show_info(array_1)
print()

print('Array 2 (array_1.reshape((3, 4)))')
print(array_2)
show_info(array_2)

Array 1
[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]]
a 2-D array of shape (2, 6)

Array 2 (array_1.reshape((3, 4)))
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
a 2-D array of shape (3, 4)


## ndarray.ravel

In [42]:
# A 2x3 matrix to flatten.
array_1 = np.array([[1, 2, 3], [4, 5, 6]])

# ravel flattens the matrix into a single axis, reading it row by row.
array_2 = np.ravel(array_1)

print('Array 1')
print(array_1)
show_info(array_1)
print()

print('Array 2 (array_1.ravel())')
print(array_2)
show_info(array_2)

Array 1
[[1 2 3]
 [4 5 6]]
a 2-D array of shape (2, 3)

Array 2 (array_1.ravel())
[1 2 3 4 5 6]
a 1-D array of shape (6,)


## Indexing

In [46]:
# Indexing is zero-based: A[row, column], so A[0, 0] is the top-left element.
A = np.array([[1, 2, 3], [4, 5, 6]])

print(A)
show_info(A)
print()

print(f'Value in the first row, first column: {A[0, 0]}')
print(f'Value in the first row, second column: {A[0, 1]}')

[[1 2 3]
 [4 5 6]]
a 2-D array of shape (2, 3)

Value in the first row, first column: 1
Value in the first row, second column: 2


## Slicing

In [48]:
# 4x4 matrix holding 1..16; np.arange builds the sequence directly instead of
# materialising a Python list first.
A = np.arange(1, 17).reshape((4, 4))

print(A)
show_info(A)
print()

# The labels below use ZERO-BASED row/column numbers, and every slice is
# half-open: start is included, stop is excluded.

# Row index 1, column indices 2 and 3 (stop index 4 is excluded).
print('Row one, columns two to four')
print(A[1, 2:4])
print()

# Every row, column index 1.
print('All rows in column one')
print(A[:, 1])
print()

# Row indices 2.. and column indices 2.. (index 2 itself is included).
print('All rows after row two, all columns after column two')
print(A[2:, 2:])
print()

# Rows 1, 3 (step 2 starting AT row 1) and columns 0, 2 (step 2 from the start).
print('Every other row after row one, every other column')
print(A[1::2, ::2])
print()

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]]
a 2-D array of shape (4, 4)

Row one, columns two to four
[7 8]

All rows in column one
[ 2  6 10 14]

All rows after row two, all columns after column two
[[11 12]
 [15 16]]

Every other row after row one, every other column
[[ 5  7]
 [13 15]]



## Boolean Indexing

In [50]:
A = np.array([[5, 4, 5], [4, 4, 4], [5, 4, 5]])

# Element-wise comparison: True wherever the entry of A is below 5.
mask = np.less(A, 5)

# Show the matrix and its mask before any modification.
for title, array in (('Matrix A', A), ('Boolean mask', mask)):
    print(title)
    print(array)
    show_info(array)
    print()

# In-place assignment: only the positions where mask is True are overwritten.
A[mask] = 10

print('A[A < 5] = 10\n')

print('Matrix A')
print(A)
show_info(A)

Matrix A
[[5 4 5]
 [4 4 4]
 [5 4 5]]
a 2-D array of shape (3, 3)

Boolean mask
[[False  True False]
 [ True  True  True]
 [False  True False]]
a 2-D array of shape (3, 3)

A[A < 5] = 10

Matrix A
[[ 5 10  5]
 [10 10 10]
 [ 5 10  5]]
a 2-D array of shape (3, 3)


## sum, min, argmin, max, argmax, sort, argsort

In [55]:
A = np.array([[5, 0, 3], [3, 7, 9]])

# axis=0 works down each column (one result per column);
# axis=1 works across each row (one result per row).

# sum
sum_axis_0, sum_axis_1 = np.sum(A, axis=0), np.sum(A, axis=1)

# min / argmin (argmin is the index at which the minimum occurs)
min_axis_0, min_axis_1 = np.min(A, axis=0), np.min(A, axis=1)
argmin_axis_0, argmin_axis_1 = np.argmin(A, axis=0), np.argmin(A, axis=1)

# max / argmax (argmax is the index at which the maximum occurs)
max_axis_0, max_axis_1 = np.max(A, axis=0), np.max(A, axis=1)
argmax_axis_0, argmax_axis_1 = np.argmax(A, axis=0), np.argmax(A, axis=1)

# sort returns a sorted copy; argsort returns the indices that would sort A
sort_axis_0, sort_axis_1 = np.sort(A, axis=0), np.sort(A, axis=1)
argsort_axis_0, argsort_axis_1 = np.argsort(A, axis=0), np.argsort(A, axis=1)

print('Matrix A')
print(A)
print()

# One (heading, value, heading, value) entry per operation; each pair of
# results is followed by a blank line.
reports = (
    ('Sum on axis 0', sum_axis_0, 'Sum on axis 1', sum_axis_1),
    ('Minimum on axis 0', min_axis_0, 'Minimum on axis 1', min_axis_1),
    ('Argmin on axis 0', argmin_axis_0, 'Argmin on axis 1', argmin_axis_1),
    ('Maximum on axis 0', max_axis_0, 'Maximum on axis 1', max_axis_1),
    ('Argmax on axis 0', argmax_axis_0, 'Argmax on axis 1', argmax_axis_1),
    ('Sorted on axis 0', sort_axis_0, 'Sorted on axis 1', sort_axis_1),
    ('Argsort on axis 0', argsort_axis_0, 'Argsort on axis 1', argsort_axis_1),
)
for heading_0, result_0, heading_1, result_1 in reports:
    print(heading_0)
    print(result_0)
    print(heading_1)
    print(result_1)
    print()

Matrix A
[[5 0 3]
 [3 7 9]]

Sum on axis 0
[ 8  7 12]
Sum on axis 1
[ 8 19]

Minimum on axis 0
[3 0 3]
Minimum on axis 1
[0 3]

Argmin on axis 0
[1 0 0]
Argmin on axis 1
[1 0]

Maximum on axis 0
[5 7 9]
Maximum on axis 1
[5 9]

Argmax on axis 0
[0 1 1]
Argmax on axis 1
[0 2]

Sorted on axis 0
[[3 0 3]
 [5 7 9]]
Sorted on axis 1
[[0 3 5]
 [3 7 9]]

Argsort on axis 0
[[1 0 0]
 [0 1 1]]
Argsort on axis 1
[[1 2 0]
 [0 1 2]]



## Mathematical Functions

In [56]:
A = np.array([[1, 2, 3], [4, 5, 6]])

# Each function is applied element-wise and returns an array shaped like A.
exp_A, log_A = np.exp(A), np.log(A)
cos_A, sin_A = np.cos(A), np.sin(A)

print('A')
print(A)
show_info(A)
print()

# Every result except the last is followed by a blank line.
for title, values in (('Exponential', exp_A), ('Logarithm', log_A), ('Cosine', cos_A)):
    print(title)
    print(values, end='\n\n')

print('Sine')
print(sin_A)


A
[[1 2 3]
 [4 5 6]]
a 2-D array of shape (2, 3)

Exponential
[[  2.71828183   7.3890561   20.08553692]
 [ 54.59815003 148.4131591  403.42879349]]

Logarithm
[[0.         0.69314718 1.09861229]
 [1.38629436 1.60943791 1.79175947]]

Cosine
[[ 0.54030231 -0.41614684 -0.9899925 ]
 [-0.65364362  0.28366219  0.96017029]]

Sine
[[ 0.84147098  0.90929743  0.14112001]
 [-0.7568025  -0.95892427 -0.2794155 ]]


## Statistics

In [59]:
A = np.array([[5, 0, 3], [3, 7, 9]])

# With no axis argument, each statistic is computed over all elements of A.
mean_A = np.mean(A)
std_A = np.std(A)
var_A = np.var(A)

print(A)
show_info(A)
print()

# Standard deviation and variance are shown rounded to three decimals.
print(f'Mean of A: {mean_A}')
print(f'Standard deviation of A: {std_A:.3f}')
print(f'Variance of A: {var_A:.3f}')

[[5 0 3]
 [3 7 9]]
a 2-D array of shape (2, 3)

Mean of A: 4.5
Standard deviation of A: 2.930
Variance of A: 8.583


## Correlation

![image.png](attachment:image.png)

In [60]:
# corrcoef treats each ROW of A as one variable, so a 2-row A yields a 2x2 matrix.
correlation = np.corrcoef(x=A)

print('Correlation Matrix')
print(correlation)

Correlation Matrix
[[ 1.         -0.56362148]
 [-0.56362148  1.        ]]


## Unique values and their number of repetitions

In [61]:
A = np.array([[5, 0, 3], [3, 7, 9]])

# Returns a pair: (sorted distinct values, how many times each one occurs).
np.unique(ar=A, return_counts=True)

(array([0, 3, 5, 7, 9]), array([1, 2, 1, 1, 1], dtype=int64))

## Sort values with counts

In [73]:
A = np.array([[1, 2, 3, 2, 2, 2, 1],
              [2, 1, 5, 5, 1, 2, 3],
              [8, 8, 1, 2, 4, 2, 9]])

# Distinct values (ascending) together with the number of times each occurs.
values, counts = np.unique(A, return_counts=True)

# .tolist() converts NumPy scalars to plain Python ints, so the dict prints as
# {1: 5, 2: 8, ...} on every NumPy version (NumPy 2 would otherwise show
# np.int64(...) wrappers around each key and value).
value_counts = dict(zip(values.tolist(), counts.tolist()))

# A stable sort guarantees that values with the same count keep their
# ascending order instead of depending on the sort algorithm's tie handling.
sorted_values = values[counts.argsort(kind='stable')]

print('Matrix A')
print(A)
print()

print(f'Unique values\' counts: {value_counts}')
print(f'Values sorted from less frequent to more frequent: {sorted_values}')

Matrix A
[[1 2 3 2 2 2 1]
 [2 1 5 5 1 2 3]
 [8 8 1 2 4 2 9]]

Unique values' counts: {1: 5, 2: 8, 3: 2, 4: 1, 5: 2, 8: 2, 9: 1}
Values sorted from less frequent to more frequent: [4 9 3 5 8 1 2]


## nan (not a number)

In [74]:
# Seed the legacy global generator so this cell produces the same matrix on
# every run (including after Restart & Run All) instead of depending on
# whatever random state earlier cells left behind.
np.random.seed(0)

# 5x5 standard-normal samples with two entries replaced by nan to simulate
# missing values.
A = np.random.randn(5, 5)
A[0, 2] = np.nan
A[4, 3] = np.nan
A

array([[ 1.76405235,  0.40015721,         nan,  2.2408932 ,  1.86755799],
       [-0.97727788,  0.95008842, -0.15135721, -0.10321885,  0.4105985 ],
       [ 0.14404357,  1.45427351,  0.76103773,  0.12167502,  0.44386323],
       [ 0.33367433,  1.49407907, -0.20515826,  0.3130677 , -0.85409574],
       [-2.55298982,  0.6536186 ,  0.8644362 ,         nan,  2.26975462]])

In [78]:
# Ordinary mean over all elements of A (not nan-aware).
np.mean(A)

nan

Statistical methods return nan if the array contains missing values; however, NumPy also provides nan-aware variants (such as `np.nanmean`) that ignore the missing values.

In [77]:
# nan-aware mean over all elements of A.
np.nanmean(A, axis=None)

0.5062075424895804

## Dealing with nan

In [83]:
# Boolean array: True wherever A holds nan.
missing = np.isnan(A)

# Count the number of missing values (each True counts as 1)
n_nan = missing.sum()

# Percentage of missing values (mean of a boolean array is the fraction of True entries)
pct_nan = missing.mean() * 100

print(f'Number of missing values: {n_nan}')
print(f'Percentage of missing values: {pct_nan}%')

Number of missing values: 2
Percentage of missing values: 8.0%


In [84]:
# Build a new array from A in which every missing value is replaced by 0;
# A itself is left untouched.
A_copy = np.where(np.isnan(A), 0, A)

print(A_copy)
show_info(A_copy)

[[ 1.76405235  0.40015721  0.          2.2408932   1.86755799]
 [-0.97727788  0.95008842 -0.15135721 -0.10321885  0.4105985 ]
 [ 0.14404357  1.45427351  0.76103773  0.12167502  0.44386323]
 [ 0.33367433  1.49407907 -0.20515826  0.3130677  -0.85409574]
 [-2.55298982  0.6536186   0.8644362   0.          2.26975462]]
a 2-D array of shape (5, 5)


## Linear Algebra

In [85]:
# A is 2x3 and B is 3x2, so both A·B and B·A are defined.
A = np.ones(shape=(2, 3))
B = np.ones(shape=(3, 2))

print('Matrix A')
print(A, end='\n\n')
print('Matrix B')
print(B)

Matrix A
[[1. 1. 1.]
 [1. 1. 1.]]

Matrix B
[[1. 1.]
 [1. 1.]
 [1. 1.]]


In [86]:
# dot product (matrix multiplication for 2-D arrays); the order of the
# operands determines the shape of the result.
print('A x B')
print(np.dot(A, B), end='\n\n')

print('B x A')
print(np.dot(B, A), end='\n\n')

A x B
[[3. 3.]
 [3. 3.]]

B x A
[[2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]]



In [90]:
# Fixed 3x3 integer matrix used by the determinant / inverse / eigen cells below.
# It was originally drawn with np.random.randint(0, 10, (3, 3)); it is pinned here
# because an unseeded draw changes on every run and can be singular, in which case
# np.linalg.inv raises LinAlgError. This matrix has determinant -47, so it is invertible.
A = np.array([[0, 4, 5],
              [5, 6, 8],
              [4, 1, 4]])
print(A)

[[0 4 5]
 [5 6 8]
 [4 1 4]]


In [95]:
# A non-zero determinant means A is invertible.
print(f'Determinant of A: {np.linalg.det(A)}\n')

# For an invertible square matrix the pseudo-inverse coincides with the inverse.
# Each routine is called only after its heading has been printed.
for heading, routine in (('Inverse of A:', np.linalg.inv),
                         ('Pseudo-Inverse of A:', np.linalg.pinv)):
    print(heading)
    print(routine(A), end='\n\n')



Determinant of A: -46.99999999999999

Inverse of A:
[[-0.34042553  0.23404255 -0.04255319]
 [-0.25531915  0.42553191 -0.53191489]
 [ 0.40425532 -0.34042553  0.42553191]]

Pseudo-Inverse of A:
[[-0.34042553  0.23404255 -0.04255319]
 [-0.25531915  0.42553191 -0.53191489]
 [ 0.40425532 -0.34042553  0.42553191]]



In [99]:
# eig returns the eigenvalues and a matrix whose COLUMNS are the matching eigenvectors.
decomposition = np.linalg.eig(A)
eigen_values, eigen_vectors = decomposition

print(f'Eigen values of A: {eigen_values}\n')
print(f'Eigen vectors of A:\n{eigen_vectors}')

Eigen values of A: [11.70711161 -3.03144729  1.32433567]

Eigen vectors of A:
[[-0.42874248 -0.87114314 -0.19148208]
 [-0.84038624  0.04955314 -0.79053568]
 [-0.33155821  0.48852239  0.58171123]]


## Broadcasting

In [130]:
# Fix the seed so the random matrix A is identical on every run.
np.random.seed(0)

# A: random integers in [0, 10), shape (2, 3)
A = np.random.randint(low=0, high=10, size=(2, 3))
# B: same shape as A
B = np.ones(shape=(2, 3))
# C: a single row, shape (1, 3)
C = np.array([[1, 2, 3]])
# D: a single column, shape (2, 1)
D = np.array([[1], [2]])

In [131]:
# Show both operands with their shapes.
for title, matrix in (('Matrix A:', A), ('Matrix B:', B)):
    print(title)
    print(matrix)
    show_info(matrix)
print()

# Same-shape addition is element-wise; adding a scalar applies it to every element.
print('A + B =')
print(np.add(A, B))
print('A + 1 =')
print(np.add(A, 1.))

Matrix A:
[[5 0 3]
 [3 7 9]]
a 2-D array of shape (2, 3)
Matrix B:
[[1. 1. 1.]
 [1. 1. 1.]]
a 2-D array of shape (2, 3)

A + B =
[[ 6.  1.  4.]
 [ 4.  8. 10.]]
A + 1 =
[[ 6.  1.  4.]
 [ 4.  8. 10.]]


In [124]:
# Show both operands with their shapes.
for title, matrix in (('Matrix A:', A), ('Matrix C:', C)):
    print(title)
    print(matrix)
    show_info(matrix)
print()

# C has a single row, so that row is added to every row of A.
print('A + C =')
print(np.add(A, C))

Matrix A:
[[5 0 3]
 [3 7 9]]
a 2-D array of shape (2, 3)
Matrix C:
[[1 2 3]]
a 2-D array of shape (1, 3)

A + C =
[[ 6  2  6]
 [ 4  9 12]]


In [125]:
# Show both operands with their shapes.
for title, matrix in (('Matrix A:', A), ('Matrix D:', D)):
    print(title)
    print(matrix)
    show_info(matrix)
print()

# D has a single column, so that column is added to every column of A.
print('A + D =')
print(np.add(A, D))

Matrix A:
[[5 0 3]
 [3 7 9]]
a 2-D array of shape (2, 3)
Matrix D:
[[1]
 [2]]
a 2-D array of shape (2, 1)

A + D =
[[ 6  1  4]
 [ 5  9 11]]


In [138]:
# Fix the seed so the random column vector is identical on every run.
np.random.seed(0)

# A is a (4, 1) column and B is a (1, 3) row; broadcasting stretches both,
# so their sum has shape (4, 3).
A = np.random.randint(low=0, high=10, size=(4, 1))
B = np.ones(shape=(1, 3))
C = np.add(A, B)

# Operands first, each followed by a blank line, then the broadcast result.
for title, matrix in (('Matrix A:', A), ('Matrix B:', B)):
    print(title)
    print(matrix)
    show_info(matrix)
    print()

print('A + B =')
print(C)
show_info(C)

Matrix A:
[[5]
 [0]
 [3]
 [3]]
a 2-D array of shape (4, 1)

Matrix B:
[[1. 1. 1.]]
a 2-D array of shape (1, 3)

A + B =
[[6. 6. 6.]
 [1. 1. 1.]
 [4. 4. 4.]
 [4. 4. 4.]]
a 2-D array of shape (4, 3)
