In [1]:
import numpy as np

In [8]:
# Draw 25 random integers in [1, 100) directly as a 5x5 matrix.
# NOTE: no seed is set in this cell, so the values are expected to change
# whenever the cell is re-executed.
data = np.random.randint(low=1, high=100, size=(5, 5))
data

array([[42, 56, 38, 32,  6],
       [90, 92, 36,  9, 92],
       [88, 26, 47, 35, 99],
       [21,  2, 56, 45, 59],
       [54, 40, 80,  9, 18]])

1. Matrix manipulation - Swap the second and fourth rows of the data matrix.

In [9]:
# Exchange rows 1 and 3 (the second and fourth rows) in a single assignment.
# Indexing with a list on the right-hand side yields a copy, so neither row
# is overwritten before it has been read.
# NOTE: this mutates `data` in place; running the cell a second time swaps
# the two rows back.
data[[1, 3], :] = data[[3, 1], :]
data

array([[42, 56, 38, 32,  6],
       [21,  2, 56, 45, 59],
       [88, 26, 47, 35, 99],
       [90, 92, 36,  9, 92],
       [54, 40, 80,  9, 18]])

2. Normalization - Normalize all the elements in the data matrix. In this context, normalizing means scaling the values so that they fall within the range 0 to 1. To achieve this, subtract the minimum value from each element and divide by the range (max - min).

In [10]:
# Min-max scaling: shift every element by the global minimum, then divide by
# the global range so the smallest element maps to 0 and the largest to 1.
data_min = data.min()
data_max = data.max()
data_norm = (data - data_min) / (data_max - data_min)
data_norm

array([[0.41237113, 0.55670103, 0.37113402, 0.30927835, 0.04123711],
       [0.19587629, 0.        , 0.55670103, 0.44329897, 0.58762887],
       [0.88659794, 0.24742268, 0.46391753, 0.34020619, 1.        ],
       [0.90721649, 0.92783505, 0.35051546, 0.07216495, 0.92783505],
       [0.53608247, 0.39175258, 0.80412371, 0.07216495, 0.16494845]])

3. Z-score normalization - Standardize the elements in the data matrix using Z-score normalization. In this method, all the elements are rescaled to have a mean of 0 and a standard deviation of 1. The formula for Z-score normalization is
$(X - \text{mean}) / \text{std}$, where $X$ is an element of the data matrix.

In [11]:
# Z-score: centre on the mean of all elements, then scale by their standard
# deviation (NumPy's default std, i.e. computed over the flattened matrix).
data_z_score = (data - data.mean()) / data.std()
data_z_score

array([[-0.16866281,  0.3152059 , -0.30691101, -0.51428331, -1.41289662],
       [-0.89446587, -1.55114483,  0.3152059 , -0.06497665,  0.41889205],
       [ 1.42119152, -0.72165561,  0.00414745, -0.41059716,  1.80137407],
       [ 1.49031562,  1.55943972, -0.37603511, -1.30921047,  1.55943972],
       [ 0.2460818 , -0.23778691,  1.14469511, -1.30921047, -0.99815202]])

4. Array splitting - Reshape the data matrix into a vector (Hint: use np.ravel) and split this array into five equal-sized sub-arrays.

In [15]:
# Flatten the matrix to 1-D, then cut it into 5 equal-length pieces.
# Despite its name, `data_vector` ends up holding the list of sub-arrays.
data_vector = np.split(np.ravel(data), indices_or_sections=5)
data_vector

[array([42, 56, 38, 32,  6]),
 array([21,  2, 56, 45, 59]),
 array([88, 26, 47, 35, 99]),
 array([90, 92, 36,  9, 92]),
 array([54, 40, 80,  9, 18])]

5. Dot product - Create two vectors of size 5 with any values. Compute the dot product of the two vectors*.

In [16]:
# Two length-5 vectors with hand-picked values.
vector1 = np.array([3, 5, 6, 7, 8])
vector2 = np.array([2, 4, 6, 8, 10])
# For 1-D operands the @ operator is the inner (dot) product.
vector1 @ vector2

198

6. Matrix multiplication - Create another 3x3 matrix with any values (let’s call it data2). Perform matrix multiplication (dot product of data (first 3x3 part) and data2).

In [22]:
# Second operand: a 3x3 matrix of random integers in [1, 100), drawn directly
# in its final shape.
data2 = np.random.randint(1, 100, size=(3, 3))
# Matrix product of the top-left 3x3 corner of `data` with `data2`.
data[:3, :3] @ data2

array([[5414, 4620, 5326],
       [2042, 1995, 4288],
       [8845, 5159, 6299]])

7. Inverse of a matrix - Create a 3x3 identity matrix*, multiply it by 2 and compute its inverse.

In [29]:
from numpy import linalg

In [30]:
# Build the 3x3 identity matrix, scale it by 2, and invert the result.
# The inverse is reached through the imported `linalg` module: the notebook
# never imports a bare `inv`, so calling `inv(...)` directly fails on a fresh
# kernel.
id_matrix = np.eye(3)
id_matrix_inv = linalg.inv(id_matrix * 2)
id_matrix_inv

array([[0.5, 0. , 0. ],
       [0. , 0.5, 0. ],
       [0. , 0. , 0.5]])

8. Eigenvalues and eigenvectors - For the first 3x3 part of the data matrix, compute the eigenvalues and eigenvectors*.

In [31]:
# Eigen-decomposition of the top-left 3x3 corner of `data`.
# The result may be complex even though the input is real, because a
# non-symmetric real matrix can have complex-conjugate eigenvalue pairs.
eigenvalues, eigenvectors = linalg.eig(data[:3, :3])
eigenvalues

array([130.6650356 +0.j        , -19.8325178+31.07256871j,
       -19.8325178-31.07256871j])

In [32]:
# Display the eigenvectors returned by linalg.eig: column i of this matrix is
# the eigenvector that pairs with eigenvalues[i].
eigenvectors

array([[ 0.56421553+0.j        , -0.26116888-0.45706179j,
        -0.26116888+0.45706179j],
       [ 0.40518389+0.j        ,  0.64657295+0.j        ,
         0.64657295-0.j        ],
       [ 0.71936559+0.j        , -0.15413873+0.53016036j,
        -0.15413873-0.53016036j]])

9. Find missing values - Replace 5 random elements in the data matrix with np.nan. Find the indices of the missing values.

In [95]:
# Work on a float64 copy of `data`, since NaN is a floating-point value.
data_missing = data.astype(np.float64)
# Choose 5 distinct flat positions (no repeats) and blank them out.
index_nan = np.random.choice(data_missing.size, size=5, replace=False)
# `.flat` always writes through to the array itself.
data_missing.flat[index_nan] = np.nan
data_missing

array([[42., 56., 38., nan,  6.],
       [21.,  2., 56., 45., 59.],
       [88., 26., nan, 35., 99.],
       [90., nan, 36., nan, 92.],
       [54., 40., 80., nan, 18.]])

In [97]:
# Boolean mask of the NaN cells, then one (row, column) pair per True entry.
nan_mask = np.isnan(data_missing)
missing = np.argwhere(nan_mask)
missing

array([[0, 3],
       [2, 2],
       [3, 1],
       [3, 3],
       [4, 3]])

10. Replace missing values - Replace the missing values in the data matrix with the mean of the matrix (ignoring the missing values while computing the mean).

In [98]:
# Mean of the non-NaN entries, computed before anything is overwritten.
fill_value = np.nanmean(data_missing)
# Write that mean into every NaN cell (in place).
data_missing[np.isnan(data_missing)] = fill_value
data_missing

array([[42.  , 56.  , 38.  , 49.15,  6.  ],
       [21.  ,  2.  , 56.  , 45.  , 59.  ],
       [88.  , 26.  , 49.15, 35.  , 99.  ],
       [90.  , 49.15, 36.  , 49.15, 92.  ],
       [54.  , 40.  , 80.  , 49.15, 18.  ]])