# Numpy execises

Selected exercises from
https://www.machinelearningplus.com/python/101-numpy-exercises-python/

In [1]:
import numpy as np

1. Create a 1D array of numbers from 0 to 9


In [3]:
np.array(range(10))

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

2. Create a 3×3 numpy array of all True’s

In [4]:
np.ones((3,3), dtype=bool)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

3. Extract all odd numbers from arr


In [None]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[arr % 2]

array([1, 3, 5, 7, 9])

4. Replace all odd numbers in arr with -1


In [None]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[arr % 2] = -1
arr

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

5. Replace all odd numbers in arr with -1 in a new output without changing arr


In [8]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
np.where(arr % 2, -1, arr)

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

6. Convert a 1D array to a 2D array with 2 rows


In [13]:
arr = np.arange(10)
arr.reshape((2,5))

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

7. Stack arrays a and b vertically


In [15]:
a = np.arange(10).reshape(2,5)
b = np.ones([2,5])

In [16]:
a

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [17]:
b

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [None]:
np.concatenate([a,b])

array([[0., 1., 2., 3., 4.],
       [5., 6., 7., 8., 9.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

8. Stack the arrays a and b horizontally.

In [22]:
a = np.arange(10).reshape(2,5)
b = np.ones([2,5])

In [23]:
np.concatenate([a,b], axis=1)

array([[0., 1., 2., 3., 4., 1., 1., 1., 1., 1.],
       [5., 6., 7., 8., 9., 1., 1., 1., 1., 1.]])

9. Get the positions where elements of a and b match

In [25]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

In [26]:
np.where(a == b)

(array([1, 3, 5, 7]),)

10. Get all items between 5 and 10 from a.

In [32]:
a = np.array([2, 6, 1, 9, 10, 3, 27])
a[(a >= 5) & (a <= 10)]

array([ 6,  9, 10])

11. Find the mean, median, standard deviation of iris's sepallength (1st column)

In [34]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')

In [43]:
mean = np.mean(iris[:, 0].astype(float))
median = np.median(iris[:, 0].astype(float))
stddev = np.std(iris[:, 0].astype(float))
print(f'Mean: {mean}, Median: {median}, St. Dev.: {stddev}')

Mean: 5.843333333333334, Median: 5.8, St. Dev.: 0.8253012917851409


12. Create a normalized form of iris's sepallength whose values range exactly between 0 and 1 so that the minimum has value 0 and maximum has value 1.

In [44]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])

In [49]:
sepallength = (sepallength - sepallength.min()) / (sepallength.max() - sepallength.min())
sepallength

array([0.22222222, 0.16666667, 0.11111111, 0.08333333, 0.19444444,
       0.30555556, 0.08333333, 0.19444444, 0.02777778, 0.16666667,
       0.30555556, 0.13888889, 0.13888889, 0.        , 0.41666667,
       0.38888889, 0.30555556, 0.22222222, 0.38888889, 0.22222222,
       0.30555556, 0.22222222, 0.08333333, 0.22222222, 0.13888889,
       0.19444444, 0.19444444, 0.25      , 0.25      , 0.11111111,
       0.13888889, 0.30555556, 0.25      , 0.33333333, 0.16666667,
       0.19444444, 0.33333333, 0.16666667, 0.02777778, 0.22222222,
       0.19444444, 0.05555556, 0.02777778, 0.19444444, 0.22222222,
       0.13888889, 0.22222222, 0.08333333, 0.27777778, 0.19444444,
       0.75      , 0.58333333, 0.72222222, 0.33333333, 0.61111111,
       0.38888889, 0.55555556, 0.16666667, 0.63888889, 0.25      ,
       0.19444444, 0.44444444, 0.47222222, 0.5       , 0.36111111,
       0.66666667, 0.36111111, 0.41666667, 0.52777778, 0.36111111,
       0.44444444, 0.5       , 0.55555556, 0.5       , 0.58333

13. Find the number and position of missing values in iris_2d's sepallength (1st column)

In [59]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20), ] = np.nan

In [61]:
print(np.isnan(iris_2d[:, 0]).sum())
print(np.where(np.isnan(iris_2d[:, 0])))

3
(array([34, 39, 83]),)


14. Filter the rows of iris_2d that have petallength (3rd column) > 1.5 and sepallength (1st column) < 5.0

In [67]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

In [68]:
iris_2d[(iris_2d[:, 2] > 1.5) & (iris_2d[:, 0] < 5.0)]

array([[4.8, 3.4, 1.6, 0.2],
       [4.8, 3.4, 1.9, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [4.9, 2.4, 3.3, 1. ],
       [4.9, 2.5, 4.5, 1.7]])

15. Find out if iris_2d has any missing values.

In [63]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

In [66]:
np.isnan(iris_2d).any()

np.False_

16. Replace all occurrences of nan with 0 in numpy array

In [77]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan

In [78]:
iris_2d

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, nan, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, nan, 1.4, nan],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [nan, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, nan, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [79]:
iris_2d[np.isnan(iris_2d)] = 0

17. Create a new column for volume in iris_2d, where volume is (pi x petallength x sepal_length^2)/3

In [107]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3,4])
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')#only to identify the columns

In [108]:
volume = (np.pi * iris_2d[:, 2] * (iris_2d[:, 0] ** 2)) / 3
volume = volume[:, np.newaxis]
np.hstack([iris_2d, volume])

array([[5.10000000e+00, 3.50000000e+00, 1.40000000e+00, 2.00000000e-01,
                   nan, 3.81326516e+01],
       [4.90000000e+00, 3.00000000e+00, 1.40000000e+00, 2.00000000e-01,
                   nan, 3.52004985e+01],
       [4.70000000e+00, 3.20000000e+00, 1.30000000e+00, 2.00000000e-01,
                   nan, 3.00723721e+01],
       [4.60000000e+00, 3.10000000e+00, 1.50000000e+00, 2.00000000e-01,
                   nan, 3.32380503e+01],
       [5.00000000e+00, 3.60000000e+00, 1.40000000e+00, 2.00000000e-01,
                   nan, 3.66519143e+01],
       [5.40000000e+00, 3.90000000e+00, 1.70000000e+00, 4.00000000e-01,
                   nan, 5.19116770e+01],
       [4.60000000e+00, 3.40000000e+00, 1.40000000e+00, 3.00000000e-01,
                   nan, 3.10221803e+01],
       [5.00000000e+00, 3.40000000e+00, 1.50000000e+00, 2.00000000e-01,
                   nan, 3.92699082e+01],
       [4.40000000e+00, 2.90000000e+00, 1.40000000e+00, 2.00000000e-01,
                   nan, 

18. Find the most frequent value of petal length (3rd column) in iris dataset.

In [109]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [113]:
values, counts = np.unique(iris[:, 2], return_counts=True)
values[counts.argmax()]

b'1.5'

19. From the array a, replace all values greater than 30 to 30 and less than 10 to 10.

In [119]:
np.random.seed(100)
a = np.random.uniform(1,50, 20)

In [121]:
a[a > 30] = 30
a[a < 10] = 10
a

array([27.62684215, 14.64009987, 21.80136195, 30.        , 10.        ,
       10.        , 30.        , 30.        , 10.        , 29.17957314,
       30.        , 11.25090398, 10.08108276, 10.        , 11.76517714,
       30.        , 30.        , 10.        , 30.        , 14.42961361])

20.  Drop all nan values from a 1D numpy array

In [124]:
a = np.array([1,2,3,np.nan,5,6,7,np.nan])
a = a[~np.isnan(a)]
a

array([1., 2., 3., 5., 6., 7.])