# Numpy execises

Selected exercises from
https://www.machinelearningplus.com/python/101-numpy-exercises-python/

In [2]:
import numpy as np

1. Create a 1D array of numbers from 0 to 9


In [2]:
arr = np.array([0,1,2,3,4,5,6,7,8,9])
arr
# Expected output
#> array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

2. Create a 3×3 numpy array of all True’s

In [5]:
arr = np.array([[True, True, True], [True, True, True], [True, True, True]])
arr

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

3. Extract all odd numbers from arr


In [3]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[arr%2==1]
# Expected output
#> array([1, 3, 5, 7, 9])


array([1, 3, 5, 7, 9])

4. Replace all odd numbers in arr with -1


In [8]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[arr%2==1] = -1
arr
# Expected output
#>  array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])


array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

In [None]:
np.where(arr%2==1,-1,arr) #another option

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

5. Replace all odd numbers in arr with -1 in a new output without changing arr


In [12]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
data = np.where(arr%2==1, -1, arr)
# Expected output
#>  arr array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])
#>  output array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

In [None]:
arr

In [None]:
data

6. Convert a 1D array to a 2D array with 2 rows


In [26]:
arr = np.arange(10)
arr.reshape(-1,5) #arr.reshape(2,5)
# Expected output
#> array([[0, 1, 2, 3, 4],
#>        [5, 6, 7, 8, 9]])

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

7. Stack arrays a and b vertically


In [3]:
a = np.arange(10).reshape(2,5)
b = np.ones([2,5])
a.reshape(10,1)
b.reshape((-1,1))


array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.]])

In [4]:
# Expected output
#> array([[0, 1, 2, 3, 4],
#>        [5, 6, 7, 8, 9],
#>        [1, 1, 1, 1, 1],
#>        [1, 1, 1, 1, 1]])
np.concatenate([a, b], axis=0)
np.concatenate([a,b], axis=0, dtype=int, casting='unsafe')


array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

8. Stack the arrays a and b horizontally.

In [44]:
a = np.arange(10).reshape(2,5)
b = np.ones([2,5])
np.hstack([a.astype(dtype=int),b.astype(dtype=int)]) #np.hstack([a,b], dtype=int, casting='unsafe')

array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

In [None]:
# Expected output
#> array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
#>        [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

9. Get the positions where elements of a and b match

In [15]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
np.where(a==b)[0]


array([1, 3, 5, 7])

In [None]:
# Expected output
#> (array([1, 3, 5, 7]),)

10. Get all items between 5 and 10 from a.

In [55]:
a = np.array([2, 6, 1, 9, 10, 3, 27])
a[(a > 4) & (a < 11)]

array([ 6,  9, 10])

In [None]:
# Expected output
#>(array([6, 9, 10]),)

11. Find the mean, median, standard deviation of iris's sepallength (1st column)

In [20]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')

In [None]:

iris

In [23]:
np.mean(iris[:,[0]].astype(dtype=float), axis=0)

array([5.84333333])

In [24]:
np.median(iris[:,[0]].astype(dtype=float), axis=0)

array([5.8])

In [25]:
np.std(iris[:,[0]].astype(dtype=float), axis=0)

array([0.82530129])

12. Create a normalized form of iris's sepallength whose values range exactly between 0 and 1 so that the minimum has value 0 and maximum has value 1.

In [None]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])
sepallength = (sepallength - np.min(sepallength)) / (np.max(sepallength) - np.min(sepallength))
sepallength

13. Find the number and position of missing values in iris_2d's sepallength (1st column)

In [67]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20), ] = np.nan
np.where(np.isnan(iris_2d))

(array([  2,  14,  24,  28,  33,  34,  63,  74,  77,  90,  94, 113, 115,
        129, 130, 133, 135, 144, 145, 148]),
 array([3, 1, 1, 1, 1, 0, 0, 0, 2, 3, 1, 2, 2, 2, 3, 2, 2, 1, 1, 1]))

14. Filter the rows of iris_2d that has petallength (3rd column) > 1.5 and sepallength (1st column) < 5.0

In [56]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
iris_2d[np.where((iris_2d[:, [2]] > 1.5) & (iris_2d[:, [0]] < 5.0))[0]]

array([[4.8, 3.4, 1.6, 0.2],
       [4.8, 3.4, 1.9, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [4.9, 2.4, 3.3, 1. ],
       [4.9, 2.5, 4.5, 1.7]])

15. Find out if iris_2d has any missing values (missing values = nan)

In [3]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
np.isnan(iris_2d).any() #what is the meaning of missing values

False

16. Replace all occurrences of nan with 0 in numpy array

In [17]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan
iris_2d[np.where(np.isnan(iris_2d))] = 0

In [None]:
iris_2d

17. Create a new column for volume in iris_2d, where volume is (pi x petallength x sepal_length^2)/3

In [None]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3,4])
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species') #only to identify the columns
volume = (np.pi * iris_2d[:,[2]] * (iris_2d[:, [1]]**2)) / 3
np.hstack([iris_2d,volume])

18. Find the most frequent value of petal length (3rd column) in iris dataset.

In [None]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
unique, counts = np.unique(iris_2d[:, [2]], return_counts=True)
unique[counts.argmax()]

19. From the array a, replace all values greater than 30 to 30 and less than 10 to 10.

In [19]:
np.random.seed(100)
a = np.random.uniform(1,50, 20)
#maybe there is a better method
a[(a > 30)] = 30
a[(a < 10)] = 10


In [20]:
a

array([27.62684215, 14.64009987, 21.80136195, 30.        , 10.        ,
       10.        , 30.        , 30.        , 10.        , 29.17957314,
       30.        , 11.25090398, 10.08108276, 10.        , 11.76517714,
       30.        , 30.        , 10.        , 30.        , 14.42961361])

20.  Drop all nan values from a 1D numpy array

In [34]:
a = np.array([1,2,3,np.nan,5,6,7,np.nan])
a[~np.isnan(a)]

array([1., 2., 3., 5., 6., 7.])

In [None]:
#Expected output
# array([ 1.,  2.,  3.,  5.,  6.,  7.])