## NumPy Exercises

#### Import numpy as np and see the version

In [1]:
import numpy as np
print(np.__version__)

1.21.5


#### Create a 1D array?

In [2]:
sa = np.array([
    [3,6,9,12],
    [15,18,21,24],
    [27,30,33,36],
    [39,42,45,48],
    [51,54,57,60]
])

In [3]:
sa[0]

array([ 3,  6,  9, 12])

In [4]:
for a in range(sa.shape[0]):
    if (a+1)%2==1:
        for b in range(sa.shape[1]):
            if (b+1)%2==0:
                print(sa[a,b])
    

6
12
30
36
54
60


In [26]:
sa[::2,1::2]

array([[ 6, 12],
       [30, 36],
       [54, 60]])

In [5]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

#### Create a boolean array?

In [5]:
np.full((3, 3), True, dtype=bool)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

#### Extract items that satisfy a given condition from 1D array?

In [8]:
# Input
arr = np.array([0, 10, 25, 3, 4, 54, 6, 71, 8, 19])

# Solution
arr[arr % 2 == 1]

array([25,  3, 71, 19])

#### Replace items that satisfy a condition with another value in numpy array?

In [7]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [9]:
arr[arr % 2 == 1] = -1
arr

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

#### Replace items that satisfy a condition without affecting the original array?

In [11]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [6]:
out = np.where(arr % 2 == 1, -1, arr)
print(arr)
out

[0 1 2 3 4 5 6 7 8 9]


array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

#### Reshape an array?

In [16]:
arr = np.arange(10)
print(arr)


[0 1 2 3 4 5 6 7 8 9]


In [17]:
arr.reshape(2, -1)  # Setting to -1 automatically decides the number of cols

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

#### Stack two arrays vertically?

In [7]:
a = np.arange(10).reshape(2,-1)
b = np.repeat(1, 10).reshape(2,-1)

In [8]:
a

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [9]:
b

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [10]:
# Method 1:
np.concatenate([a, b], axis=0)  


array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [21]:
# Method 2:
np.vstack([a, b])

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

#### Stack two arrays horizontally?

In [22]:
a = np.arange(10).reshape(2,-1)

b = np.repeat(1, 10).reshape(2,-1)

In [23]:
# Method 1:
np.concatenate([a, b], axis=1)

array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

In [24]:
# Method 2:
np.hstack([a, b])

array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

#### Generate custom sequences in numpy without hardcoding?

In [25]:
a = np.array([1,2,3])

In [29]:
np.r_[np.repeat(a, 4), np.tile(a, 5)]

array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1,
       2, 3, 1, 2, 3])

In [32]:
# Note...............
# numpy.r_ Translates slice objects to concatenation along the first axis.

V = np.array([1,2,3,4,5,6 ])
Y = np.array([7,8,9,10,11,12])
np.r_[V[0:2],Y[0],V[3],Y[1:3],V[4:],Y[4:]]


array([ 1,  2,  7,  4,  8,  9,  5,  6, 11, 12])

#### Get the common items between two python numpy arrays?

In [33]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

In [34]:
np.intersect1d(a,b)

array([2, 4])

#### Remove from one array those items that exist in another?

In [35]:
a = np.array([1,2,3,4,5])
b = np.array([5,6,7,8,9])

In [36]:
np.setdiff1d(a,b)

array([1, 2, 3, 4])

#### Get the positions where elements of two arrays match?

In [3]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

np.where(a == b)

(array([1, 3, 5, 7], dtype=int64),)

#### Extract all numbers between a given range from a numpy array?

In [7]:
a = np.arange(15)


In [8]:

# Method 1
index = np.where((a >= 5) & (a <= 10))
a[index]


array([ 5,  6,  7,  8,  9, 10])

In [9]:

# Method 2:
index = np.where(np.logical_and(a>=5, a<=10))
a[index]


array([ 5,  6,  7,  8,  9, 10])

In [10]:


# Method 3: 
a[(a >= 5) & (a <= 10)]

array([ 5,  6,  7,  8,  9, 10])

#### Make a python function that handles scalars to work on numpy arrays?

In [12]:
def maxx(x, y):
    """Get the maximum of two items"""
    if x >= y:
        return x
    else:
        return y

pair_max = np.vectorize(maxx, otypes=[float])

a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])

pair_max(a, b)

array([6., 7., 9., 8., 9., 7., 5.])

#### Swap two columns in a 2d numpy array?

In [14]:
# Input
arr = np.arange(9).reshape(3,3)
arr



array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [15]:
# Solution
arr[:, [1,0,2]]

array([[1, 0, 2],
       [4, 3, 5],
       [7, 6, 8]])

#### Swap two rows in a 2d numpy array?

In [16]:
# Input
arr = np.arange(9).reshape(3,3)



In [17]:
# Solution
arr[[1,0,2], :]


array([[3, 4, 5],
       [0, 1, 2],
       [6, 7, 8]])

#### Reverse the rows of a 2D array?

In [18]:
# Input
arr = np.arange(9).reshape(3,3)


In [19]:

# Solution
arr[::-1]

array([[6, 7, 8],
       [3, 4, 5],
       [0, 1, 2]])

#### Reverse the columns of a 2D array?

In [20]:
# Input
arr = np.arange(9).reshape(3,3)



In [21]:
# Solution
arr[:, ::-1]

array([[2, 1, 0],
       [5, 4, 3],
       [8, 7, 6]])

#### Create a 2D array containing random floats between 5 and 10?

In [22]:
# Input
arr = np.arange(9).reshape(3,3)





In [23]:
# Solution Method 1:
rand_arr = np.random.randint(low=5, high=10, size=(5,3)) + np.random.random((5,3))
print(rand_arr)

[[6.05807358 9.48966862 9.70114728]
 [8.88096244 6.40684282 9.64670276]
 [5.49806991 5.6267993  6.76956644]
 [9.2580831  6.92584967 7.69379136]
 [5.85074131 9.70289929 9.63963225]]


In [24]:
# Solution Method 2:
rand_arr = np.random.uniform(5,10, size=(5,3))
print(rand_arr)

[[9.79668935 5.83542549 5.60044117]
 [5.43707762 5.47438502 8.71221621]
 [5.05216222 7.98082437 8.53851518]
 [7.69370234 8.01317555 7.699162  ]
 [7.38776641 7.4664525  6.00270686]]


#### Print only 3 decimal places in python numpy array?

In [3]:
# Create the random array
rand_arr = np.random.random([5,3])

# Limit to 3 decimal places
np.set_printoptions(precision=3)
rand_arr[:4]

array([[0.127, 0.688, 0.177],
       [0.259, 0.715, 0.413],
       [0.332, 0.814, 0.241],
       [0.626, 0.991, 0.687]])

#### Limit the number of items printed in output of numpy array?

In [4]:
np.set_printoptions(threshold=6)
a = np.arange(15)

#### Import a dataset with numbers and texts keeping the text intact in python numpy?

In [6]:
# Solution
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

# Print the first 3 rows
iris[:3]

array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa']], dtype=object)

#### Extract a particular column from 1D array of tuples?

In [7]:
# Input:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_1d = np.genfromtxt(url, delimiter=',', dtype=None)
print(iris_1d.shape)

# Solution:
species = np.array([row[4] for row in iris_1d])
species[:5]

(150,)


  iris_1d = np.genfromtxt(url, delimiter=',', dtype=None)


array([b'Iris-setosa', b'Iris-setosa', b'Iris-setosa', b'Iris-setosa',
       b'Iris-setosa'], dtype='|S18')

#### Convert a 1d array of tuples to a 2d numpy array?

In [8]:
# Input:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_1d = np.genfromtxt(url, delimiter=',', dtype=None)

  iris_1d = np.genfromtxt(url, delimiter=',', dtype=None)


In [9]:
iris_1d

array([(5.1, 3.5, 1.4, 0.2, b'Iris-setosa'),
       (4.9, 3. , 1.4, 0.2, b'Iris-setosa'),
       (4.7, 3.2, 1.3, 0.2, b'Iris-setosa'), ...,
       (6.5, 3. , 5.2, 2. , b'Iris-virginica'),
       (6.2, 3.4, 5.4, 2.3, b'Iris-virginica'),
       (5.9, 3. , 5.1, 1.8, b'Iris-virginica')],
      dtype=[('f0', '<f8'), ('f1', '<f8'), ('f2', '<f8'), ('f3', '<f8'), ('f4', 'S15')])

In [10]:
# Solution:
# Method 1: Convert each row to a list and get the first 4 items
iris_2d = np.array([row.tolist()[:4] for row in iris_1d])
iris_2d[:4]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2]])

#### Compute the mean, median, standard deviation of a numpy array?

In [12]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])



In [13]:
# Solution
mu, med, sd = np.mean(sepallength), np.median(sepallength), np.std(sepallength)
print(mu, med, sd)

5.843333333333334 5.8 0.8253012917851409


#### Normalize an array so the values range exactly between 0 and 1?

In [14]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])



In [15]:
# Solution
Smax, Smin = sepallength.max(), sepallength.min()
S = (sepallength - Smin)/(Smax - Smin)

#### Find the percentile scores of a numpy array?

In [17]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])

# Solution
np.percentile(sepallength, q=[5, 95])

array([4.6  , 7.255])

#### Insert values at random positions in an array?

In [20]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='object')

iris_2d

array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa'],
       ...,
       [b'6.5', b'3.0', b'5.2', b'2.0', b'Iris-virginica'],
       [b'6.2', b'3.4', b'5.4', b'2.3', b'Iris-virginica'],
       [b'5.9', b'3.0', b'5.1', b'1.8', b'Iris-virginica']], dtype=object)

In [21]:
# Method 1
i, j = np.where(iris_2d)

# i, j contain the row numbers and column numbers of 600 elements of iris_x
np.random.seed(100)
iris_2d[np.random.choice((i), 20), np.random.choice((j), 20)] = np.nan

In [22]:
iris_2d

array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa'],
       ...,
       [b'6.5', b'3.0', b'5.2', b'2.0', nan],
       [b'6.2', b'3.4', b'5.4', b'2.3', b'Iris-virginica'],
       [b'5.9', b'3.0', b'5.1', b'1.8', b'Iris-virginica']], dtype=object)

#### Find the position of missing values in numpy array?

In [23]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan

# Solution
print("Number of missing values: \n", np.isnan(iris_2d[:, 0]).sum())
print("Position of missing values: \n", np.where(np.isnan(iris_2d[:, 0])))

Number of missing values: 
 5
Position of missing values: 
 (array([13, 17, 47, 53, 91], dtype=int64),)


#### Filter a numpy array based on two or more conditions?

In [25]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])



In [26]:

# Solution
condition = (iris_2d[:, 2] > 1.5) & (iris_2d[:, 0] < 5.0)
iris_2d[condition]

array([[4.8, 3.4, 1.6, 0.2],
       [4.8, 3.4, 1.9, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [4.9, 2.4, 3.3, 1. ],
       [4.9, 2.5, 4.5, 1.7]])

#### Drop rows that contain a missing value from a numpy array?

In [27]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan



In [28]:
# Solution
# No direct numpy function for this.
# Method 1:
any_nan_in_row = np.array([~np.any(np.isnan(row)) for row in iris_2d])
iris_2d[any_nan_in_row][:5]

array([[4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4]])

#### Find the correlation between two columns of a numpy array?

In [29]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])


In [30]:

# Solution 1
np.corrcoef(iris[:, 0], iris[:, 2])[0, 1]

0.8717541573048714

#### Find if a given array has any null values?

In [31]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

np.isnan(iris_2d).any()

False

#### Replace all missing values with 0 in a numpy array?

In [32]:
# Import iris keeping the text column intact
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

# Solution
# Extract the species column as an array
species = np.array([row.tolist()[4] for row in iris])

#### Create a new column from existing columns of a numpy array?

In [34]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='object')

# Solution
# Compute volume
sepallength = iris_2d[:, 0].astype('float')
petallength = iris_2d[:, 2].astype('float')
volume = (np.pi * petallength * (sepallength**2))/3

####  Do probabilistic sampling in numpy?

In [35]:
# Import iris keeping the text column intact
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')

# Solution
# Get the species column
species = iris[:, 4]

# Approach 1: Generate Probablistically
np.random.seed(100)
a = np.array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
species_out = np.random.choice(a, 150, p=[0.5, 0.25, 0.25])

#### Get the second largest value of an array when grouped by another array?

In [36]:
# Import iris keeping the text column intact
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')



In [37]:
# Solution
# Get the species and petal length columns
petal_len_setosa = iris[iris[:, 4] == b'Iris-setosa', [2]].astype('float')

#### Sort a 2D array by a column

In [38]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [39]:
# Sort by column position 0: SepalLength
print(iris[iris[:,0].argsort()][:20])

[[b'4.3' b'3.0' b'1.1' b'0.1' b'Iris-setosa']
 [b'4.4' b'3.2' b'1.3' b'0.2' b'Iris-setosa']
 [b'4.4' b'3.0' b'1.3' b'0.2' b'Iris-setosa']
 ...
 [b'4.9' b'2.5' b'4.5' b'1.7' b'Iris-virginica']
 [b'4.9' b'3.1' b'1.5' b'0.1' b'Iris-setosa']
 [b'4.9' b'3.1' b'1.5' b'0.1' b'Iris-setosa']]


#### Find the most frequent value in a numpy array?

In [40]:
# Input:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')

# Solution:
vals, counts = np.unique(iris[:, 2], return_counts=True)
print(vals[np.argmax(counts)])

b'1.5'


#### Position of the first occurrence of a value greater than a given value?

In [41]:
# Input:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')

# Solution: (edit: changed argmax to argwhere. Thanks Rong!)
np.argwhere(iris[:, 3].astype(float) > 1.0)[0]

array([50], dtype=int64)

#### Compute the row wise counts of all possible values in an array?

In [42]:
# Input:
np.random.seed(100)
arr = np.random.randint(1,11,size=(6, 10))
arr

array([[ 9,  9,  4, ...,  3,  6,  3],
       [ 3,  3,  2, ..., 10,  7,  3],
       [ 5,  2,  6, ...,  8,  2,  2],
       [ 8,  8,  1, ...,  3,  6,  9],
       [ 2,  1,  8, ...,  3,  6,  2],
       [ 9,  2,  6, ...,  6,  1, 10]])

In [43]:
# Solution
def counts_of_all_values_rowwise(arr2d):
    # Unique values and its counts row wise
    num_counts_array = [np.unique(row, return_counts=True) for row in arr2d]

    # Counts of all values row wise
    return([[int(b[a==i]) if i in a else 0 for i in np.unique(arr2d)] for a, b in num_counts_array])

# Print
print(np.arange(1,11))
counts_of_all_values_rowwise(arr)

[ 1  2  3 ...  8  9 10]


[[1, 0, 2, 1, 1, 1, 0, 2, 2, 0],
 [2, 1, 3, 0, 1, 0, 1, 0, 1, 1],
 [0, 3, 0, 2, 3, 1, 0, 1, 0, 0],
 [1, 0, 2, 1, 0, 1, 0, 2, 1, 2],
 [2, 2, 2, 0, 0, 1, 1, 1, 1, 0],
 [1, 1, 1, 1, 1, 2, 0, 0, 2, 1]]

## Good Luck!