# Getting started with NumPy

Name: Seyed Ali Mirferdos

Student Number: 99201465

## Part 1: Importing NumPy

In [1]:
import numpy as np

In [3]:
np.version.version

'1.19.5'

## Part 2: 1D array with step of 2

In [4]:
np.arange(25, 0, -2)

array([25, 23, 21, 19, 17, 15, 13, 11,  9,  7,  5,  3,  1])

## Part 3: Random 3*3 boolean array

Citation: [Source](https://stackoverflow.com/questions/43528637/create-large-random-boolean-matrix-with-numpy)

In [21]:
random_bool_arr = np.random.choice([True, False], 9).reshape((3, 3))

In [22]:
random_bool_arr

array([[ True,  True,  True],
       [ True,  True, False],
       [ True,  True, False]])

## Part 4: Changing the values of boolean array

In [23]:
new_arr4 = random_bool_arr.astype(int)

In [24]:
new_arr4[new_arr4 == 1] = 2
new_arr4[new_arr4 == 0] = 3
new_arr4

array([[2, 2, 2],
       [2, 2, 3],
       [2, 2, 3]])

## Part 5: Changing the values of boolean array without replacement

In [27]:
new_arr5 = np.where(random_bool_arr, 2, 3)
new_arr5

array([[2, 2, 2],
       [2, 2, 3],
       [2, 2, 3]])

## Part 6: Intersection of two arrays

In [28]:
arr1 = np.array([1, 2, 3, 2, 3, 4, 3, 4, 5, 6])
arr2 = np.array([7, 2, 10, 2, 7, 4, 9, 4, 9, 8])

In [29]:
np.intersect1d(arr1, arr2)

array([2, 4])

## Part 7: Swapping first and second column of a 1D array 

In [33]:
new_arr7 = np.arange(9).reshape(3, 3)

In [31]:
new_arr7

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [32]:
new_arr7[:, [0, 1]] = new_arr7[:, [1, 0]]
new_arr7

array([[1, 0, 2],
       [4, 3, 5],
       [7, 6, 8]])

## Part 8: Changing the order of columns and rows

In [34]:
new_arr8 = np.arange(9).reshape(3, 3)

In [35]:
new_arr8

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [37]:
np.flip(np.flip(new_arr8, axis=1), axis=0)

array([[8, 7, 6],
       [5, 4, 3],
       [2, 1, 0]])

## Part 9: Normalizing an array

In [38]:
new_arr9 = np.arange(9).reshape(3, 3)

In [39]:
new_arr9

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [40]:
new_arr9 / np.amax(new_arr9)

array([[0.   , 0.125, 0.25 ],
       [0.375, 0.5  , 0.625],
       [0.75 , 0.875, 1.   ]])

## Part 10: Uniform random array

randint will provide random integers in the interval using a uniform distribution which is what is asked:

In [41]:
new_arr10 = np.random.randint(low=1, high=201, size=(10, 10))

In [42]:
new_arr10

array([[  4, 174,   7, 125,  94, 180, 143,  55,  42,  97],
       [116, 102, 158,  25, 161, 199, 195,  52,  97,  16],
       [184,  32, 152, 152, 103,  68,  43, 139,  40, 104],
       [126,  14, 138,  67,  44,  58,  69, 195, 197, 181],
       [ 26, 192,  25,  41,  54, 132,  51, 180,  24, 178],
       [ 75,  28, 120,  11,  74,  89, 132,  16, 164,  97],
       [171, 114, 183, 117,  43,   4, 170,  26,  15,  31],
       [ 56,   4, 156, 109,  68, 171,  32,  95, 134, 114],
       [109, 125, 125, 139, 139,  61, 200, 197, 155,  45],
       [123, 184, 199,  71, 151,   3,  77, 197, 163,  55]])

Citation: [Source](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.flat.html)

In [43]:
new_arr10.flat[np.random.choice(10*10, 10, replace=False)] = 0
new_arr10

array([[  4, 174,   7, 125,  94,   0, 143,  55,  42,   0],
       [116, 102, 158,  25, 161, 199,   0,  52,  97,  16],
       [184,  32, 152, 152, 103,  68,  43, 139,  40, 104],
       [126,  14, 138,  67,  44,  58,   0, 195, 197, 181],
       [ 26, 192,  25,  41,  54, 132,  51, 180,  24, 178],
       [ 75,   0, 120,  11,  74,  89, 132,  16, 164,  97],
       [  0, 114, 183, 117,  43,   4, 170,  26,   0,  31],
       [ 56,   0, 156, 109,  68, 171,  32,   0, 134, 114],
       [109, 125, 125, 139, 139,  61, 200, 197, 155,  45],
       [123,   0, 199,  71, 151,   3,  77, 197, 163,  55]])

## Part 11: Getting the indices of zeroed cells

In [45]:
indices = np.argwhere(new_arr10 == 0)

In [47]:
print(len(indices))
print(indices)

10
[[0 5]
 [0 9]
 [1 6]
 [3 6]
 [5 1]
 [6 0]
 [6 8]
 [7 1]
 [7 7]
 [9 1]]


## Part 12: Replace nan values by zero in Iris dataset

In [48]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
iris_num = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0, 1, 2, 3])
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
iris_num[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan

In [50]:
new_iris = np.nan_to_num(iris_num, nan=0)

## Part 13: Getting non-nan rows

Citation: [Source](https://stackoverflow.com/a/11453235)

In [51]:
new_iris2 = iris_num[~np.isnan(iris_num).any(axis=1)]

## Part 14: Most frequent element in each Column and number of unique numbers

In [57]:
for col in iris.T:
  vals, _, counts = np.unique(col, return_index=True, return_counts=True)

  unique_values = len(vals)
  freq_value = vals[np.argmax(counts)]

  print("Most frequent value: " + str(freq_value))
  print("unique vals: " + str(unique_values))
  print("-------")

Most frequent value: b'5.0'
unique vals: 35
-------
Most frequent value: b'3.0'
unique vals: 23
-------
Most frequent value: b'1.5'
unique vals: 43
-------
Most frequent value: b'0.2'
unique vals: 22
-------
Most frequent value: b'Iris-setosa'
unique vals: 3
-------


## Part 15: Categorizing the third iris dataset

In [58]:
col3 = iris[:,2]
new_iris15 = iris.copy()

In [61]:
new_iris15[:,2] = np.where(col3 < b'3', 'S', col3)
new_iris15[:,2] = np.where(col3 > b'5', 'L', new_iris15[:,2])
new_iris15[:,2] = np.where((b'3' <= col3) & (col3 <= b'5'), 'M', new_iris15[:,2])

In [62]:
new_iris15

array([[b'5.1', b'3.5', 'S', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', 'S', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', 'S', b'0.2', b'Iris-setosa'],
       [b'4.6', b'3.1', 'S', b'0.2', b'Iris-setosa'],
       [b'5.0', b'3.6', 'S', b'0.2', b'Iris-setosa'],
       [b'5.4', b'3.9', 'S', b'0.4', b'Iris-setosa'],
       [b'4.6', b'3.4', 'S', b'0.3', b'Iris-setosa'],
       [b'5.0', b'3.4', 'S', b'0.2', b'Iris-setosa'],
       [b'4.4', b'2.9', 'S', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.1', 'S', b'0.1', b'Iris-setosa'],
       [b'5.4', b'3.7', 'S', b'0.2', b'Iris-setosa'],
       [b'4.8', b'3.4', 'S', b'0.2', b'Iris-setosa'],
       [b'4.8', b'3.0', 'S', b'0.1', b'Iris-setosa'],
       [b'4.3', b'3.0', 'S', b'0.1', b'Iris-setosa'],
       [b'5.8', b'4.0', 'S', b'0.2', b'Iris-setosa'],
       [b'5.7', b'4.4', 'S', b'0.4', b'Iris-setosa'],
       [b'5.4', b'3.9', 'S', b'0.4', b'Iris-setosa'],
       [b'5.1', b'3.5', 'S', b'0.3', b'Iris-setosa'],
       [b'5.7', b'3.8', 'S',

## Part 16: Finding second max value in rows which have setosa

In [68]:
setosa_values = iris[iris[:, 4] == b'Iris-setosa'][:,2]

If all values are intended:

In [72]:
np.sort(setosa_values)[-2]

b'1.9'

If unique values are intended:

In [75]:
setosa_unique_values = np.unique(setosa_values)
np.unique(setosa_values)[-2]

b'1.7'

## Part 17: Get indices of max 20 elements in the first column

In [76]:
first_col = iris[:,0]

If all values are intended:

In [81]:
np.argsort(first_col)[-20:]

array([143,  76, 112, 139, 141,  52, 120,  50, 102, 129, 125, 109, 107,
       130, 105, 122, 118, 117, 135, 131])

If unique values are intended:

In [83]:
_, idx, _ = np.unique(first_col, return_index=True, return_counts=True)
idx[-20:]

array([ 14,  61,  62,  63,  68,  56,  51,  54,  58,  65,  76,  52,  50,
       102, 109, 107, 130, 105, 117, 131])

## Part 18: Indices of the local maxima in first colmun

Citation: [Source](https://stackoverflow.com/questions/4624970/finding-local-maxima-minima-with-numpy-in-a-1d-numpy-array)

In [86]:
mask = np.r_[True, first_col[1:] > first_col[:-1]] & np.r_[first_col[:-1] > first_col[1:], True]

In [88]:
np.where(mask)

(array([  0,   5,   7,  10,  14,  18,  20,  23,  31,  33,  36,  39,  44,
         46,  48,  50,  52,  54,  56,  58,  63,  65,  68,  72,  76,  83,
         86,  91,  97, 100, 102, 105, 107, 109, 112, 120, 122, 125, 131,
        135, 137, 139, 141, 143, 147]),)