# Numpy tricks

In [1]:
import numpy as np

# np.sort()

    This top-level function returns a sorted copy of a given nd-array; the original array is left unchanged.

In [30]:
a = np.random.random((10, ))
a

array([0.98027082, 0.4854332 , 0.51180143, 0.13952073, 0.86404506,
       0.15538173, 0.50136511, 0.34880586, 0.28674002, 0.95228457])

In [31]:
np.sort(a)

array([0.13952073, 0.15538173, 0.28674002, 0.34880586, 0.4854332 ,
       0.50136511, 0.51180143, 0.86404506, 0.95228457, 0.98027082])

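    We can also sort a 2D array. By default (axis = -1) np.sort() sorts along the last axis, i.e. the values inside each
    row are sorted. To sort along a different direction we can use the axis parameter: axis = 0 sorts down the rows
    (the values inside each column get sorted) and axis = 1 sorts across the columns (the values inside each row get sorted).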

In [4]:
b = np.random.randint(1, 100, (4, 5))
b

array([[ 4, 68, 95, 90, 26],
       [40, 98, 21, 50, 26],
       [81,  3, 39,  7, 22],
       [43, 73, 20,  9, 20]])

In [5]:
np.sort(b)

array([[ 4, 26, 68, 90, 95],
       [21, 26, 40, 50, 98],
       [ 3,  7, 22, 39, 81],
       [ 9, 20, 20, 43, 73]])

In [6]:
np.sort(b, axis = 0)

array([[ 4,  3, 20,  7, 20],
       [40, 68, 21,  9, 22],
       [43, 73, 39, 50, 26],
       [81, 98, 95, 90, 26]])

# np.append()

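    - This is used to append values to a numpy nd-array.
    - Numpy arrays are mutable, but they have a fixed size and cannot grow in place. This function therefore returns
      a new array containing the appended values; the original array is left unchanged.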

In [7]:
import numpy as np

In [8]:
a = np.array([1,2,3,4,5])
np.append(a, 45)

array([ 1,  2,  3,  4,  5, 45])

In [9]:
b = np.random.randint(1, 100, (5, 7))
b

array([[23, 20, 62, 92, 32, 97, 57],
       [21, 91, 55,  1, 58, 93, 52],
       [58, 61, 96, 73, 64, 89, 58],
       [99, 72, 15, 46, 82, 16, 81],
       [71, 11, 70, 52, 88, 39, 54]])

In [12]:
ones = np.ones((5, ))
ones

array([1., 1., 1., 1., 1.])

    We can now append ones as a column to b

In [16]:
np.append(b, ones[::, np.newaxis], axis = 1)

array([[23., 20., 62., 92., 32., 97., 57.,  1.],
       [21., 91., 55.,  1., 58., 93., 52.,  1.],
       [58., 61., 96., 73., 64., 89., 58.,  1.],
       [99., 72., 15., 46., 82., 16., 81.,  1.],
       [71., 11., 70., 52., 88., 39., 54.,  1.]])

    Or maybe we could have appended like below

In [18]:
np.append(b, np.ones((5, 1)), axis = 1)

array([[23., 20., 62., 92., 32., 97., 57.,  1.],
       [21., 91., 55.,  1., 58., 93., 52.,  1.],
       [58., 61., 96., 73., 64., 89., 58.,  1.],
       [99., 72., 15., 46., 82., 16., 81.,  1.],
       [71., 11., 70., 52., 88., 39., 54.,  1.]])

# np.concatenate()

    This is the general function behind hstack and vstack (both of them are convenience wrappers around it), so it gives
    us the functionality of both the stack functions. We just need to pass in the axis parameter and it would perform
    the stacking along that direction.

In [22]:
a = np.arange(12).reshape((6,2)) # 6 rows, 2 columns
a

array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11]])

In [25]:
b = np.round(np.linspace(-10, 10, 24)).astype(int).reshape((6,4))
b

array([[-10,  -9,  -8,  -7],
       [ -7,  -6,  -5,  -4],
       [ -3,  -2,  -1,   0],
       [  0,   1,   2,   3],
       [  4,   5,   6,   7],
       [  7,   8,   9,  10]])

    Since a and b have the same number of rows (6) but a different number of columns, we can only stack them along the
    horizontal axis. Stacking along that direction increases the number of columns, i.e. it increases the second value
    of the shape tuple. Thus axis = 1.

In [26]:
np.concatenate((a, b), axis = 1)

array([[  0,   1, -10,  -9,  -8,  -7],
       [  2,   3,  -7,  -6,  -5,  -4],
       [  4,   5,  -3,  -2,  -1,   0],
       [  6,   7,   0,   1,   2,   3],
       [  8,   9,   4,   5,   6,   7],
       [ 10,  11,   7,   8,   9,  10]])

In [27]:
a = np.round(np.linspace(-40, 40, 12)).astype(int).reshape((2, 6))
a

array([[-40, -33, -25, -18, -11,  -4],
       [  4,  11,  18,  25,  33,  40]])

In [28]:
b = np.round(np.linspace(-23, 45, 18)).astype(int).reshape((3,6))
b

array([[-23, -19, -15, -11,  -7,  -3],
       [  1,   5,   9,  13,  17,  21],
       [ 25,  29,  33,  37,  41,  45]])

    Stacking a and b is only possible in the vertical direction which would then increase the number of rows in the 
    final resulting matrix. Thus, axis = 0

In [29]:
np.concatenate((a, b), axis = 0)

array([[-40, -33, -25, -18, -11,  -4],
       [  4,  11,  18,  25,  33,  40],
       [-23, -19, -15, -11,  -7,  -3],
       [  1,   5,   9,  13,  17,  21],
       [ 25,  29,  33,  37,  41,  45]])

# np.unique()

    This is used to return the unique values in an array.

In [32]:
e = np.array([1,2,3])
e

array([1, 2, 3])

In [33]:
e = np.tile(e, 3)
e

array([1, 2, 3, 1, 2, 3, 1, 2, 3])

In [35]:
f = np.repeat(e, 3)
f

array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2,
       2, 2, 3, 3, 3])

In [36]:
np.unique(e)

array([1, 2, 3])

In [37]:
np.unique(f)

array([1, 2, 3])

# np.where()

    This function expects us to pass in a boolean mask and it returns the indices of all the True positions in
    the mask. The result is a tuple containing one index array per dimension of the mask.

In [54]:
mask = [True, False, True, True, True, False, True, False]

In [57]:
np.where(mask)

(array([0, 2, 3, 4, 6], dtype=int64),)

    We can also replace the positions where we get True and False with some values. These values are provided as
    two extra arguments to the np.where() function: the first is used where the mask is True, the second where it is False.

In [58]:
a = np.random.randint(1, 100, (12, ))
a

array([93, 20, 49, 66, 92,  3, 77, 31, 23, 64, 97,  1])

In [60]:
np.where(a % 2 == 0, # mask
         'even', # array for True values (must be broadcastable to the shape of mask)
         'odd' # array for False values (must be broadcastable to the shape of mask)
        )

array(['odd', 'even', 'odd', 'even', 'even', 'odd', 'odd', 'odd', 'odd',
       'even', 'odd', 'odd'], dtype='<U4')

# np.argmax() and np.argmin()

    These functions are used to find the index of the maximum and minimum values in an array.
    
    - np.argmax() is used to find the index of the maximum value.
    - np.argmin() is used in the same way to find the index of the minimum value.
    - In case of multiple occurrences of the maximum (or minimum) value, the index corresponding to the first
      occurrence is returned.

In [61]:
a = np.random.randint(1, 100, (100, ))
a

array([16, 94, 22, 10, 57, 57, 31, 78, 19, 49, 22, 46, 69, 12, 11, 26,  7,
       23, 83, 10, 97,  5, 65, 26, 59, 54, 36, 13, 48, 64, 34, 34, 17, 20,
       11, 35, 73, 89, 30, 75, 48,  8, 97, 44,  9, 13, 11, 44, 62, 53, 84,
       16, 26, 45, 78, 10,  7, 15, 12, 83, 71, 33, 86, 12, 91, 89, 54, 36,
        1, 44, 76, 33, 47, 85, 62, 90, 63, 86, 12, 31, 85, 10,  8, 98, 87,
       53, 55, 29, 14, 13, 45, 23, 21, 86, 94, 33, 63, 48, 29, 93])

In [62]:
np.argmax(a)

83

In [63]:
# maximum value
a[83]

98

    We can also use this function to find the index of maximum values in a 2D array as well.

In [65]:
b = np.random.randint(1, 100, (4, 9))
b

array([[ 6, 57, 65, 60, 35, 65, 27, 12, 58],
       [83, 21, 82, 21, 66, 63, 89, 41, 94],
       [34, 36, 56, 46, 77, 88, 13,  2, 35],
       [45, 41, 30, 69, 40, 14, 41, 60, 69]])

    To find the indices of the maximum elements in the downward direction, we use axis = 0

In [66]:
np.argmax(b, axis = 0)

array([1, 0, 1, 3, 2, 2, 1, 3, 1], dtype=int64)

    To find the indices of the maximum elements in the horizontal direction, we use axis = 1

In [67]:
np.argmax(b, axis = 1)

array([2, 8, 5, 3], dtype=int64)

# cumprod() and cumsum()

    These functions are used to perform the operation of cumulative sum and cumulative product of an array.
    In case of 1D arrays this is very simple as we do not need to pass in the value of axis. However, when
    we use these functions on a 2D array we have the option of passing in the value of axis as well.

In [68]:
import numpy as np

In [72]:
a = np.random.randint(1, 10, (10, ))
a

array([8, 5, 8, 7, 8, 4, 8, 3, 3, 1])

In [73]:
np.cumsum(a)

array([ 8, 13, 21, 28, 36, 40, 48, 51, 54, 55])

In [74]:
# np.cumprod() is the supported name; the np.cumproduct() alias was deprecated in NumPy 1.25 and removed in NumPy 2.0
np.cumprod(a)

array([      8,      40,     320,    2240,   17920,   71680,  573440,
       1720320, 5160960, 5160960])

    We can also do the same for 2D arrays as well. We just need to pass in the value of the axis parameter as well.

In [76]:
b = np.random.randint(1, 100, (7, 8))
b

array([[64, 90, 18,  6,  9, 96, 36, 22],
       [18,  9, 76, 72, 98, 36, 87, 30],
       [99, 40, 65,  9, 40,  2, 41, 35],
       [73, 85,  6, 86, 99, 95, 79, 76],
       [68, 17,  9, 47, 23, 98, 19, 31],
       [54, 63, 91, 89, 92, 21,  2, 19],
       [26,  3, 95, 93, 60,  4, 78, 18]])

In [77]:
np.cumsum(b, axis = 0) # cumulative sum in downward direction

array([[ 64,  90,  18,   6,   9,  96,  36,  22],
       [ 82,  99,  94,  78, 107, 132, 123,  52],
       [181, 139, 159,  87, 147, 134, 164,  87],
       [254, 224, 165, 173, 246, 229, 243, 163],
       [322, 241, 174, 220, 269, 327, 262, 194],
       [376, 304, 265, 309, 361, 348, 264, 213],
       [402, 307, 360, 402, 421, 352, 342, 231]])

In [78]:
np.cumsum(b, axis = 1) # cumulative sum in horizontal direction

array([[ 64, 154, 172, 178, 187, 283, 319, 341],
       [ 18,  27, 103, 175, 273, 309, 396, 426],
       [ 99, 139, 204, 213, 253, 255, 296, 331],
       [ 73, 158, 164, 250, 349, 444, 523, 599],
       [ 68,  85,  94, 141, 164, 262, 281, 312],
       [ 54, 117, 208, 297, 389, 410, 412, 431],
       [ 26,  29, 124, 217, 277, 281, 359, 377]])

# np.histogram()

    This is used to count the frequency of elements in bins. We split a range of values into a number of bins and then
    count how many elements of the array lie in each bin. It returns the counts together with the bin edges.

In [6]:
nums = [1, 2, 13, 11, 12, 13, 21, 22, 23]
np.histogram(nums, bins = [0, 10, 20, 30])

(array([2, 4, 3], dtype=int64), array([ 0, 10, 20, 30]))

### `Q - 1` Given an array a, find the number of elements which are between i and j ( i < j ).

In [7]:
def no_elements(a, i, j):
    """Count the elements of ``a`` that lie between ``i`` and ``j``.

    A single histogram bin with edges ``[i, j]`` is used. Because it is the
    only (and therefore the last) bin, both edges are inclusive.
    """
    counts, _edges = np.histogram(a, bins=[i, j])
    return counts[0]

In [8]:
no_elements([1,2,2,3,5,56,32,21,45,67,33,44], 40, 50)

2

# np.corrcoef()

    This is used to find the coefficient of correlation between two or more sets of values.

In [9]:
salary = np.array([10000, 40000, 25000, 35000, 60000])
experience = np.array([1, 3, 2, 4, 2])

In [16]:
coff = np.corrcoef(salary, experience)
coff

array([[1.        , 0.37912989],
       [0.37912989, 1.        ]])

In [17]:
coff = np.round(coff, 2) # round all the values to 2 decimal places
coff

array([[1.  , 0.38],
       [0.38, 1.  ]])

# np.flip()

    The np.flip() method is used to flip an array along an axis.
    
    In case of 1D arrays, it just reverses the array.

In [18]:
a = np.round(np.linspace(-10, 3, 10)).astype(int)
a

array([-10,  -9,  -7,  -6,  -4,  -3,  -1,   0,   2,   3])

In [19]:
np.flip(a) # It would just reverse the array since a is 1D

array([  3,   2,   0,  -1,  -3,  -4,  -6,  -7,  -9, -10])

In [20]:
b = np.random.randint(1, 100, (4,5))
b

array([[10, 89,  9, 99, 19],
       [17, 20, 33, 20,  8],
       [72, 41, 21, 86,  2],
       [43,  4, 44, 28, 99]])

        In case of 2D arrays, we can pass the value of the axis parameter as well. If axis = 0, then
        it would take the mirror image of the array with respect to the horizontal line.

In [21]:
np.flip(b, axis = 0)

array([[43,  4, 44, 28, 99],
       [72, 41, 21, 86,  2],
       [17, 20, 33, 20,  8],
       [10, 89,  9, 99, 19]])

    If we pass axis = 1, then it would take the mirror image with respect to the vertical line bisecting the array

In [22]:
np.flip(b, axis = 1)

array([[19, 99,  9, 89, 10],
       [ 8, 20, 33, 20, 17],
       [ 2, 86, 21, 41, 72],
       [99, 28, 44,  4, 43]])

    However, if we do not specify the value of the axis parameter, it flips the array along all of its axes, i.e.
    first along axis = 0 and then along axis = 1.

In [23]:
np.flip(b)

array([[99, 28, 44,  4, 43],
       [ 2, 86, 21, 41, 72],
       [ 8, 20, 33, 20, 17],
       [19, 99,  9, 89, 10]])

# np.put()

    This is used to modify an array in place (it returns None). It takes the following parameters:
    1. array to be modified.
    2. list of indices (into the flattened array) where modification needs to be done.
    3. values which need to be inserted in place of the values at those indices.

In [30]:
a = np.array([1,2,3,4,5,6,7])

In [31]:
np.put(a, [0,-1], [10, 70]) # replace the values at index 0 and -1 with values 10 and 70

In [32]:
a

array([10,  2,  3,  4,  5,  6, 70])

### `Question` Given an array `arr` with missing values, replace all the missing values with -786.

In [55]:
arr = np.array([1,2,3,4,5,5, np.nan, 34, 45, 3,12, 12, 2, np.nan])

In [56]:
def replace_nans(arr, fill_value=-786):
    """Replace every NaN in ``arr`` with ``fill_value``, in place.

    Parameters
    ----------
    arr : np.ndarray
        Array to modify. It is changed in place; np.put() only accepts
        ndarrays, so a plain Python list will not work.
    fill_value : scalar, optional
        Replacement for the missing values. Defaults to -786.

    Returns
    -------
    np.ndarray
        The same ``arr`` object, after modification.
    """
    # np.put() addresses the *flattened* array, so it needs flat indices.
    # np.where() yields one index array per dimension, which np.put() would
    # misread as flat positions for arrays with more than one dimension.
    nan_positions = np.flatnonzero(np.isnan(arr))
    # np.put() works in place and returns None, so hand back the array itself.
    np.put(arr, nan_positions, fill_value)
    return arr

In [58]:
replace_nans(arr)
arr

array([   1.,    2.,    3.,    4.,    5.,    5., -786.,   34.,   45.,
          3.,   12.,   12.,    2., -786.])

# np.clip()

    This is used to clip the values of an array to a given range. It returns a new array; the original array is left unchanged.

In [59]:
a = np.round(np.linspace(1, 100, 12))
a

array([  1.,  10.,  19.,  28.,  37.,  46.,  55.,  64.,  73.,  82.,  91.,
       100.])

In [60]:
np.clip(a, a_min = 30, a_max = 50) # any value lower than 30 would be replaced by 30
# any value greater than 50 would be replaced with 50

array([30., 30., 30., 30., 37., 46., 50., 50., 50., 50., 50., 50.])

In [61]:
a

array([  1.,  10.,  19.,  28.,  37.,  46.,  55.,  64.,  73.,  82.,  91.,
       100.])