In [1]:
import numpy as np

In [2]:
data = np.random.normal(50000, 10000, 100)
data

array([50822.23298708, 45563.50063163, 43283.45607685, 42377.24592646,
       52974.94802177, 50553.6968508 , 49351.64928835, 65461.62832277,
       42003.26091085, 66585.17761174, 61029.24659511, 41237.29173042,
       46535.60325725, 52886.01932314, 52204.90827513, 41119.58992564,
       43247.76301669, 54116.490297  , 41033.64425082, 51889.37238109,
       43142.20083007, 38039.49959444, 58409.3835612 , 34206.07743143,
       66232.03869163, 51778.15873708, 54505.72030554, 42024.67796949,
       71398.30669617, 57262.79951891, 25505.1347953 , 46897.03942677,
       50421.24484205, 57777.4586061 , 58235.45126354, 67904.37131903,
       47176.02119385, 62570.70460471, 53525.93214158, 41731.21504309,
       51178.64323073, 69494.09376899, 64597.48121383, 44259.82769825,
       40624.22164502, 63406.4094105 , 45698.44734719, 38134.82182725,
       46042.82140675, 42405.11856395, 36844.96559493, 50943.54732253,
       36888.27815682, 49527.31381228, 39622.8674175 , 47316.69898733,
      

In [3]:
# Numpy has many helpful functions that
# can help with things like data science
# and data exploration.

# The following are some of these functions.

In [4]:
# argmax take an NDArray and returns
# the INDEX of the maximum value in
# the NDArray

max_value_index = np.argmax(data)

display("Max Value Index",
        max_value_index,
        "Max Value",
        data[max_value_index])

'Max Value Index'

94

'Max Value'

71545.26265345576

In [5]:
# argmin is similar to argmax except
# it returns the INDEX of the minimum
# value in the NDArray

min_value_index = np.argmin(data)

display("Min Value Index",
        min_value_index,
        "Min Value",
        data[min_value_index])

'Min Value Index'

30

'Min Value'

25505.13479530024

In [6]:
# Continueing the trend of returning
# indices, the argsort function takes
# an NDArray and returns a new NDArray
# that contains the indices of sorted 
# values in the source NDArray.

np.argsort(data)

array([30, 57, 56, 77, 23, 86, 79, 50, 52, 62, 64, 21, 47, 54, 82, 72, 98,
       44, 18, 15, 11, 68, 83, 67, 39,  8, 27,  3, 49, 20, 16,  2, 97, 43,
        1, 46, 48, 78, 12, 31, 36, 60, 71, 55, 63, 99, 90, 80, 73,  6, 53,
       74, 32,  5, 75, 87, 59,  0, 51, 40, 91, 66, 25, 19, 65, 14, 69, 96,
       13,  4, 92, 88, 38, 17, 26, 84, 95, 29, 61, 33, 34, 76, 22, 93, 10,
       58, 70, 85, 37, 89, 45, 42,  7, 24, 81,  9, 35, 41, 28, 94])

In [7]:
# argsort accepts a "kind" argument
# that lets you supply the type of
# sorting algorithm to use.
# SEE: https://numpy.org/doc/stable/reference/generated/numpy.argsort.html

np.argsort(data, kind="mergesort")

array([30, 57, 56, 77, 23, 86, 79, 50, 52, 62, 64, 21, 47, 54, 82, 72, 98,
       44, 18, 15, 11, 68, 83, 67, 39,  8, 27,  3, 49, 20, 16,  2, 97, 43,
        1, 46, 48, 78, 12, 31, 36, 60, 71, 55, 63, 99, 90, 80, 73,  6, 53,
       74, 32,  5, 75, 87, 59,  0, 51, 40, 91, 66, 25, 19, 65, 14, 69, 96,
       13,  4, 92, 88, 38, 17, 26, 84, 95, 29, 61, 33, 34, 76, 22, 93, 10,
       58, 70, 85, 37, 89, 45, 42,  7, 24, 81,  9, 35, 41, 28, 94])

In [8]:
# The where function takes a boolean NDArray
# (in this case created by a boolean operation)
# and returns the indices of the True values.
# This can be thought of as returning the indices
# matching a condition, as demonstrated below.

greater_than_65k = np.where(data > 65000)

display( greater_than_65k )


# Indexing into the original array enables returning
# matching elements.

data[greater_than_65k]

(array([ 7,  9, 24, 28, 35, 41, 81, 94]),)

array([65461.62832277, 66585.17761174, 66232.03869163, 71398.30669617,
       67904.37131903, 69494.09376899, 66464.01113954, 71545.26265346])

In [9]:
# The where function can also be provided
# two extra arguments that enable the where
# function to operate as an applied if-else
# function.

display( np.where(data > 65000) )
display( np.where(data > 65000, data, 10000) )

(array([ 7,  9, 24, 28, 35, 41, 81, 94]),)

array([10000.        , 10000.        , 10000.        , 10000.        ,
       10000.        , 10000.        , 10000.        , 65461.62832277,
       10000.        , 66585.17761174, 10000.        , 10000.        ,
       10000.        , 10000.        , 10000.        , 10000.        ,
       10000.        , 10000.        , 10000.        , 10000.        ,
       10000.        , 10000.        , 10000.        , 10000.        ,
       66232.03869163, 10000.        , 10000.        , 10000.        ,
       71398.30669617, 10000.        , 10000.        , 10000.        ,
       10000.        , 10000.        , 10000.        , 67904.37131903,
       10000.        , 10000.        , 10000.        , 10000.        ,
       10000.        , 69494.09376899, 10000.        , 10000.        ,
       10000.        , 10000.        , 10000.        , 10000.        ,
       10000.        , 10000.        , 10000.        , 10000.        ,
       10000.        , 10000.        , 10000.        , 10000.        ,
      

In [10]:
# Contrary to where which returns an NDArray
# of indices, the extract function returns
# the actual elements.

np.extract(data > 65000, data)

array([65461.62832277, 66585.17761174, 66232.03869163, 71398.30669617,
       67904.37131903, 69494.09376899, 66464.01113954, 71545.26265346])