## This is a Jupyter Notebook that shows the basics about how to use NumPy lib along with Anaconda managed environments

In [634]:
import numpy as np

## Datatypes, Attributes and Functions at Arrays

- NumPy's main datatype is the n-dimensional array type (`ndarray`), which can be created with different dimensions, shapes and sizes

In [635]:
# 1-dimensional array - X dimension only - axis=0 is the single row of elements, with any number of columns, that composes the single-row 
# array, also known as row-vector
# the value for the shape of 1-dimensional arrays is the number of elements at that axis, i.e., the number of columns
# array() function creates an array of any number of dimensions, with any datatype or value for each element
array_1 = np.array([1,2,3])

In [636]:
array_1

array([1, 2, 3])

In [637]:
# the type is a numpy.ndarray - an n-dimensional array
type(array_1)

numpy.ndarray

In [638]:
# 2-dimensional array - or matrix - an array of arrays, with rows and columns (Y and X dimensions).
# axis=0 is the Y axis: it runs down the rows, so its shape value is the number of rows (and not columns).
# axis=1 is the X axis: it runs across the elements of each row, so its shape value is the number of columns (and not rows).
# Note that the columns, which were axis=0 at the 1-dimensional array, are now axis=1: the new (outer) dimension took axis=0.
# This is exactly the axis configuration of Pandas DataFrames (axis=0 is the Y axis, axis=1 is the X axis).
array_2 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# 3-dimensional array - an array of matrices - multiple planes (or levels), one matrix per plane (Z, Y and X dimensions).
# The axes shift once more compared to the 2-dimensional array: axis=0 is the Z axis (planes), axis=1 is the Y axis (rows of each
# matrix) and axis=2 is the X axis (columns of each row). Therefore the shape is (number of planes, number of rows at each matrix,
# number of columns at each row).
array_3 = np.array([
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
    [[19, 20, 21], [22, 23, 24], [25, 26, 27]],
])

# 4-dimensional array - an array of 3D-arrays, i.e., sets of planes, each set with multiple planes and each plane with 1 matrix.
# From 4 dimensions on, there's no way to represent the array graphically for humans (only X, Y and Z are visualizable), but arrays
# of thousands of dimensions can still be represented in code or in math just by using more []s: each new surrounding [] is a new
# dimension, encompassing whatever is inside it.
# full() creates an array of any number of dimensions automatically, receiving the shape and a fixed value (for all elements) as
# parameters. Other similar generating functions are zeros(), ones(), ones_like() and empty().
array_4 = np.full(shape=(2, 2, 3, 4), fill_value=1)


In [639]:
array_2

array([[1, 2, 3],
       [4, 5, 6]])

In [640]:
array_3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]],

       [[19, 20, 21],
        [22, 23, 24],
        [25, 26, 27]]])

In [641]:
array_4

array([[[[1, 1, 1, 1],
         [1, 1, 1, 1],
         [1, 1, 1, 1]],

        [[1, 1, 1, 1],
         [1, 1, 1, 1],
         [1, 1, 1, 1]]],


       [[[1, 1, 1, 1],
         [1, 1, 1, 1],
         [1, 1, 1, 1]],

        [[1, 1, 1, 1],
         [1, 1, 1, 1],
         [1, 1, 1, 1]]]])

In [642]:
# ndim attribute stores the number of dimensions of an ndarray
array_1.ndim, array_2.ndim, array_3.ndim, array_4.ndim

(1, 2, 3, 4)

In [643]:
# shape attribute shows the X, Y, Z or more dimensional axes (and their values) of an ndarray.

# 1-dimensional arrays have only 1 single axis, which is axis=0. The shape value, at this axis, is the number of elements along the 
# horizontal array line, the X axis, from [0] to the [length-1] position at the row-vector (there are no vertical array rows) 
# When printed, the shape attribute of a 1D-array shows a tuple with only one value (axis=0 value), which is the number of elements 
# disposed horizontally at that array (throughout its axis 0)
print(f"1D-array: {array_1.shape}")

# 2-dimensional arrays have 2 axes, which are axis=0 and axis=1. The values at these axes are:
# - for axis=0: the number of elements along the vertical line (number of rows), the Y axis - don't confuse that with axis=0 of 1D-arrays,
# or axis=0 of 3D arrays, which are different axes!
# - for axis=1: the number of elements along the horizontal line (number of columns), the X axis
# When printed, the shape attribute of a 2D-array shows a tuple with those two values, which are the number of elements disposed vertically 
# at that matrix (throughout its axis 0) and the number of elements disposed horizontally at the matrix (throughout its axis 1)
# this is the same axis disposition or configuration we have at Pandas Dataframes
print(f"2D-array: {array_2.shape}")

# 3-dimensional arrays have 3 axes, which are axis=0, axis=1 and axis=2. The values at these axes are:
# - for axis=0: the number of elements (planes or matrices - 1 matrix per plane) along the depth (Z) axis. You may have many 
# planes of matrices, one behind another, disposed along the depth 3D axis, forming the axis 0 of this 3D array. Don't confuse that 
# with axis=0 of 1D or 2D-arrays, which are different !
# - for axis=1: the number of elements along the vertical line (number of rows) - the Y axis. Don't confuse that with axis=1 of 2D-arrays, 
# which is different !
# - for axis=2: the number of elements along the horizontal line (number of columns) - the X axis.
# When printed, the shape attribute of a 3D-array shows a tuple with those three values, which are the number of planes (matrices) 
# disposed along the depth (Z) axis (axis 0), the elements disposed vertically at each matrix (throughout its axis 1) and the number 
# of elements disposed horizontally at each matrix (throughout its axis 2).
print(f"3D-array: {array_3.shape}")

# For 4D and on, we cannot represent sets of matrices graphically, but it's easy to represent in math or at computers, just adding one more 
# external level of [], wrapping all other []s inside it. That way, for relating between a 4D-array and its shape tuple values, just relate
# the innermost [] of the array to the rightmost value of the tuple; then the 2nd innermost [] of the array to the 2nd rightmost value
# at the tuple... until you get to the outermost [] of the array which must correspond to the leftmost value of the tuple. Just look
# at the array inner-out and the shape tuple from right to left.
print(f"4D-array: {array_4.shape}")

1D-array: (3,)
2D-array: (2, 3)
3D-array: (3, 3, 3)
4D-array: (2, 2, 3, 4)


In [644]:
# dtype attribute stores the type of elements that compose the ndarray
array_1.dtype, array_2.dtype, array_3.dtype

(dtype('int64'), dtype('int64'), dtype('int64'))

In [645]:
# size attribute shows the number of total elements of the ndarray
array_1.size, array_2.size, array_3.size, array_4.size

(3, 6, 27, 48)

In [646]:
# type function shows the type of each ndarray
type(array_1), type(array_2), type(array_3), type(array_4)

(numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)

In [647]:
# for using a NumPy array to create a Pandas DataFrame, just create a DataFrame as usual passing the array, as argument:
import pandas as pd
print("2D NumPy array:")
print(array_2)
print("\nconverting to")
print("↓↓↓↓↓↓↓↓↓↓↓↓↓\n")
print("Pandas DataFrame:")
pd.DataFrame(array_2)

2D NumPy array:
[[1 2 3]
 [4 5 6]]

converting to
↓↓↓↓↓↓↓↓↓↓↓↓↓

Pandas DataFrame:


Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


## Creating Arrays

In [648]:
# array() receives a list of elements and creates the array - simplest way (not the easiest for multi-D arrays)
sample_array = np.array([1,2,3])
sample_array

array([1, 2, 3])

In [649]:
sample_array.dtype

dtype('int64')

In [650]:
type(sample_array)

numpy.ndarray

In [651]:
# ones() creates an array of fixed values equal to 1
sample_array_2 = np.ones(shape=(2,3),dtype=float)
sample_array_2

array([[1., 1., 1.],
       [1., 1., 1.]])

In [652]:
# zeros() creates an array of fixed values equal to 0
sample_array_3 = np.zeros(shape=(3,2),dtype=int)
sample_array_3

array([[0, 0],
       [0, 0],
       [0, 0]])

In [653]:
# empty() creates an array of uninitialized arbitrary values
sample_array_4 = np.empty(shape=(3,2),dtype=int)
sample_array_4

array([[4607182418800017408, 4607182418800017408],
       [4607182418800017408, 4607182418800017408],
       [4607182418800017408, 4607182418800017408]])

In [654]:
# full() creates an array of fixed values equal to fill_value arg
sample_array_5 = np.full(shape=(3,3), fill_value=5, dtype=int)
sample_array_5

array([[5, 5, 5],
       [5, 5, 5],
       [5, 5, 5]])

In [655]:
# arange() creates an array with values between start and stop with a step interval between them
sample_array_6 = np.arange(0,100,10)
sample_array_6

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [656]:
# linspace() creates an array with a defined number "num" of values between start and stop
# "num" param is the number of elements you want (not a specific value or step): the interval is automatically and evenly divided
# the final numbers of the array will depend both on the "num" of elements you want and on the interval you set, so that the interval
# between each element is the same
# with endpoint=False, the stop value (100) is not included at the generated array
# with retstep=True, the result is a tuple with the first element being the generated array and the second element being the step 
# (the spacing between consecutive values of the generated array) - 10.0 in this case
sample_array_7 = np.linspace(0,100,num=10,endpoint=False,retstep=True)
sample_array_7

(array([ 0., 10., 20., 30., 40., 50., 60., 70., 80., 90.]), 10.0)

In [657]:
# random.randint() creates an array of ints with pseudo-random values between low (inclusive) and high (exclusive)
# the "size" param is not actually the size of the ndarray (total elements) but its shape
sample_array_8 = np.random.randint(1,50, size=(2,3,3))
sample_array_8

array([[[15,  5,  8],
        [34, 34, 14],
        [13, 31, 22]],

       [[32,  1, 48],
        [37, 15,  7],
        [42, 15, 28]]])

In [658]:
f"min = {sample_array_8.min()}, max = {sample_array_8.max()}, size = {sample_array_8.size}, shape = {sample_array_8.shape}"

'min = 1, max = 48, size = 18, shape = (2, 3, 3)'

In [659]:
# random.random() creates an array of values between 0 (inclusive) and 1 (exclusive) - optionally, as other functions above, receiving a tuple setting the shape
sample_array_9 = np.random.random((2,3))
sample_array_9

array([[0.31433034, 0.14451904, 0.62193849],
       [0.12425974, 0.91997849, 0.12971221]])

In [660]:
# random.rand() creates an array of values between 0 (inclusive) and 1 (exclusive) - receiving values as dimensions for the shape
sample_array_10 = np.random.rand(2,3)
sample_array_10

array([[0.12116711, 0.67206066, 0.2251634 ],
       [0.36995248, 0.87815032, 0.99654607]])

In [661]:
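# Regarding np.random.seed(): it fixes the seed of NumPy's global pseudo-random generator, so that the np.random functions called after it 
# keep generating the same pseudo-random numbers for anyone who executes the same project, while, when the seed is not defined, 
# the system picks a different seed automatically at each execution, in order to generate different numbers every time the project runs.
# the seed is not tied to a cell: it is global state of the kernel and stays in effect for every np.random call executed after it (at 
# the same cell or at the following ones) until seed() is called again. Setting it at the top of a cell, as below, is what keeps that 
# cell's output the same no matter what was executed before.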

In [662]:
np.random.seed(seed=0)
sample_array_11 = np.random.randint(100, size=(3,4))
sample_array_11

array([[44, 47, 64, 67],
       [67,  9, 83, 21],
       [36, 87, 70, 88]])

In [663]:
np.random.seed(seed=8)
sample_array_12 = np.random.rand(3,4)
sample_array_12

array([[0.8734294 , 0.96854066, 0.86919454, 0.53085569],
       [0.23272833, 0.0113988 , 0.43046882, 0.40235136],
       [0.52267467, 0.4783918 , 0.55535647, 0.54338602]])

In [664]:
# np.unique() returns a sorted array with the unique elements of the array passed as argument
np.unique(sample_array_11)

array([ 9, 21, 36, 44, 47, 64, 67, 70, 83, 87, 88])

## Selecting and viewing values from Arrays and Matrices

In [665]:
# For the following 4D array (matrix) we have 4 dimensions or 4 axis:
# axis=3 or d3 (innermost []) having 5 elements (each element being a single int at a column - X axis)
# axis=2 or d2 (rows of axis=3 int elements) having 4 elements (each element being each row - Y axis)
# axis=1 or d1 (blocks of rows of axis=2) having 3 elements (each element being each block or matrix - Z axis)
# axis=0 or d0 (outermost []) (blocks of blocks of axis=1) having 2 elements (each element being each block of blocks or 
# matrix of matrices)
# so that, d3 has 5 int elements at the five columns, d2 has 4 row elements, d1 has 3 matrices of rows and d0 has 2 matrices of matrices
# the innermost [] contains the highest dimension elements (4th dimension or d3 or axis=3), while the outermost [] contains the 
# lowest dimension elements (1st dimension or d0 or axis=0).
np.random.seed(0)
sample_array_13 = np.random.randint(100, size=(2,3,4,5))
sample_array_13

array([[[[44, 47, 64, 67, 67],
         [ 9, 83, 21, 36, 87],
         [70, 88, 88, 12, 58],
         [65, 39, 87, 46, 88]],

        [[81, 37, 25, 77, 72],
         [ 9, 20, 80, 69, 79],
         [47, 64, 82, 99, 88],
         [49, 29, 19, 19, 14]],

        [[39, 32, 65,  9, 57],
         [32, 31, 74, 23, 35],
         [75, 55, 28, 34,  0],
         [ 0, 36, 53,  5, 38]]],


       [[[17, 79,  4, 42, 58],
         [31,  1, 65, 41, 57],
         [35, 11, 46, 82, 91],
         [ 0, 14, 99, 53, 12]],

        [[42, 84, 75, 68,  6],
         [68, 47,  3, 76, 52],
         [78, 15, 20, 99, 58],
         [23, 79, 13, 85, 48]],

        [[49, 69, 41, 35, 64],
         [95, 69, 94,  0, 50],
         [36, 34, 48, 93,  3],
         [98, 42, 77, 21, 73]]]])

In [666]:
sample_array_13.shape, sample_array_13.ndim

((2, 3, 4, 5), 4)

In [667]:
# For viewing arrays and matrices by using indexes [], you can either ask for a specific single index or for a slice (interval of indexes) 
# of the values at each respective dimension.
# For a 4D array (matrix), when using the selection by using the index call, there'll be 4 dimensions or 4 index space values to be set at 
# the selection function call, respectively for the 1st (d0) (outermost [] at the array), 2nd (d1), 3rd (d2) and 4th (d3) 
# (innermost [] at the array) dimensions.

![4d-selection.png](4d-selection.png)

In [668]:
# e.g. as in the picture above, if you ask for all values at dimensions d0, d1 and d2, but only for the [0]-indexed values at dimension d3 
# (X axis), that is, only for the elements at first column of each row, then the 4D matrix will be reduced to a 3D matrix, and, from each 
# set of 5 elements at the original d3, only the [0]-indexed values (first-column element of each row) will be returned... This will change 
# the 4D (shape=(2,3,4,5)) to the 3D (shape=(2,3,4)) matrix below. A similar analysis can be done for different dimension arrays and/or for 
# different index values or intervals of values. Just use the slicing at each dimension of any ndarray, as you wish... of course, you cannot 
# index more dimensions than the ndarray has, and a single (integer) index must exist at its dimension (slices that go past the end are 
# just cut at the last element).
sample_array_13[:,:,:,0]

array([[[44,  9, 70, 65],
        [81,  9, 47, 49],
        [39, 32, 75,  0]],

       [[17, 31, 35,  0],
        [42, 68, 78, 23],
        [49, 95, 36, 98]]])

In [669]:
# For the same original 4D array (matrix) above, instead of a specific single index at d3, we can use slicing to ask for an interval of 
# indexes. We may, for instance, ask for [0] and [1] elements of d3 as below (elements from the first two columns of each row). Observe 
# that, now, we have a 4D array (as the original one), but from the 5 elements at d3 (at each row) only [0] and [1]-indexed values are 
# returned (1st and 2nd columns of each row). Remember that, for slicing, the "stop" value is exclusive, so [0:2] would be the slice for 
# getting [0] and [1] (but not [2]) values of d3. In this case we're also using slicing for d0, d1 and d2, but getting all elements from 
# them with ":" (i.e. from the first until the last of the elements of these dimensions), constraining the selection only at d3... for 
# simplicity. Of course you could constrain the selection of elements of one, more or all dimensions here.
sample_array_13[:,:,:,0:2]

array([[[[44, 47],
         [ 9, 83],
         [70, 88],
         [65, 39]],

        [[81, 37],
         [ 9, 20],
         [47, 64],
         [49, 29]],

        [[39, 32],
         [32, 31],
         [75, 55],
         [ 0, 36]]],


       [[[17, 79],
         [31,  1],
         [35, 11],
         [ 0, 14]],

        [[42, 84],
         [68, 47],
         [78, 15],
         [23, 79]],

        [[49, 69],
         [95, 69],
         [36, 34],
         [98, 42]]]])

In [670]:
# Another example:

In [671]:
# For the following 3D array (matrix) we have 3 dimensions or 3 axis:
# axis=2 or d2 (innermost []) having 4 elements at each column (each element being a single int)
# axis=1 or d1 (rows of axis=2 int elements) having 3 elements (each element being each row)
# axis=0 or d0 (outermost []) (blocks of rows of axis=1) having 2 elements (each element being each block or matrix)
# so that, d2 has 4 int elements, d1 has 3 row elements and d0 has 2 matrices of rows
# the innermost [] contains the highest dimension elements (3rd dimension or d2 or axis=2), while the outermost [] contains the 
# lowest dimension elements (1st dimension or d0 or axis=0).
np.random.seed(3)
sample_array_14 = np.random.randint(100,size=(2,3,4))
sample_array_14

array([[[24,  3, 56, 72],
        [ 0, 21, 19, 74],
        [41, 10, 21, 38]],

       [[96, 20, 44, 93],
        [39, 14, 26, 81],
        [90, 22, 66,  2]]])

In [672]:
# viewing only [0]-indexed elements from d2 and all elements at other dimensions: it'll return a matrix reduced from 3D (shape=(2,3,4)) to 
# 2D (shape=(2,3)), where the rows will contain the [0]-indexed values from d2 of the original array (matrix).
sample_array_14[:,:,0]

array([[24,  0, 41],
       [96, 39, 90]])

In [673]:
# viewing [0] and [1]-indexed elements from d2 and all elements from other dimensions. It keeps the matrix as a 3D array but only shows
# the first two elements - or two columns - from each row at d2 (from the original array (matrix)):
sample_array_14[:,:,0:2]

array([[[24,  3],
        [ 0, 21],
        [41, 10]],

       [[96, 20],
        [39, 14],
        [90, 22]]])

In [674]:
# slicing other dimensions other than d2, we can, for instance, select only [0] and [1] at d2, only [0] and [1] at d1 and only [0] at d0:
sample_array_14[0,0:2,0:2]

array([[24,  3],
       [ 0, 21]])

In [675]:
# for 2D (d0,d1) and 1D (d0) arrays, we have very similar approaches.

## Manipulating and Comparing Arrays

### Arithmetic

In [676]:
array_1

array([1, 2, 3])

In [677]:
ones_array = np.ones(3, dtype=int)
ones_array

array([1, 1, 1])

In [678]:
# sum of arrays - sums up each respective position value
array_1 + ones_array

array([2, 3, 4])

In [679]:
# subtraction of arrays - subtracts each value at the 2nd array from the respective position value at the first
array_1 - ones_array

array([0, 1, 2])

In [680]:
# multiplication of arrays - multiplies corresponding position values at two arrays
array_1 * ones_array

array([1, 2, 3])

In [681]:
array_2

array([[1, 2, 3],
       [4, 5, 6]])

In [682]:
array_1 * array_2

array([[ 1,  4,  9],
       [ 4, 10, 18]])

In [683]:
array_3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]],

       [[19, 20, 21],
        [22, 23, 24],
        [25, 26, 27]]])

In [684]:
# numpy does element-wise array multiplication via broadcasting. For broadcasting to be possible, some prerequisites must be met, in terms 
# of the shape of the arrays being multiplied. The arrays below, for instance, are not broadcastable:
# array_2 * array_3
# So, for the multiplication to be possible, these arrays would have to be reshaped to shapes that are compatible for broadcasting.
# Not all pairs of arrays can be reshaped to compatible shapes.
# There's a section below (Reshaping and Transposing - Broadcasting) showing how to do just that.

In [685]:
array_1 / ones_array

array([1., 2., 3.])

In [686]:
array_2 / array_1

array([[1. , 1. , 1. ],
       [4. , 2.5, 2. ]])

In [687]:
array_2 // array_1

array([[1, 1, 1],
       [4, 2, 2]])

In [688]:
array_2 ** 2

array([[ 1,  4,  9],
       [16, 25, 36]])

In [689]:
array_1 ** array_2

array([[  1,   4,  27],
       [  1,  32, 729]])

In [690]:
np.square(array_2)

array([[ 1,  4,  9],
       [16, 25, 36]])

In [691]:
np.add(array_1, ones_array)

array([2, 3, 4])

In [692]:
array_2 % array_1

array([[0, 0, 0],
       [0, 1, 0]])

In [693]:
np.mod(array_2, array_1)

array([[0, 0, 0],
       [0, 1, 0]])

In [694]:
np.exp(array_1)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [695]:
np.log(array_1)

array([0.        , 0.69314718, 1.09861229])

### Aggregation

In [696]:
array_massive = np.random.random(100000)
array_massive.size, array_massive.shape

(100000, (100000,))

In [697]:
array_massive[:10]

array([0.91330145, 0.76582638, 0.97364827, 0.40236064, 0.55277195,
       0.30636063, 0.58661997, 0.27492011, 0.45460777, 0.75766525])

In [698]:
# when using numpy datatypes, use also numpy aggregation functions (instead of python's) for maximum efficiency, especially when dealing with big sized structures
%timeit sum(array_massive) # Python's sum()
%timeit np.sum(array_massive) # NumPy's np.sum()
# at the tested machine, for the run saved below, the second method above (numpy's) was roughly 225x faster than the first one (python's): 5.98 ms versus 26.6 μs.

5.98 ms ± 66.2 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
26.6 μs ± 1.38 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [699]:
array_2

array([[1, 2, 3],
       [4, 5, 6]])

In [700]:
np.mean(array_2)

3.5

In [701]:
np.max(array_2)

6

In [702]:
np.min(array_2)

1

In [703]:
np.std(array_2) # standard deviation taking all elements

1.707825127659933

In [704]:
np.std(array_2, axis=0) # standard deviation taking each column's elements

array([1.5, 1.5, 1.5])

In [705]:
np.std(array_2, axis=1) # standard deviation taking each row's elements

array([0.81649658, 0.81649658])

In [706]:
np.var(array_2) # standard deviation is the square root of the variance

2.9166666666666665

In [707]:
# both sides are floats computed through different calls, so compare them with a tolerance (np.isclose) instead of an exact "==",
# which may fail because of floating-point rounding even when the values are mathematically equal
np.isclose(np.sqrt(np.var(array_2)), np.std(array_2))

True

### Reshaping and Transposing

In [708]:
# for arrays to be multiplied element-wise (broadcastable), NumPy compares their shapes dimension by dimension, starting from the last 
# (rightmost) dimension and moving to the left: two dimensions are compatible when they have the same number of elements or when one of 
# them is 1. The arrays don't need to have the same number of dimensions: the missing leftmost dimensions of the smaller array are treated 
# as 1 - that's why array_1 (shape (3,)) could be multiplied by array_2 (shape (2,3)) at the "Arithmetic" section above.

In [709]:
array_2

array([[1, 2, 3],
       [4, 5, 6]])

In [710]:
array_2.shape

(2, 3)

In [711]:
np.size(array_2)

6

In [712]:
sample_array_11

array([[44, 47, 64, 67],
       [67,  9, 83, 21],
       [36, 87, 70, 88]])

In [713]:
sample_array_11.shape

(3, 4)

In [714]:
np.size(sample_array_11)

12

In [715]:
# the two arrays above have different sizes (number of elements) and their shapes are not compatible and thus they can't be multiplied 
# (broadcasted) with such shapes. But they can be reshaped to turn that possible.
# Use numpy reshape() to change the shape of an ndarray, when that is possible (it may not be), in order to be able to multiply that 
# array with other compatible-shape arrays via broadcasting.

In [716]:
array_2_reshaped = array_2.reshape((3,2,1))
array_2_reshaped

array([[[1],
        [2]],

       [[3],
        [4]],

       [[5],
        [6]]])

In [717]:
sample_array_11_reshaped = sample_array_11.reshape((3,2,2))
sample_array_11_reshaped

array([[[44, 47],
        [64, 67]],

       [[67,  9],
        [83, 21]],

       [[36, 87],
        [70, 88]]])

In [718]:
# now, both reshaped arrays can be multiplied - (3,2,1)*(3,2,2), because: d0=3 and d1=2 on both arrays and d2=1 on the first and 
# whichever other number of elements on the second.

In [719]:
np.multiply(array_2_reshaped, sample_array_11_reshaped)

array([[[ 44,  47],
        [128, 134]],

       [[201,  27],
        [332,  84]],

       [[180, 435],
        [420, 528]]])

In [720]:
# use "T" on an ndarray to transpose it, i.e., to reverse the order of its axes (and therefore of its shape):

In [721]:
array_2

array([[1, 2, 3],
       [4, 5, 6]])

In [722]:
array_2.shape

(2, 3)

In [723]:
array_2.T

array([[1, 4],
       [2, 5],
       [3, 6]])

In [724]:
array_2.T.shape

(3, 2)

In [725]:
# The transposed array (matrix) is not necessarily the same as the reshaped one, even for the same shape (the values' positions 
# may be different). You can combine both techniques together in order to make two ndarrays broadcastable and multiply them.
array_2.reshape((3,2))

array([[1, 2],
       [3, 4],
       [5, 6]])

### Element-wise versus Dot product array multiplication

In [726]:
# Differently from the array multiplication shown at the initial "Manipulating Arrays - Arithmetic" section, called the element-wise method,
# there's the Dot Product multiplication method for array matrices, which is shown below. For Dot Multiplication, the matrices must
# also be compatible in shape, which, in this case, means: if Aij x Bmn => 'j' must be equal to 'm'. The resultant matrix will have the 
# shape (i,n). You can combine reshape or transpose together in order to turn two ndarrays' (matrices') shapes compatible for Dot Product 
# multiplication, as below:

In [727]:
matrix_1 = np.array([[5,0,3],
                    [3,7,9],
                    [3,5,2]])
matrix_1

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2]])

In [728]:
matrix_1.shape

(3, 3)

In [729]:
matrix_2 = np.array([[4, 6, 8],
                    [7, 8, 1]])
matrix_2

array([[4, 6, 8],
       [7, 8, 1]])

In [730]:
matrix_2.shape

(2, 3)

In [731]:
# matrix_1 x matrix_2 via Dot Product is not possible. But you can transpose matrix_2 so that shapes become compatible:

In [732]:
matrix_2_transposed = matrix_2.T
matrix_2_transposed

array([[4, 7],
       [6, 8],
       [8, 1]])

In [733]:
matrix_2_transposed.shape

(3, 2)

In [734]:
resultant_matrix = np.dot(matrix_1, matrix_2_transposed)
resultant_matrix

array([[ 44,  38],
       [126,  86],
       [ 58,  63]])

In [735]:
resultant_matrix.shape

(3, 2)

In [736]:
# observe that the matrices were compatible in shape after the transposition of matrix_2: matrix_1 (3,3) x matrix_2_T (3,2), and that the resultant
# matrix's shape is (3,2). Being compatible for dot product multiplication doesn't mean being compatible for element-wise broadcasting and vice-versa.

### Comparison

In [737]:
# comparison between arrays is similar to comparison between numerical values in python; in this case, a resulting ndarray is generated 
# with the results for the comparisons, position versus position, between the two arrays:

In [738]:
array_2

array([[1, 2, 3],
       [4, 5, 6]])

In [739]:
sample_array_9

array([[0.31433034, 0.14451904, 0.62193849],
       [0.12425974, 0.91997849, 0.12971221]])

In [740]:
array_2 > sample_array_9

array([[ True,  True,  True],
       [ True,  True,  True]])

In [741]:
array_2 == sample_array_9

array([[False, False, False],
       [False, False, False]])

In [742]:
array_2 <= sample_array_9

array([[False, False, False],
       [False, False, False]])

In [743]:
array_1

array([1, 2, 3])

In [744]:
array_1 == array_2

array([[ True,  True,  True],
       [False, False, False]])

### Sorting Arrays

In [745]:
# you can shuffle all elements of an ndarray (not only along the first axis, but all of them) by creating a random generator instance 
# and calling its permuted() method

In [746]:
array_3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]],

       [[19, 20, 21],
        [22, 23, 24],
        [25, 26, 27]]])

In [747]:
# NOTE(review): default_rng() is called without a seed here, so the shuffle done with this generator gives different values on every
# run, and any specific numbers quoted in the comments of the following cells are only valid for one particular run. Pass a seed
# (e.g. np.random.default_rng(seed=0)) and re-run the cells below if reproducible output is wanted.
random_generator = np.random.default_rng()

In [748]:
array_3_shuffled = random_generator.permuted(array_3)
array_3_shuffled

array([[[ 7, 12,  1],
        [ 6, 13, 14],
        [21, 15, 24]],

       [[27,  3,  4],
        [ 8, 25, 20],
        [19, 23, 10]],

       [[ 9, 26, 11],
        [ 5,  2, 18],
        [16, 22, 17]]])

In [749]:
# you can sort an array by using the sort() numpy function, specifying the axis or leaving the default, which is the last axis - 
# d2 or axis=2 - in this case

In [750]:
np.sort(array_3_shuffled)
# same as:
# np.sort(array_3_shuffled, axis=2)

array([[[ 1,  7, 12],
        [ 6, 13, 14],
        [15, 21, 24]],

       [[ 3,  4, 27],
        [ 8, 20, 25],
        [10, 19, 23]],

       [[ 9, 11, 26],
        [ 2,  5, 18],
        [16, 17, 22]]])

In [751]:
# observe, in the example above, that only d2 elements were sorted amongst themselves, d1 (rows) and d0 (blocks of rows) were not sorted.
# you can sort the other dimensions as well setting them explicitly at the sort() call:

In [752]:
np.sort(array_3_shuffled, axis=1)

array([[[ 6, 12,  1],
        [ 7, 13, 14],
        [21, 15, 24]],

       [[ 8,  3,  4],
        [19, 23, 10],
        [27, 25, 20]],

       [[ 5,  2, 11],
        [ 9, 22, 17],
        [16, 26, 18]]])

In [753]:
# observe above that, with axis=1, the values were sorted along d1, i.e., column by column inside each block: each value stays at its own 
# column position and at its own block, and only moves up or down between the rows of that block. Nothing is moved inside a row (d2) or 
# between different blocks (d0).

In [754]:
np.sort(array_3_shuffled, axis=0)

array([[[ 7,  3,  1],
        [ 5,  2, 14],
        [16, 15, 10]],

       [[ 9, 12,  4],
        [ 6, 13, 18],
        [19, 22, 17]],

       [[27, 26, 11],
        [ 8, 25, 20],
        [21, 23, 24]]])

In [755]:
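# now, at the example above, with axis=0, the values were sorted along d0, i.e., across the blocks: the values that share the same 
# (row, column) position at each one of the three blocks are sorted amongst themselves (e.g., 7, 27 and 9, at position [0,0] of each 
# block, become 7, 9 and 27). Each value keeps its row and column position and only moves between different blocks.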

In [756]:
# you also can sort the values by getting the corresponding indexes of the values when sorted, i.e., not getting at the output an array
# of values but an array of indexes of the values after these being sorted.

In [757]:
np.argsort(array_3_shuffled)

array([[[2, 0, 1],
        [0, 1, 2],
        [1, 0, 2]],

       [[1, 2, 0],
        [0, 2, 1],
        [2, 0, 1]],

       [[0, 2, 1],
        [1, 0, 2],
        [0, 2, 1]]])

In [783]:
# for instance, the example above shows a same-shape matrix with the indexes of the values of the previous shuffled array, after
# the elements of it were sorted along the axis d2 (as no specific axis was passed). The same as before but returning not the values
# but their corresponding indexes after sorting.

In [759]:
# you can get the index of the min and max number from each dimension using argmin() and argmax():

In [760]:
array_3_shuffled

array([[[ 7, 12,  1],
        [ 6, 13, 14],
        [21, 15, 24]],

       [[27,  3,  4],
        [ 8, 25, 20],
        [19, 23, 10]],

       [[ 9, 26, 11],
        [ 5,  2, 18],
        [16, 22, 17]]])

In [761]:
np.argmin(array_3_shuffled, axis=2)

array([[2, 0, 1],
       [1, 0, 2],
       [0, 1, 0]])

In [762]:
np.argmax(array_3_shuffled, axis=2)

array([[1, 2, 2],
       [0, 1, 1],
       [1, 2, 1]])

In [763]:
# at the two examples above, the index for the min and max elements inside each row (axis=2) were outputted on a 2D matrix.
# at the shuffled array shown above, the min numbers for each row at the first block have as indexes: [2,0,1] ; at the second block: 
# [1,0,2] and at the third block: [0,1,0].
# i.e., for the first block and its rows, the min numbers (for those returned indexes) are: row0[2]=1 row1[0]=6 row2[1]=15, and so on.
# For the max number indexes and values it is the same.
# (the shuffle is not seeded, so these specific numbers are only valid for the outputs shown here and change when the notebook is re-run)

In [764]:
# if we set the axis as 1, for the same example above, and as it is a 3D-array, the comparison will not be between each element of d2 
# (inside each row), but between each corresponding element of the rows inside one same block - column by column - d1. I.E, for the first 
# block and for the first position of each row (or columns), then for the second position of each row and finally for the last position 
# of each row.

In [765]:
np.argmax(array_3_shuffled, axis=1)

array([[2, 2, 2],
       [0, 1, 1],
       [2, 0, 1]])

In [766]:
# at the example above, when comparing the columns of the rows of the first block, the max numbers are all at the last row (index 2): 
# column 0 -> 21, column 1 -> 15 and column 2 -> 24, and so on for the other blocks.

In [767]:
# for a 1D-array, it would be the easiest case scenario:

In [768]:
array_1

array([1, 2, 3])

In [769]:
np.argmin(array_1), np.argmax(array_1)

(0, 2)

In [770]:
# i.e., indexes 0 and 2 of d0, at this 1D array, point out to the min and max values, which are, in this case, d0[0]=1 and d0[2]=3.

## Images into ndarrays

<img src="./panda.png" />

In [771]:
from matplotlib.image import imread
panda_img = imread("./panda.png")

In [772]:
type(panda_img), panda_img.size, panda_img.shape

(numpy.ndarray, 24465000, (2330, 3500, 3))

In [773]:
panda_img[:1]

array([[[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]]], dtype=float32)

In [774]:
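# above, we've imported a png img into an ndarray using matplotlib and numpy. And we can see it generated a 3D-array, of size (number 
# of elements) equal to 24465000, and this array with a shape of (2330, 3500, 3), i.e., with 3 positions at d2 (the color channels of 
# each pixel - R, G and B), 3500 positions at d1 (the pixels of each row - the image width) and 2330 positions at d0 (the rows of pixels - 
# the image height)... each position with an element, totaling 2330*3500*3 = 24465000 elements for the whole ndarray or for the whole 
# image numerical representation. The next images have 4 positions at d2 because they also carry an alpha (transparency) channel - RGBA.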

In [775]:
# More examples:

<img src="./car-photo.png">

In [776]:
car_img = imread("./car-photo.png")

In [777]:
type(car_img), car_img.size, car_img.shape

(numpy.ndarray, 991300, (431, 575, 4))

In [778]:
car_img[:1]

array([[[0.5019608 , 0.50980395, 0.4862745 , 1.        ],
        [0.3372549 , 0.34509805, 0.30588236, 1.        ],
        [0.20392157, 0.21568628, 0.14901961, 1.        ],
        ...,
        [0.64705884, 0.7058824 , 0.54901963, 1.        ],
        [0.59607846, 0.63529414, 0.45882353, 1.        ],
        [0.44705883, 0.47058824, 0.3372549 , 1.        ]]], dtype=float32)

<img src="./dog-photo.png">

In [779]:
dog_img = imread("./dog-photo.png")

In [780]:
type(dog_img), dog_img.size, dog_img.shape

(numpy.ndarray, 993600, (432, 575, 4))

In [781]:
dog_img[:1]

array([[[0.70980394, 0.80784315, 0.88235295, 1.        ],
        [0.72156864, 0.8117647 , 0.8862745 , 1.        ],
        [0.7411765 , 0.8156863 , 0.8862745 , 1.        ],
        ...,
        [0.49803922, 0.6862745 , 0.8392157 , 1.        ],
        [0.49411765, 0.68235296, 0.8392157 , 1.        ],
        [0.49411765, 0.68235296, 0.8352941 , 1.        ]]], dtype=float32)

In [782]:
# algorithms in machine learning would then try to find patterns and eventually manipulate those pixels numerical representations 
# at those numpy ndarrays.