# Fundamentals of NumPy

NumPy is a library whose core is implemented in C, commonly used in data science and machine learning. It is typically the backbone for many other machine learning libraries and algorithms. For instance, pandas DataFrames are built on NumPy arrays.

In [None]:
from matplotlib.image import imread
import numpy as np
import pandas as pd

## DataTypes & Attributes

NumPy's main datatype is the `ndarray` (n-dimensional array).

In [None]:
# 1-D array built from a plain Python list
a1 = np.array([1, 2, 3])
# Show the array together with its type
a1, type(a1)

In [None]:
# 2-D array, shape = (2, 3); the float entries make the whole array float
a2 = np.array([
    [1.0, 2.0, 3.3],
    [4.0, 5.0, 6.5],
])

# 3-D array, shape = (2, 3, 3): two stacked 3x3 matrices
a3 = np.array([
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
])
a2, a3

In [None]:
# Shape: the length of each array along every dimension
# (a1 -> (3,), a2 -> (2, 3), a3 -> (2, 3, 3))
a1.shape, a2.shape, a3.shape

In [None]:
# Number of dimensions (axes) of each array: 1, 2 and 3 respectively
a1.ndim, a2.ndim, a3.ndim

In [None]:
# Element data type of each array: a1 and a3 hold integers, while a2 is
# float because it contains floating-point values
a1.dtype, a2.dtype, a3.dtype

In [None]:
# Total number of elements in each array (3, 6 and 18)
a1.size, a2.size, a3.size

## Creating Numpy Arrays

In [None]:

# Array created directly from a Python list of integers
sample_array = np.array([1, 2, 3])
sample_array, sample_array.dtype

In [None]:
# Array of the requested shape with every element set to 1
ones = np.ones(shape=(2, 3))
ones, ones.dtype

In [None]:
# Array of the requested shape with every element set to 0
zeros = np.zeros(shape=(2, 3))
zeros, zeros.dtype

In [None]:
# Evenly spaced integers: start at 0, stop before 10, step by 2
range_array = np.arange(start=0, stop=10, step=2)
range_array, range_array.dtype

In [None]:
# Random integers drawn from [0, 10), arranged as 3 rows by 5 columns
random_array = np.random.randint(low=0, high=10, size=(3, 5))
random_array

In [None]:
# Random floats drawn from [0, 1), arranged as 5 rows by 3 columns
random_array_2 = np.random.random(size=(5, 3))
random_array_2

In [None]:
# Random array of floats in [0, 1); rand takes the dimensions as separate
# arguments rather than as a single shape tuple
random_array_3 = np.random.rand(5, 3)
random_array_3

In [None]:
# Setting a random seed (NumPy's "random" numbers are actually pseudo-random).
# Seeding the generator makes the numbers reproducible.
np.random.seed(5)
random_array_4 = np.random.randint(low=0, high=10, size=(5, 3))
random_array_4

## Viewing Arrays and Matrices

In [None]:
# Re-drawing random_array_4; the seed is not set again here, so this is a fresh draw
random_array_4 = np.random.randint(low=0, high=10, size=(5, 3))
random_array_4

In [None]:
# Listing the sorted unique values in an array/matrix
np.unique(random_array_4)

In [None]:
# Recap of the 1-D, 2-D and 3-D example arrays
a1, a2, a3

In [None]:
# Indexing the first axis: a1[0] is a single value, a2[0] is the first row,
# and a3[0] is the first 3x3 matrix
a1[0], a2[0], a3[0]

In [None]:
# One index per axis selects a single element from each array
a1[1], a2[1, 2], a3[1, 2, 2]

In [None]:
# Slicing: take the first two entries along every axis of each array
a1[:2], a2[:2, :2], a3[:2, :2, :2]

## Manipulating & Comparing Arrays

### Arithmetic

In [None]:
# The two operands used in the arithmetic examples: shapes (3,) and (2, 3)
a1, ones

In [None]:
# Adding arrays (element wise): a1, shape (3,), is broadcast across each row of ones, shape (2, 3)
a1 + ones

In [None]:
# Subtracting (element wise, with a1 broadcast across each row of ones)
a1 - ones

In [None]:
# Multiplying (element wise, not matrix multiplication)
a1 * ones

In [None]:
# Dividing (element wise, with a1 broadcast across each row of ones)
a1 / ones

In [None]:
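# Broadcasting error (incompatible shapes in arithmetic)
## a2 + a3

# The above code results in the following error
# ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3)

# This is because shapes are compared from the trailing dimension backwards, and each pair of
# dimensions must be equal or contain a 1: (2, 3) vs (2, 3, 3) matches on the last axis (3 vs 3)
# but not on the next one (2 vs 3)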

In [None]:
# Powers (element wise): squares every element of a1
a1 ** 2

In [None]:
# Modulo (element wise): remainder of each element of a1 after dividing by 2
a1 % 2

In [None]:
# Exponential (element wise): e raised to each element of a1
np.exp(a1)

In [None]:
# Natural logarithm (element wise)
np.log(a1)

### Aggregation

In [None]:
# Summing the elements of an array
# NOTE: sum(a1) and np.sum(a1) return the same value, but when working with NumPy arrays use NumPy functions,
#       and when working with plain Python data use Python built-ins.
# The reason: NumPy's sum is significantly faster on NumPy arrays.

massive_array = np.random.random(100000)
%timeit sum(massive_array)  # Python's sum()
%timeit np.sum(massive_array)  # Numpy's sum()

In [None]:
# Mean of all elements in a2
np.mean(a2)

In [None]:
# Largest and smallest values across all elements of a2
np.max(a2), np.min(a2)

In [None]:
# Standard deviation & variance (the standard deviation is the square root of the variance)
np.std(a2), np.var(a2)

### Reshaping & Transposing

In [None]:
# a2 and its current shape, before any reshaping
a2, a2.shape

In [None]:
# Reshape a2 from (2, 3) to (2, 3, 1): same 6 elements, with a trailing axis of length 1
a2_reshape = a2.reshape(2, 3, 1)
a2_reshape

In [None]:
# Refer back to the broadcasting error above: a2_reshape has shape (2, 3, 1), and its size-1 last axis
# can be stretched to match a3's shape (2, 3, 3), so the addition now works
a2_reshape + a3

In [None]:
# Transpose
# Same as the transpose in linear algebra: rows become columns, so shape (2, 3) becomes (3, 2)
a2, a2.transpose()

### Matrix Multiplication (Dot Product vs. Element wise)

In [None]:
# Seed first so that both matrices are reproducible
np.random.seed(0)
mat1 = np.random.randint(low=0, high=10, size=(5, 3))
mat2 = np.random.randint(low=0, high=10, size=(5, 3))
mat1, mat2

In [None]:
# Matrix multiplication requires the inner dimensions to match
# mat1.shape = (5, 3) & mat2.shape = (5, 3)
# To make the inner dimensions match, transpose mat2 -> mat1.shape = (5, 3) & mat2.transpose().shape = (3, 5)
# (5, 3) @ (3, 5) -> (5, 5)
np.dot(mat1, mat2.transpose())

In [None]:
# The @ operator performs matrix multiplication, giving the same result as np.dot for these 2-D arrays
mat1 @ mat2.transpose()

### Exercise: Nut Butter Store Sales

In [None]:
# Units sold of 3 different nut butters (columns) over 5 days (rows)
np.random.seed(0)
total_sales = np.random.randint(low=0, high=20, size=(5, 3))
pd.DataFrame(data=total_sales, columns=["Almond Butter", "Peanut Butter", "Cashew Butter"])

In [None]:
# Setting the prices for the butters: a 1-D array with one price per butter
# (presumably in the same order as the total_sales columns — verify)
prices = np.array([10, 8, 12])

In [None]:
# Getting the total sales amount for each day: (5, 3) @ (3,) -> (5,)
# NOTE: prices is 1-D, so no transpose is needed (prices.T would return prices unchanged);
#       .T is shorthand for .transpose() and only has an effect on arrays with 2+ dimensions
total_sales @ prices

### Comparison Operators

In [None]:
# The two arrays compared below: shapes (3,) and (2, 3)
a1, a2

In [None]:
# Element-wise comparisons: a1 is broadcast against each row of a2, so every
# expression yields a boolean array of shape (2, 3)
a1 > a2, a1 >= a2, a1 == a2, a1 != a2, a1 < a2, a1 <= a2

### Sorting Arrays

In [None]:
# Fresh 3x5 array of random integers in [0, 10) to use in the sorting examples
random_array = np.random.randint(low=0, high=10, size=(3, 5))
random_array

In [None]:
# Returns a sorted copy in which the values within each row are in ascending order
np.sort(random_array)

In [None]:
# Returns, for each row, the indices that would sort that row's values
np.argsort(random_array)

In [None]:
# Get the index of the minimum value and the index of the maximum value
np.argmin(a1), np.argmax(a1)

In [None]:
# What about multi-dimensional arrays?

# NOTE: with no axis given, the return value below is a single int: the index of the max value as if the
#       multi-dimensional array had been flattened into a single dimension.
np.argmax(random_array)

## Practical Example - Numpy in Action

In [None]:
# Directory holding the example images (a path relative to the current working directory)
image_data_dir_path = './data/numpy-images'

<img src='./data/numpy-images/panda.png'/>

In [None]:
# Turn an image into a NumPy array (the array holds the color values of each pixel in the image)
# The shape is (num_y_pixels, num_x_pixels, color_channels_of_pixel)
# NOTE(review): a PNG may carry an alpha channel, giving 4 channels (RGBA) rather than 3 — confirm with panda.shape
panda = imread(f'{image_data_dir_path}/panda.png')
# Slice off just the first entry along the first axis to keep the output short
panda[:1]

In [None]:
# Total number of values, shape, and number of dimensions of the image array
panda.size, panda.shape, panda.ndim

<img src='./data/numpy-images/car-photo.png'/>

In [None]:
# Getting the NumPy array of the car photo
car = imread(f'{image_data_dir_path}/car-photo.png')
# Slice off just the first entry along the first axis to keep the output short
car[:1]

<img src='./data/numpy-images/dog-photo.png'/>

In [None]:
# Getting the NumPy array of the dog photo
dog = imread(f'{image_data_dir_path}/dog-photo.png')
# Slice off just the first entry along the first axis to keep the output short
dog[:1]