In [None]:
import numpy as np
import pandas as pd

# What are we going to cover ?

- Most useful functions
- NumPy data types & attributes (ndarray)
- Creating arrays
- Viewing arrays & matrices
- Manipulating & comparing arrays
- Sorting arrays
- Use cases

## 1. Data types and attributes

In [None]:
# NumPy's main datatype is the ndarray (n-dimensional array).
# This one is 1-dimensional and is built from a plain Python list.
a1 = np.array([1, 2, 3, 4])

In [None]:
a1

In [None]:
type(a1)

In [None]:
# 2-D array built from nested lists: 2 rows x 3 columns.
# Mixing ints and floats makes NumPy upcast every element to float.
a2 = np.array([
    [1, 2, 3.0],
    [5.3, 7.6, 9.0],
])

In [None]:
# Another 2-D array, this time 3 rows x 3 columns
a3 = np.array([
    [1, 2, 3.0],
    [5.3, 7.6, 9.0],
    [3.0, 6.7, 0.9],
])

In [None]:
a2

In [None]:
a3

In [None]:
a1.shape

In [None]:
a2.shape

In [None]:
a3.shape

In [None]:
a1.ndim, a2.ndim, a3.ndim

In [None]:
a1.dtype, a2.dtype

In [None]:
a1.size, a2.size

In [None]:
# Wrap the 2-D NumPy array in a pandas DataFrame
# (no labels are passed, so rows and columns get default integer labels)
df = pd.DataFrame(data=a2)
df

## 2. Creating arrays 

In [None]:
sample_array = np.array([1, 2, 3])
sample_array

In [None]:
# 2 x 3 array filled with 1.0
ones = np.ones(shape=(2, 3))

In [None]:
ones

In [None]:
# 2 x 3 array filled with 0.0
zeros = np.zeros(shape=(2, 3))

In [None]:
zeros

In [None]:
# Evenly spaced values from 0 (inclusive) to 10 (exclusive) in steps of 2
range_array = np.arange(0, 10, step=2)

In [None]:
range_array

In [None]:
# 3 x 5 array of random integers drawn from [0, 10)
random_array = np.random.randint(low=0, high=10, size=(3, 5))

In [None]:
random_array

In [None]:
random_array.size

In [None]:
random_array.shape

In [None]:
np.random.random((5, 4))

When we want someone else to run the same code and get the same results, we set the random seed with the following function:

- np.random.seed(seed=0)

In [None]:
# NumPy's random numbers are pseudo-random: fixing the seed makes the
# generated sequence (and therefore this array) identical on every run
np.random.seed(0)
random_array_3 = np.random.randint(low=0, high=10, size=(5, 3))
random_array_3

## 3. Viewing arrays and matrices

In [None]:
array_unique_elements = np.unique(random_array_3)
array_unique_elements

In [None]:
random_array_3[0]

In [None]:
random_array_3[1:3,1:]

In [None]:
# 4-dimensional array of random integers in [0, 10) with shape (2, 3, 4, 5)
a4 = np.random.randint(low=0, high=10, size=(2, 3, 4, 5))
a4

In [None]:
a4.shape, a4.ndim

In [None]:
# Get the first 4 numbers of each innermost array.
# The stop index of a slice is exclusive, so `:4` selects positions 0, 1, 2 and 3
# (the previous `:3` only returned the first 3 numbers).
a4[:, :, :, :4]

## 4. Manipulating & comparing arrays

### Arithmetic

In [None]:
a1

In [None]:
ones = np.ones(4)

In [None]:
ones

In [None]:
a1 + ones

In [None]:
a1 - ones

In [None]:
a1 * ones

In [None]:
a1

In [None]:
# Redefine a2 with 4 columns so its last dimension matches a1's 4 elements
a2 = np.array([
    [1, 2, 3.0, 4.0],
    [5.3, 7.6, 9.0, 2.5],
])
a2

In [None]:
a1 * a2

Use Python's built-in functions (e.g. `sum()`) on Python data types, and NumPy's functions (e.g. `np.sum()`) on NumPy arrays.

In [None]:
# Create a massive NumPy array (one million random floats)
massive_array = np.random.random(1000000)

In [None]:
massive_array[:10]

In [None]:
%timeit sum(massive_array) # Python's built-in sum()
%timeit np.sum(massive_array) # NumPy's np.sum()

In [None]:
np.mean(a2)

In [None]:
np.max(a2)

In [None]:
np.std(a2)

## Reshaping & transposing

Broadcasting takes place when you perform operations between arrays of different shapes

In [None]:
# 3 rows x 2 columns
a = np.array([[0, 1], [2, 3], [4, 5]])

In [None]:
b = np.array([10, 100])

In [None]:
a.shape

In [None]:
b.shape

In [None]:
a * b

In [None]:
# 2 rows x 3 columns
c = np.array([[0, 1, 2], [3, 4, 5]])

In [None]:
# c has shape (2, 3) and b has shape (2,): the trailing dimensions (3 vs 2)
# differ and neither is 1, so the arrays cannot be broadcast together and
# NumPy raises a ValueError. The error is caught and printed so the
# notebook still runs from top to bottom.
try:
    c * b
except ValueError as err:
    print(f"ValueError: {err}")

Multiplying `c` and `b` above fails because their shapes, (2, 3) and (2,), are not compatible for broadcasting. The solution is to explicitly give `b` an extra second dimension, so that its shape becomes (2, 1), which is compatible for broadcasting with `c`. There are two ways to do this:

- index `b` with `None` in the second position (`b[:, None]`), or

- `reshape` `b` to (2, 1), as in the examples below:

In [None]:
c * b[:, None]

In [None]:
c * b.reshape(2, 1)

In [None]:
# Transpose = swaps the axes (rows become columns and columns become rows)
a2.T

In [None]:
a2.T.shape

## Dot product

In [None]:
# Fix the seed so both matrices are the same on every run
np.random.seed(0)

# Two 5 x 3 matrices of random integers in [0, 10)
mat1 = np.random.randint(low=0, high=10, size=(5, 3))
mat2 = np.random.randint(low=0, high=10, size=(5, 3))

In [None]:
mat1

In [None]:
mat2

In [None]:
# Element-wise multiplication (Hadamard product)
mat1 * mat2

In [None]:
# Dot product: the inner dimensions must match.
# mat1 and mat2 are both (5, 3), so the inner dimensions are 3 and 5 and
# NumPy raises a ValueError. The error is caught and printed so the
# notebook still runs from top to bottom.
try:
    np.dot(mat1, mat2)
except ValueError as err:
    print(f"ValueError: {err}")

In [None]:
np.dot(mat1, mat2.T)

## Dot product example (nut butter sales)

In [None]:
# Re-seed so the sales figures are reproducible
np.random.seed(0)

# 5 x 3 array of random integers in [0, 20)
sales_amounts = np.random.randint(low=0, high=20, size=(5, 3))
sales_amounts

In [None]:
# Label the raw sales numbers: one row per weekday, one column per nut butter
weekly_sales = pd.DataFrame(
    data=sales_amounts,
    index=['Mon', 'Tue', 'Wed', 'Thu', 'Fri'],
    columns=['Almond butter', 'Peanut butter', 'Cashew butter'],
)

In [None]:
weekly_sales

In [None]:
# Create prices array
prices = np.array([10, 8, 12])
prices

In [None]:
# First attempt at creating the butter_prices DataFrame.
# `prices` is 1-dimensional with shape (3,), which pandas treats as
# 3 rows x 1 column; that does not fit 1 index label and 3 column labels,
# so the constructor raises a ValueError. The error is caught and printed
# so the notebook still runs from top to bottom.
try:
    butter_prices = pd.DataFrame(prices,
                                 index=['Price'],
                                 columns=['Almond butter', 'Peanut butter', 'Cashew butter'])
except ValueError as err:
    print(f"ValueError: {err}")

In [None]:
prices.shape

In [None]:
# Reshape prices from (3,) to (1, 3) so the data matches
# 1 index label ('Price') x 3 column labels
butter_prices = pd.DataFrame(
    data=prices.reshape((1, 3)),
    index=['Price'],
    columns=['Almond butter', 'Peanut butter', 'Cashew butter'],
)

In [None]:
weekly_sales

In [None]:
butter_prices

In [None]:
sales_amounts.shape

In [None]:
prices.shape

In [None]:
# prices has shape (3,) and sales_amounts has shape (5, 3): the inner
# dimensions (3 and 5) differ, so the dot product raises a ValueError.
# The error is caught and printed so the notebook still runs from top to bottom.
try:
    prices.dot(sales_amounts)
except ValueError as err:
    print(f"ValueError: {err}")

shapes (3,) and (5,3) not aligned: the solutions to that error are

- 1 : change the order of the dot product from (`prices.dot(sales_amounts)`) to (`sales_amounts.dot(prices)`) in order to transform shapes (`(3,) and (5,3)`) into (`(5,3) and (3,)`)

- 2 : Transpose (`sales_amounts`) in order to transform shapes (`(3,) and (5,3)`) into (`(3,) and (3,5)`), i.e. `prices.dot(sales_amounts.T)`

- 3 : Compute the daily_sales and then add it as column of weekly_sales

In [None]:
# Total sales ($) per day: (1, 3) prices . (3, 5) transposed sales -> (1, 5).
# Only the nut butter columns are selected, so this cell can safely be
# re-run even after a 'Total ($)' column has been added to weekly_sales
# (DataFrame.dot requires the labels on both sides to match).
daily_sales = butter_prices.dot(weekly_sales[butter_prices.columns].T)

In [None]:
daily_sales

In [None]:
daily_sales.shape

In [None]:
# daily_sales has a single row labelled 'Price' holding one total per weekday.
# Selecting that row gives a Series indexed by weekday, which pandas aligns
# with weekly_sales' index when assigning the new column.
weekly_sales['Total ($)'] = daily_sales.loc['Price']

In [None]:
weekly_sales

## Comparison Operators

In [None]:
a1 = np.array([1, 2, 3])

In [None]:
# 2 x 3 float array, so each row is compared against the 3-element a1
a2 = np.array([
    [1, 2, 3.0],
    [5.3, 7.6, 9.0],
])

In [None]:
a1 > a2

In [None]:
bool_array = a1 >= a2
bool_array

In [None]:
type(bool_array), bool_array.dtype

In [None]:
a1 > 5

In [None]:
a1 < 5

## Sorting arrays

In [None]:
# 3 x 5 array of random integers in [0, 10) to demonstrate sorting
random_array = np.random.randint(low=0, high=10, size=(3, 5))
random_array

In [None]:
random_array.shape

In [None]:
np.sort(random_array)

In [None]:
# np.argsort() returns the indices that would sort the array
# (along the last axis by default, i.e. within each row here)
np.argsort(random_array)

In [None]:
# Per row (axis=1): column index of the smallest and of the largest value
random_array.argmin(axis=1), random_array.argmax(axis=1)

## Practical Example - Numpy in Action !!!

NB: In machine learning, whatever data we have, we turn it into numbers and then use a machine learning algorithm to find patterns in those numbers.

<img src="../data/images/dog-photo-1.jpeg"/>

In [None]:
# Turn an image into a Numpy array
from matplotlib.image import imread

In [194]:
dog = imread("../data/images/dog-photo-1.jpeg")

In [195]:
print(type(dog))

<class 'numpy.ndarray'>


In [196]:
dog.shape, dog.size, dog.ndim

((432, 575, 4), 993600, 3)

In [197]:
dog[:1]

array([[[181, 206, 225, 255],
        [184, 207, 226, 255],
        [189, 208, 226, 255],
        ...,
        [127, 175, 214, 255],
        [126, 174, 214, 255],
        [126, 174, 213, 255]]], dtype=uint8)