### Numpy 

An open source Python library that is the universal standard for working with numerical data in Python
- Contains nd array and matrix data structures 

In [2]:
# packages required
import numpy as np 

What's the difference between a Python list and a np array?
- A Python list can contain different data types, but np arrays have to be homogeneous (every element shares one dtype)
- Math operations on arrays would be extremely inefficient if the arrays weren't homogeneous
- np arrays are faster and more compact than Python lists, consume less memory, and are convenient to use

What's an array 
- An array is a central data structure of the np library. It is a grid of values that contains: 
    - Data values 
    - Index (how to locate an element)
    - How to interpret an element

In [42]:
# A flat Python list becomes a 1D array (a vector).
arr = np.array([1, 2, 3, 4, 5])

# A list of equal-length lists becomes a 2D array (a matrix): here 3 rows x 4 columns.
arr2 = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])

In [35]:
# zeros() and ones() take the shape and return float arrays by default
print(np.zeros(shape=4))
print(np.ones(shape=4))

# empty() only allocates memory: the values are whatever was there, never rely on them
print(np.empty(shape=2))

# full() fills an array of the given shape with any constant you choose
print(np.full(shape=(2, 2), fill_value=7))

[0. 0. 0. 0.]
[1. 1. 1. 1.]
[1.48539705e-313 1.06099790e-313]
[[7 7]
 [7 7]]


Use np.arange to create an array of a range of elements 
- stop value is exclusive 

In [44]:
# with one argument, arange counts up from 0 and stops *before* that value
print(np.arange(4))

# with three arguments they mean start, stop (exclusive) and step
print(np.arange(2, 9, 2))

[0 1 2 3]
[2 4 6 8]


Create a 1d array with 7 equally spaced values between 1 and 10 (both endpoints included)

In [45]:
# linspace (linear space): give start, stop and how many equally spaced values you want;
# unlike arange, the stop value is included
np.linspace(1, 10, num=7)

array([ 1. ,  2.5,  4. ,  5.5,  7. ,  8.5, 10. ])

In [46]:
# Choosing the data type:
# creation functions such as ones() default to floating point (np.float64);
# pass the dtype keyword to ask for something else explicitly.

np.ones(2, dtype=np.int64)

array([1, 1], dtype=int64)

Adding, removing and sorting elements 
- Removing elements is as simple as creating a new array via indexing 

In [47]:
arr = np.array([2, 1, 5, 3, 7, 4, 6, 8])

# np.sort returns a new array in ascending order; arr keeps its original order
sorted_arr = np.sort(arr)
sorted_arr

array([1, 2, 3, 4, 5, 6, 7, 8])

In [48]:
a = np.arange(1, 5)  # [1 2 3 4]
b = np.arange(5, 9)  # [5 6 7 8]

# join the two vectors end to end into one new array
ab = np.concatenate([a, b])
ab

array([1, 2, 3, 4, 5, 6, 7, 8])

In [49]:
# the same function works on 2d arrays
x = np.array([[1, 2],
              [3, 4]])
y = np.array([[5, 6]])

# axis=0 appends y as extra rows below x (the column counts must match)
xy = np.concatenate([x, y], axis=0)
xy

array([[1, 2],
       [3, 4],
       [5, 6]])

Getting the shape and size of an array 


In [39]:
a = np.array([[5, 10, 15],
              [20, 25, 20]])

# ndarray.ndim: the number of axes (dimensions) of the array
print(a.ndim)

# ndarray.size: the total number of elements in the array
print(a.size)

# ndarray.shape: the number of elements along each dimension; for 2d that is (rows, cols)
print(a.shape)

2
6
(2, 3)


In [None]:
# three identical 2 x 4 layers stacked along a new leading axis give a 3d array
arr = np.array([
    [[1, 2, 3, 4], [5, 6, 7, 8]],
    [[1, 2, 3, 4], [5, 6, 7, 8]],
    [[1, 2, 3, 4], [5, 6, 7, 8]],
])

# number of axes, total element count, and length along each axis
arr.ndim, arr.size, arr.shape

#### Reshaping an array
- Using arr.reshape() will give a new shape to an array w/o changing the data

In [None]:
a = np.arange(6)
print(a)

# reshape lays the same 6 values out as 3 rows and 2 cols; the element count must match
b = a.reshape(3, 2)
print(b)

# np.reshape is the function form and has a few optional args.
# The target shape is passed positionally: the `newshape=` keyword is deprecated
# since NumPy 2.1 (renamed `shape=`), while the positional form works in every version.
# order: 'C' reads/writes elements in C-like (row-major) index order, 'F' in Fortran-like order
np.reshape(a, (1, 6), order='C')

#### How to convert a 1D array into a 2D array (adding a new axis to an array)

In [None]:
a = np.arange(1, 7)
a.shape  # (6,) -> one axis of length 6

# use np.newaxis inside an index to insert an extra axis of length 1 at that position
a2 = a[np.newaxis, :]
a2.shape

In [52]:
a = np.arange(1, 7)
a.shape  # (6,)

# np.newaxis in front of the data axis -> one row holding all 6 values
row_vector = a[np.newaxis, :]
print(row_vector.shape)

# np.newaxis after the data axis -> 6 rows of one value each
col_vector = a[:, np.newaxis]
print(col_vector.shape)

(1, 6)
(6, 1)


#### Indexing and slicing 
- Index and slice np arrays in the same way as Python lists

In [43]:
data = np.array([1, 2, 3])

print(data[1])    # single element (indices start at 0)
print(data[:2])   # everything before index 2
print(data[-2:])  # the last two elements

a = np.array([1, 2, 3, 4, 5, 6])
print(a[1:5:2])   # start:stop:step -> indices 1 and 3

# conditional slicing: a comparison builds a boolean mask, and indexing with the
# mask keeps only the elements where it is True (the result is always 1d)
a = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8],
              [9, 10, 11, 12]])
print(a[a < 5])
print(a[a >= 5])

2
[1 2]
[2 3]
[2 4]
[1 2 3 4]
[ 5  6  7  8  9 10 11 12]


In [60]:
# starting with this array:
a = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8],
              [9, 10, 11, 12]])

# np.nonzero() on a condition gives the indices of the elements that satisfy it
b = np.nonzero(a < 5)
print(b)  # a tuple of index arrays, one per dimension (row indices, then col indices)

# pairing the index arrays up element by element yields (row, col) coordinates
for coord in zip(*b):
    print(coord)


(array([0, 0, 0, 0], dtype=int64), array([0, 1, 2, 3], dtype=int64))
(0, 0)
(0, 1)
(0, 2)
(0, 3)


#### Slicing 2d arrays 


In [None]:
# retrieving an element from a 2d array 

#### Slicing 3d arrays
- This is a visualisation of what a 3d array looks like 


Create an array from existing data 

In [65]:
# a slice of an existing array is itself an array: here the first five elements
a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
arr1 = a[:5]
print(arr1)

# two existing arrays can also be stacked, vertically or horizontally
a1 = np.array([[1, 1], [2, 2]])
a2 = np.array([[3, 3], [4, 4]])

# vstack puts a2 underneath a1 (more rows)
print(np.vstack([a1, a2]))

# hstack puts a2 to the right of a1 (more columns)
print(np.hstack([a1, a2]))

[1 2 3 4 5]
[[1 1]
 [2 2]
 [3 3]
 [4 4]]
[[1 1 3 3]
 [2 2 4 4]]


In [69]:
# hsplit cuts an array column-wise into several sub-arrays
x = np.arange(1, 25).reshape(2, 12)
print(x)

# an integer asks for that many equally wide pieces (12 cols -> 3 pieces of 4 cols)
print(np.hsplit(x, 3))

# a sequence gives the column indices to cut at: after the 3rd and after the 4th column
print(np.hsplit(x, [3, 4]))

[[ 1  2  3  4  5  6  7  8  9 10 11 12]
 [13 14 15 16 17 18 19 20 21 22 23 24]]
[array([[ 1,  2,  3,  4],
       [13, 14, 15, 16]]), array([[ 5,  6,  7,  8],
       [17, 18, 19, 20]]), array([[ 9, 10, 11, 12],
       [21, 22, 23, 24]])]
[array([[ 1,  2,  3],
       [13, 14, 15]]), array([[ 4],
       [16]]), array([[ 5,  6,  7,  8,  9, 10, 11, 12],
       [17, 18, 19, 20, 21, 22, 23, 24]])]


Create an identity matrix 



In [36]:
# eye(n) builds the n x n identity matrix: 1.0 on the main diagonal, 0.0 elsewhere
arr = np.eye(3)
arr

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

You can use the view method to create a new array object that looks at the same data as the orig arr (a shallow copy)
- Views are an impt numpy concept: basic slicing returns a view, so modifying data in the view also modifies the orig arr

In [72]:
a = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8],
              [9, 10, 11, 12]])

# slicing out the first row gives a view: b1 shares its data with a
b1 = a[0, :]
print(b1)

# writing through the view ...
b1[0] = 99

# ... changes the original array too
print(a)

# copy() makes a deep copy with its own data, so edits to b2 would not touch a
b2 = a.copy()

[1 2 3 4]
[[99  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


Basic Array Operations 

In [78]:
data = np.array([1, 2])
ones = np.ones(2, dtype=int)

# arithmetic operators work element by element on arrays of the same shape
print(data + ones)  # addition
print(data * ones)  # multiplication
print(data - ones)  # subtraction
print(data / data)  # true division: the result is always float

[2 3]
[1 2]
[0 1]
[1. 1.]


In [84]:
# sum() with no arguments adds up every element of the array
a = np.array([1, 2, 3, 4])
a.sum()

# for a 2D array, the axis argument picks the direction that gets collapsed
b = np.array([[1, 1],
              [2, 2]])
b.sum(axis=0)  # collapse the rows: one total per column -> [3, 3]
b.sum(axis=1)  # collapse the columns: one total per row -> [2, 4]


array([2, 4])

#### Broadcasting 
- Operation between a vector and a scalar 

In [88]:
# the scalar is broadcast: it is applied to every element of the vector
data = np.array([1, 2], dtype=np.float64)
data * 1.6

array([1.6, 3.2])

#### Numpy also performs agg functions

In [89]:
# aggregation methods reduce the whole array to a single value
data = np.array([1, 2], dtype=np.float64)
data.max(), data.min(), data.sum()

(2.0, 1.0, 3.0)

In [92]:
# aggregating across rows or columns is a common use case
a = np.array([
    [0.45053314, 0.17296777, 0.34376245, 0.5510652],
    [0.54627315, 0.05093587, 0.40067661, 0.55645993],
    [0.12697628, 0.82485143, 0.26590556, 0.56917101],
])

# without an axis the aggregation runs over every element
a.sum(), a.min()

# with axis=0 the rows are collapsed, leaving one sum per column
a.sum(axis=0)

array([1.12378257, 1.04875507, 1.01034462, 1.67669614])

#### Creating matrices 
- Pass a list of lists to create a 2D array (or matrix)

In [95]:
data = np.array([[1, 2, 3],
                 [3, 4, 5]])
data.shape  # (2, 3): 2 rows, 3 cols

# leave the axis out to aggregate the whole matrix, or pass axis for row/col-wise results;
# axis=0 collapses the rows and returns one sum per column
data.sum(axis=0)

array([4, 6, 8])

In [97]:
# matrices of the same shape are added (and multiplied) element by element, like vectors
data = np.array([[1, 2], [3, 4]])
ones = np.array([[1, 1], [1, 1]])
data + ones

# broadcasting rules also apply when one operand has a single row (or a single col):
# the (1, 2) row below is repeated across all 3 rows of the (3, 2) matrix.
# ones_row is written as a real one-row matrix; a 1x1 array would give the same sum
# but would not show row broadcasting.
data = np.array([[1, 2], [3, 4], [5, 6]])
ones_row = np.array([[1, 1]])
data + ones_row

array([[2, 3],
       [4, 5],
       [6, 7]])

There are instances where we want numpy to init the values of an array 
- numpy offers ones(), zeros() and the random.Generator class for rng 

In [98]:
# each creator takes the number of elements to generate
ones = np.ones(3)
zeros = np.zeros(3)

# a Generator seeded with 0 produces the same "random" floats in [0, 1) on every run
randoms = np.random.default_rng(0).random(3)

print(ones, zeros, randoms)

[1. 1. 1.] [0. 0. 0.] [0.63696169 0.26978671 0.04097352]


In [100]:
# passing a (rows, cols) tuple instead of a count makes ones(), zeros() and random()
# return matrices
m_ones = np.ones((3, 2))
m_zeros = np.zeros((3, 2))
m_randoms = np.random.default_rng(0).random((3, 2))  # seeded, so reproducible

print(m_ones, m_zeros, m_randoms)

[[1. 1.]
 [1. 1.]
 [1. 1.]] [[0. 0.]
 [0. 0.]
 [0. 0.]] [[0.63696169 0.26978671]
 [0.04097352 0.01652764]
 [0.81327024 0.91275558]]


Generating random numbers 
- Essential part of the configuration and evaluation of many numerical and ML algos 
- Eg use cases: randomly init weights in an artificial neural network 
- split data into random sets 
- randomly shuffle your dataset 

In [105]:
# integers(5, size=...) draws from 0..4: the upper bound 5 is exclusive.
# The result is a 2x4 array; the fixed seed makes it reproducible.
m_randints = np.random.default_rng(0).integers(5, size=(2, 4))
print(m_randints)

[[4 3 2 1]
 [1 0 0 0]]


#### How to get unique items and counts 
(covers np.unique())
- Tasks
- Return the unique values of the array 
- Return the indices of unique values of the array
- Return the occurrence counts of the array 

In [7]:
# np.unique() returns the distinct values of an array, sorted
a = np.array([10, 11, 11, 12, 12, 13, 14, 15, 16, 17, 17, 18, 19, 20, 20])

unique_values = np.unique(a)
print(unique_values)

# return_index=True also returns, for each unique value, the index of its first occurrence in a
unique_values, indices_list = np.unique(a, return_index=True)
print(indices_list)

# return_counts=True also returns how many times each unique value occurs
unique_values, occurrence_counts = np.unique(a, return_counts=True)
print(occurrence_counts)

[10 11 12 13 14 15 16 17 18 19 20]
[ 0  1  3  5  6  7  8  9 11 12 13]
[1 2 2 1 1 1 1 2 1 1 2]


In [11]:
# np.unique() also works with 2d arrays
a_2d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]])

# without an axis the array is flattened first, so this lists every distinct value
unique_values_2d = np.unique(a_2d)
print(unique_values_2d)

# pass the axis argument to filter unique rows/cols instead of unique values
# axis=0 -> unique rows (the repeated [1, 2, 3, 4] row is kept once)
unique_values_rows = np.unique(a_2d, axis=0)
print(unique_values_rows)

# axis=1 -> unique columns (all four columns differ here, so none is dropped)
unique_values_cols = np.unique(a_2d, axis=1)
print(unique_values_cols)


[ 1  2  3  4  5  6  7  8  9 10 11 12]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [ 1  2  3  4]]


Transposing and reshaping a matrix 
- arr.reshape(), arr.transpose(), arr.T (note: T is an attribute, so it takes no parentheses)

It is a common need to transpose matrices 
![image.png](attachment:image.png)

In [21]:
data = np.array([[1, 2, 3],
                 [4, 5, 6]])  # 2 rows x 3 cols

# the arr.T attribute and the arr.transpose() method both swap rows and cols
print(data.T)

# reshape returns a new array and leaves data itself alone
data.reshape(2, 3)  # (2, 3) is already the shape of data
data.reshape(6,)    # a single length flattens the 6 values into a vector

[[1 4]
 [2 5]
 [3 6]]


array([1, 2, 3, 4, 5, 6])

In [25]:
# transpose() with no arguments reverses the order of the axes (rows become cols);
# it also accepts an explicit axis order when a different permutation is needed
arr = np.arange(6).reshape(2, 3)
arr.transpose()





array([[0, 3],
       [1, 4],
       [2, 5]])

Reverse arrays with np.flip()

In [30]:
# 1d: flip reverses the order of the elements
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8])
rev_arr = np.flip(arr)
print(rev_arr)

# 2d: with no axis both rows and cols are reversed;
# axis=0 reverses only the order of the rows, axis=1 only the order of the cols
arr_2d = np.array([[1, 2, 3, 4],
                   [5, 6, 7, 8],
                   [9, 10, 11, 12]])
rev_arr_2d = np.flip(arr_2d)
rev_arr_rows = np.flip(arr_2d, axis=0)
rev_arr_cols = np.flip(arr_2d, axis=1)

print(rev_arr_2d)
print(rev_arr_rows)
print(rev_arr_cols)

# a single row can be reversed in place by flipping it and assigning it back
arr_2d[1] = np.flip(arr_2d[1])
arr_2d

[8 7 6 5 4 3 2 1]
[[12 11 10  9]
 [ 8  7  6  5]
 [ 4  3  2  1]]
[[ 9 10 11 12]
 [ 5  6  7  8]
 [ 1  2  3  4]]
[[ 4  3  2  1]
 [ 8  7  6  5]
 [12 11 10  9]]


array([[ 1,  2,  3,  4],
       [ 8,  7,  6,  5],
       [ 9, 10, 11, 12]])

Reshaping / flattening multi-dimensional arrays
- .flatten(), ravel()

In [34]:
x = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8],
              [9, 10, 11, 12]])

# both calls return the values as a 1d array, but they differ in what they hand back:
# flatten() always returns a copy (deep copy), so editing the result leaves x alone
x.flatten()

# ravel() returns a view of x when it can (shallow copy), so editing the result can change x
x.ravel()
x

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

Expanding and squeezing a np.array 
- np.expand_dims(), np.squeeze()

In [40]:
# add a new axis of length 1 using expand_dims()
a = np.array([1, 2, 3])        # shape (3,)
b = np.expand_dims(a, axis=0)  # new axis in front  -> shape (1, 3), a row vector
c = np.expand_dims(a, axis=1)  # new axis at the end -> shape (3, 1), a col vector

# display all three so the effect of each axis choice is visible
a, b, c

(array([1, 2, 3]),
 array([[1, 2, 3]]),
 array([[1],
        [2],
        [3]]))

In [42]:
# reduce the number of axes using squeeze()
a = np.array([[[1, 2, 3], [4, 5, 6]]])  # 3d array, shape (1, 2, 3)
b = np.squeeze(a, axis=0)               # drops the length-1 leading axis
b  # 2d array, shape (2, 3)

# squeezing axis 0 again is an error: that axis of b has length 2, and only
# length-1 axes can be removed. The error is caught and printed so the cell
# still runs to completion while showing the message.
try:
    c = np.squeeze(b, axis=0)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: cannot select an axis to squeeze out which has size not equal to one