# Numpy basics

In [1]:
import numpy as np

#### Basic Numpy 1-D array properties

In [2]:
# Defining an 1-D array
a = np.array([1, 2, 3, 4, 5])
print(f"1-D array: {a}")

# Shape of the array
print(f"Shape of the array: {a.shape}")
print(f"Number of dimensions: {a.ndim}")
print(f"Size of the array: {a.size}")
print(
    f"The lenght of the shape is equal to the number of dimensions: {len(a.shape) == a.ndim}"
)

1-D array: [1 2 3 4 5]
Shape of the array: (5,)
Number of dimensions: 1
Size of the array: 5
The lenght of the shape is equal to the number of dimensions: True


In [3]:
# Change the value of an element
a[0] = 10
print(f"After changing the first element: {a}")

# Create an array with zeros
b = np.zeros(5)
print(f"Array with zeros: {b}")

# Create an array with ones
c = np.ones(5)
print(f"Array with ones: {c}")

# Create an empty array, note that "empty" means an array with random values
d = np.empty(5)
print(f"Empty array: {d}")

# Create an array with consecutive integers
e = np.arange(5)
print(f"Array with consecutive integers: {e}")

After changing the first element: [10  2  3  4  5]
Array with zeros: [0. 0. 0. 0. 0.]
Array with ones: [1. 1. 1. 1. 1.]
Empty array: [1. 1. 1. 1. 1.]
Array with consecutive integers: [0 1 2 3 4]


#### Basic operations

In [4]:
# Sum of two arrays
f = a + e
print(f"Sum of two arrays: {f}")

# Element-wise multiplication
g = a * e
print(f"Element-wise multiplication: {g}")

# Element-wise division
e[0] = 1  # To avoid division by zero
h = a / e
print(f"Element-wise division: {h}")

# Element-wise exponentiation
i = a**2
print(f"Element-wise exponentiation: {i}")

# Appending an element to the array
j = np.append(a, 6)
print(f"Array after appending an element: {j}")

# Element-wise comparison
k = a == e
print(f"Element-wise comparison: {k}")

l = a > 3
print(f"Element-wise greater than comparison: {l}")

# Logical operations
m = np.logical_and(a > 2, a < 5)
print(f"Logical AND operation: {m}")

Sum of two arrays: [10  3  5  7  9]
Element-wise multiplication: [ 0  2  6 12 20]
Element-wise division: [10.          2.          1.5         1.33333333  1.25      ]
Element-wise exponentiation: [100   4   9  16  25]
Array after appending an element: [10  2  3  4  5  6]
Element-wise comparison: [False False False False False]
Element-wise greater than comparison: [ True False False  True  True]
Logical AND operation: [False False  True  True False]


#### Slicing arrays

In [5]:
# Get a slice of the array
a_slice = a[1:4]  # This will get elements from index 1 to 3 (4 is excluded)
print(f"Slice of the array (from index 1 to 3): {a_slice}")

# Slice with step
a_slice_step = a[::2]  # This will get every second element
print(f"Slice of the array with step 2: {a_slice_step}")

# Slice from a given index to the end
a_slice_end = a[2:]  # This will get elements from index 2 to the end
print(f"Slice of the array from index 2 to the end: {a_slice_end}")

Slice of the array (from index 1 to 3): [2 3 4]
Slice of the array with step 2: [10  3  5]
Slice of the array from index 2 to the end: [3 4 5]


#### Copying an array

In [6]:
# Copying an array
n = a.copy()  # equivalent to np.copy(a)
print(f"Copied array: {n}")
print(f"Check if they are the same object {id(a) == id(n)}")

# Create a view of the array
o = a.view()
print(f"View of the array: {o}")
print(f"Check if they are the same object {id(a) == id(o)}")

Copied array: [10  2  3  4  5]
Check if they are the same object False
View of the array: [10  2  3  4  5]
Check if they are the same object False


#### Dealing with nd-array (arrays of dimension greater or equal than 2)

In [7]:
# Create a 2-D array
p = np.array([[1, 2, 3], [4, 5, 6]])
print(f"2-D array: {p}")

# Create a random 2-D array
q = np.random.rand(3, 3)
print(f"Random 2-D array: {q}")

# Dot product
r = np.dot(p, q)
print(f"Dot product: {r}")

# Transpose of a matrix
s = np.transpose(p)
print(f"Transpose of the matrix: {s}")

# Reshape an array
t = np.arange(12).reshape(3, 4)
print(f"Reshaped array (3x4): {t}")

# Reverse an array
u = np.flip(a)
print(f"Reversed array: {u}")

# Flatten an array
v = p.flatten()
print(f"Flattened array: {v}")

# Concatenate arrays
w = np.concatenate((a, e))
print(f"Concatenated array: {w}")

2-D array: [[1 2 3]
 [4 5 6]]
Random 2-D array: [[0.53982727 0.24798391 0.87906094]
 [0.2986484  0.1914917  0.49840203]
 [0.12233632 0.12186247 0.90890639]]
Dot product: [[ 1.50413302  0.99655474  4.60258417]
 [ 4.38656897  2.680569   11.46169225]]
Transpose of the matrix: [[1 4]
 [2 5]
 [3 6]]
Reshaped array (3x4): [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
Reversed array: [ 5  4  3  2 10]
Flattened array: [1 2 3 4 5 6]
Concatenated array: [10  2  3  4  5  1  1  2  3  4]


#### Saving and loading Numpy objects

In [8]:
# Save an array
np.save("array.npy", a)

# Load an array
x = np.load("array.npy")
print(f"Loaded array: {x}")

Loaded array: [10  2  3  4  5]


## Advanced operations

#### Broadcasting
Ref: [Broadcasting](https://numpy.org/doc/stable/user/basics.broadcasting.html)

In [9]:
# Broadcasting is how numpy handles arrays of different shapes during arithmetic operations
# And what it does is it expands the smaller array to the size of the larger one by repeating it
# across one axis

a = np.array([[1], [2], [3]])
print(f"Array of shape: {a.shape}")

b = np.array([[1, 2, 3]])
print(f"Array of shape: {b.shape}")

# Broadcasting
c = a + b
print(f"Broadcasted array: {c}")

Array of shape: (3, 1)
Array of shape: (1, 3)
Broadcasted array: [[2 3 4]
 [3 4 5]
 [4 5 6]]


#### Advanced indexing
Ref: [Indexing](https://numpy.org/doc/stable/user/basics.indexing.html)

In [10]:
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# A basic indexing is to get the row and column
row = 1
column = 2
print(f"Element at row {row} and column {column}: {matrix[row][column]}")

# Advanced indexing
print(f"Element at row {row} and column {column}: {matrix[(row, column)]}")

# Select specific rows
rows = [0, 2]
print(f"Selected rows 1 and 2: {matrix[rows]}")

# Dimensional indexing
print(f"Select row 1 and all the columns: {matrix[0, :]}")
print(f"Select column 2 and all the rows: {matrix[:, 1]}")

# Dimensional indexing with newaxis
print(f"Select row 1 and all the columns with newaxis: {matrix[0, :, np.newaxis]}")
print(f"Select column 1 and all the rows with newaxis: {matrix[np.newaxis, :, 0]}")

# Get the diagonal of a matrix
diagonal_with_indexing = matrix[[0, 1, 2], [0, 1, 2]]
print(f"Diagonal with indexing: {diagonal_with_indexing}")

Element at row 1 and column 2: 6
Element at row 1 and column 2: 6
Selected rows 1 and 2: [[1 2 3]
 [7 8 9]]
Select row 1 and all the columns: [1 2 3]
Select column 2 and all the rows: [2 5 8]
Select row 1 and all the columns with newaxis: [[1]
 [2]
 [3]]
Select column 1 and all the rows with newaxis: [[1 4 7]]
Diagonal with indexing: [1 5 9]


#### Note

The advanced dimensional indexing with `newaxis` could be useful while using different AI frameworks for dimensional compatibility.

Ref: [numpy.newaxis](https://numpy.org/doc/stable/reference/constants.html#numpy.newaxis)

#### Sorting

Ref: [Sorting and Searching](https://numpy.org/doc/stable/reference/routines.sort.html#sorting)

In [11]:
# Standard sort
unserted_matrix = np.random.randint(10, size=(3, 3))
print(f"Unsorted array: {unserted_matrix}")

# Note that this sorts each list independently
matrix_standard_sorted = np.sort(unserted_matrix, axis=0)
print(f"Sorted array with standard sort: {matrix_standard_sorted}")

# Sort within axis 0 --- Same as default sort
matrix_axis_0_sorted = np.sort(unserted_matrix, axis=0)
print(f"Sorted array within axis 0: {matrix_axis_0_sorted}")

# Sort within axis 1
matrix_axis_1_sorted = np.sort(unserted_matrix, axis=1)
print(f"Sorted array within axis 1: {matrix_axis_1_sorted}")

Unsorted array: [[0 8 7]
 [2 2 3]
 [0 6 0]]
Sorted array with standard sort: [[0 2 0]
 [0 6 3]
 [2 8 7]]
Sorted array within axis 0: [[0 2 0]
 [0 6 3]
 [2 8 7]]
Sorted array within axis 1: [[0 7 8]
 [2 2 3]
 [0 0 6]]


#### Searching

Ref: [Sorting and Searching](https://numpy.org/doc/stable/reference/routines.sort.html#sorting)

In [12]:
rand_arr = np.random.rand(3, 3)

# find the index of the maximum value
max_index = np.argmax(rand_arr)
print(f"Index of the maximum value: {max_index}")

# find the index of the minimum value
min_index = np.argmin(rand_arr)
print(f"Index of the minimum value: {min_index}")

# Find a value with where mask
mask = np.where(rand_arr > 0.5)
print(f"Values greater than 0.5: {rand_arr[mask]}")

Index of the maximum value: 7
Index of the minimum value: 1
Values greater than 0.5: [0.52826673 0.82862748 0.64805348]


#### Iterating

In [13]:
# Let's use the unserted_matrix, if we can iterate over each element we can do it with a nested for loop

for row in unserted_matrix:
    for element in row:
        print(f"Element: {element}")

# This could be overwhealming if we have a high dimensional ndarray, so the best
# way to do it is using nditer function

for element in np.nditer(unserted_matrix):
    print(f"Element: {element}")

Element: 0
Element: 8
Element: 7
Element: 2
Element: 2
Element: 3
Element: 0
Element: 6
Element: 0
Element: 0
Element: 8
Element: 7
Element: 2
Element: 2
Element: 3
Element: 0
Element: 6
Element: 0


#### Masking

In [14]:
import numpy.ma as ma

array_to_be_masked = np.array([[1, 2, 3], [4, np.inf, 6], [np.nan, 8, 9]])

mask = ma.masked_array(
    array_to_be_masked,
    mask=[[False, False, False], [False, True, False], [True, False, False]],
)
print(f"Masked array: {mask}")
print(f"Mean of the masked array: {mask.mean()}")
print(f"Sum of the masked array: {mask.sum()}")

Masked array: [[1.0 2.0 3.0]
 [4.0 -- 6.0]
 [-- 8.0 9.0]]
Mean of the masked array: 4.714285714285714
Sum of the masked array: 33.0


#### Vectorization - How to apply custom functions to numpy arrays?

In [15]:
def get_modulus(val: int | np.ndarray) -> int:
    if val % 2 == 0:
        return 0
    else:
        return val % 2


# Applying get_modulus directly to the array will fail
try:
    get_modulus(matrix)
except Exception as e:
    print(f"Error: {e}")

vectorized_get_modulus = np.vectorize(get_modulus)
vectorized_result = vectorized_get_modulus(matrix)
print(f"Vectorized result: {vectorized_result}")

Error: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Vectorized result: [[1 0 1]
 [0 1 0]
 [1 0 1]]


#### Matrix multiplications

In [16]:
# Define two 3x3 matrices
matrix1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
matrix2 = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]])

# Algebraic matrix multiplication
algebraic_product = np.matmul(matrix1, matrix2)
print(f"Algebraic product: {algebraic_product}")

# which is equivalent to the new operator @
algebraic_product_operator = matrix1 @ matrix2
print(f"Algebraic product with operator: {algebraic_product_operator}")

Algebraic product: [[ 30  24  18]
 [ 84  69  54]
 [138 114  90]]
Algebraic product with operator: [[ 30  24  18]
 [ 84  69  54]
 [138 114  90]]


## Custom `dtype` and performance

Ref: [Custom dtype](https://numpy.org/doc/stable/reference/arrays.dtypes.html)

Defining custom data type objects can lead to better performance if there are a lot of operations

In [17]:
# Example of memory consumption

dt = np.dtype(
    "U5, f4"
)  # U5 means unicode string of length 5, f4 means float of 4 bytes

default_numpy_dtype = np.array([("hello", 1.23), ("world", 5122.99), ("numpy", 63.23)])

custom_dtype_array = np.array(
    [("hello", 1.23), ("world", 5122.99), ("numpy", 63.23)], dtype=dt
)

object_dtype_array = np.array(
    [("hello", 1.23), ("world", 5122.99), ("numpy", 63.23)], dtype=object
)

print(f"dtype of default numpy array: {default_numpy_dtype.dtype}")
print(f"dtype of custom dtype array: {custom_dtype_array.dtype}")
print(f"dtype of object dtype array: {object_dtype_array.dtype}")

# Memory consumption
print(f"Memory consumption of default numpy array: {default_numpy_dtype.nbytes} bytes")
print(f"Memory consumption of custom dtype array: {custom_dtype_array.nbytes} bytes")
print(f"Memory consumption of object dtype array: {object_dtype_array.nbytes} bytes")

dtype of default numpy array: <U32
dtype of custom dtype array: [('f0', '<U5'), ('f1', '<f4')]
dtype of object dtype array: object
Memory consumption of default numpy array: 768 bytes
Memory consumption of custom dtype array: 72 bytes
Memory consumption of object dtype array: 48 bytes
