## Defining NumPy arrays

To demonstrate the usage of NumPy arrays, we first need to define an `np.ndarray` object. NumPy allows us to define arrays in various ways:

In [None]:
# This cell only needs to be run once for each kernel. After running it, the kernel should be restarted
!pip install numpy

In [None]:
import numpy as np

In [None]:
# direct definition: build a 1-D array from a Python list of integers
np.array([1, 2, 3, 16, 25, 36])

array([ 1,  2,  3, 16, 25, 36])

All elements of a NumPy array must have the same type. If possible, NumPy casts all objects to a common type.

In [None]:
# one float among integers: every element is upcast to float
np.array([6.7, 1, 2, 3])

array([6.7, 1. , 2. , 3. ])

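If numbers are mixed with strings, NumPy casts every element to a string, as the next cell shows. To avoid such ambiguities, we can define the type explicitly with the `dtype` argument (e.g. `np.array([1, 2, 3], dtype=float)`).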

In [None]:
# numbers mixed with a string: NumPy falls back to a string dtype for all elements
np.array([1, 2, 3, 4.8, 'str'])

array(['1', '2', '3', '4.8', 'str'], dtype='<U32')

Multi-dimensional NumPy arrays

In [None]:
# nested lists of equal length become the rows of a 2-D array (here 2x3)
np.array([[1, 2, 3], [4, 5, 6]])

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
# any iterable of equal-length sequences works: three ranges starting at
# 3, 4 and 5, each six values long, give a 3x6 array
np.array([range(i, i+6) for i in range(3, 6)])

array([[ 3,  4,  5,  6,  7,  8],
       [ 4,  5,  6,  7,  8,  9],
       [ 5,  6,  7,  8,  9, 10]])

You can initialize arrays filled with specific values.

In [None]:
# 1-D array of ten zeros (floats, as the output shows)
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [None]:
# 3x5 array of ones; the shape is passed as a tuple and the dtype explicitly
np.ones((3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [None]:
# 3x5 array in which every entry is the given fill value (pi)
np.full((3, 5), np.pi)

array([[3.14159265, 3.14159265, 3.14159265, 3.14159265, 3.14159265],
       [3.14159265, 3.14159265, 3.14159265, 3.14159265, 3.14159265],
       [3.14159265, 3.14159265, 3.14159265, 3.14159265, 3.14159265]])

In [None]:
# 13x13 identity matrix: ones on the main diagonal, zeros elsewhere
np.eye(13)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

NumPy has a generalization of the built-in range() function

In [None]:
# start, stop, step -- like range(), the stop value (20) is excluded
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

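Evenly spaced non-integer values are supported as well, e.g. with `np.linspace`, which takes the number of points instead of a step: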

In [None]:
# 5 evenly spaced points from 0 to 1; both endpoints are included
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

NumPy allows for random array generation.

In [None]:
# 3x3 array of random floats in [0, 1); the values differ on every run
np.random.random((3, 3))

array([[0.83930384, 0.87243221, 0.26677942],
       [0.87538808, 0.39078306, 0.7887048 ],
       [0.26294881, 0.23775643, 0.15332005]])

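Or just an empty (uninitialized) array. `np.empty` only allocates memory without setting the values, so the contents are arbitrary; in the output below they happen to repeat the random array shown above, presumably because its memory was reused.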

In [None]:
# allocates a 3x3 array without initializing it: the values are whatever
# was already in that memory, so never rely on them
np.empty((3, 3))

array([[0.83930384, 0.87243221, 0.26677942],
       [0.87538808, 0.39078306, 0.7887048 ],
       [0.26294881, 0.23775643, 0.15332005]])

In [None]:
# uninitialized 1-D array of length 2: the contents are arbitrary
np.empty(2)

array([ 3.18403968e+077, -1.73071781e-242])

In [None]:
# 4x4 array built directly from four nested lists
np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]])

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16]])

In [None]:
# 4x4 array of random floats in [0, 1)
np.random.random((4, 4))

array([[0.98450683, 0.12600817, 0.29567454, 0.07665038],
       [0.78034969, 0.18299833, 0.09869652, 0.93843435],
       [0.53335424, 0.56049361, 0.90667912, 0.89939534],
       [0.54273584, 0.96972421, 0.53834223, 0.54867719]])

In [None]:
# uninitialized 4x4 array: the output below repeats the random values from
# the previous cell, i.e. np.empty() does not clear the memory it hands out
np.empty((4, 4))

array([[0.98450683, 0.12600817, 0.29567454, 0.07665038],
       [0.78034969, 0.18299833, 0.09869652, 0.93843435],
       [0.53335424, 0.56049361, 0.90667912, 0.89939534],
       [0.54273584, 0.96972421, 0.53834223, 0.54867719]])

## Vectorization

In [None]:
import time

# Example 1
# Compute the sine of every element of a list in three different ways and
# compare how long each approach takes.
#
# Timing uses time.perf_counter() rather than time.time(): it is monotonic
# (unaffected by system clock adjustments) and has the highest available
# resolution, which is what a benchmark needs.
data = list(range(10_000_000))

# bad: direct python for loop (one np.sin call and one append per element)
result_bad = []
start = time.perf_counter()
for x in data:
    result_bad.append(np.sin(x))
print(f"Bad: {time.perf_counter() - start} seconds")

# better: list comprehension (still one np.sin call per element)
start = time.perf_counter()
result_mid = [np.sin(x) for x in data]
print(f"Mid: {time.perf_counter() - start} seconds")

# best: vectorization (a single np.sin call on the whole sequence)
# NOTE: `data` is a Python list, so this timing also includes converting
# the list to an array.
start = time.perf_counter()
result_good = np.sin(data)
print(f"Good: {time.perf_counter() - start} seconds")


Bad: 14.274943113327026 seconds
Mid: 12.276310920715332 seconds
Good: 0.5340735912322998 seconds


In [None]:
# Example 2
# Square the first 10 million non-negative integers in three different ways
# and compare how long each approach takes.
count = 10_000_000

# Method 1: Loop
result = []
start = time.perf_counter()
for x in range(count):
    result.append(x**2)

print(f"Loop: {time.perf_counter() - start} seconds")

# Method 2: List comprehension
start = time.perf_counter()
result = [x**2 for x in range(count)]
print(f"List comprehension: {time.perf_counter() - start} seconds")

# Method 3: Vectorized
# The dtype is fixed to int64 on purpose: the largest square (~1e14) does not
# fit into 32 bits, and on platforms where NumPy's default integer is 32-bit
# the result would silently overflow.
start = time.perf_counter()
result = np.arange(count, dtype=np.int64)**2
print(f"Vectorized: {time.perf_counter() - start} seconds")


Loop: 1.8485486507415771 seconds
List comprehension: 2.134899854660034 seconds
Vectorized: 0.3026432991027832 seconds


In [None]:
# Example 3
# implement ReLU: negative values become 0, all other values are kept
data = [1, -1, 3, 7, 9]

# Method 1: Loop
result = []
for value in data:
    result.append(0 if value < 0 else value)
print(result)

# Method 2: Vectorized -- keep the entries that are >= 0, replace the rest by 0
data = np.array(data)
result = np.where(data >= 0, data, 0)
print(result)

[1, 0, 3, 7, 9]
[1 0 3 7 9]


In [None]:
# Example 4
# Element-wise addition of two equally long sequences
list_1 = [1, 2, 3]
list_2 = [4, 5, 6]

# Method 1: explicit loop over the paired elements
result = []
for left, right in zip(list_1, list_2):
    result.append(left + right)
print(result)

# Method 2: the same pairing written as a list comprehension
print([sum(pair) for pair in zip(list_1, list_2)])

# Method 3: Vectorized -- "+" on NumPy arrays adds element by element
array_1, array_2 = np.array(list_1), np.array(list_2)

print(array_1 + array_2)

[5, 7, 9]
[5, 7, 9]
[5 7 9]


## Broadcasting

Broadcasting in NumPy allows arrays of different shapes to be combined in arithmetic operations. It's a powerful feature that enables vectorized operations without explicitly creating multiple copies of values to match array shapes.

In [None]:
import numpy as np

# Example 1: Scalar-array broadcasting
# The scalar is combined with every element of the array.
a = np.arange(1, 4)
b = 2
print("Scalar-array broadcasting:")
print(f"a: {a}")
print(f"b: {b}")
print(f"a + b: {a + b}\n")

# Example 2: One-dimensional array with a two-dimensional array
# The 1-D array (3 elements) is added to each of the three rows of A.
A = np.array([range(1, 4), range(4, 7), range(7, 10)])
b = 10 * np.arange(1, 4)
print("1D array with 2D array broadcasting:")
print(f"A:\n{A}")
print(f"b: {b}")
print(f"A + b:\n{A + b}\n")

# Example 3: Two-dimensional arrays with different shapes
# The single column of d is added to each of the four columns of c.
c = np.array([range(0, 4), range(4, 8)])  # Shape (2, 4)
d = np.array([10, 20])[:, np.newaxis]  # Shape (2, 1)
print("2D arrays with different shapes broadcasting:")
print(f"c:\n{c}")
print(f"d:\n{d}")
print(f"c + d:\n{c + d}\n")

# Example 4: Broadcasting is not limited to addition (e.g., multiplication)
# Each row of e is multiplied element-wise by f.
e = np.array([range(1, 4), range(4, 7)])
f = 10 ** np.arange(1, 4)
print("Broadcasting with multiplication:")
print(f"e:\n{e}")
print(f"f: {f}")
print(f"e * f:\n{e * f}\n")

Scalar-array broadcasting:
a: [1 2 3]
b: 2
a + b: [3 4 5]

1D array with 2D array broadcasting:
A:
[[1 2 3]
 [4 5 6]
 [7 8 9]]
b: [10 20 30]
A + b:
[[11 22 33]
 [14 25 36]
 [17 28 39]]

2D arrays with different shapes broadcasting:
c:
[[0 1 2 3]
 [4 5 6 7]]
d:
[[10]
 [20]]
c + d:
[[10 11 12 13]
 [24 25 26 27]]

Broadcasting with multiplication:
e:
[[1 2 3]
 [4 5 6]]
f: [  10  100 1000]
e * f:
[[  10  200 3000]
 [  40  500 6000]]



In [None]:
import numpy as np
import time

In [None]:
# normalize a stack of images: 10 images, each 1000x1000 pixels
images = np.random.rand(10, 1000, 1000)

# Alternative kept for comparison: first materialize an explicit
# (1000, 1000) array filled with 255 and divide by that array.
# start = time.perf_counter()
# normalization_array = np.full((images.shape[1], images.shape[2]), 255)
# images_normalized = images / normalization_array
# print(f"Time taken: {time.perf_counter() - start} seconds")

# Dividing by the scalar applies it to every pixel of every image without
# building such an array first.
# time.perf_counter() is monotonic and high-resolution, which matters here
# because the measured operation is very short.
start = time.perf_counter()
images_normalized = images / 255
print(f"Time taken: {time.perf_counter() - start} seconds")

NameError: name 'np' is not defined

In [None]:
# When broadcasting does NOT work: the shapes of the operands are incompatible

In [None]:
# Shapes (3,) and (4,) cannot be broadcast: the lengths differ and neither
# of them is 1, so the addition below raises a ValueError.
a = np.array([0, 1, 2])
b = np.array([5, 5, 5, 5])
a + b

ValueError: operands could not be broadcast together with shapes (3,) (4,) 

In [None]:
# example: add 5 to each element of an array with and without broadcasting
# with and without vectorization

Now let's imagine we have an actual dataset: the lengths and widths of each finger of a person (2 hands, 5 fingers, 2 dimensions = 20 features) for 1000 people (say for a gender identification problem):

In [None]:
# stand-in dataset: 1000 people (rows) x 20 features (columns), random values
data = np.random.random(size=(1000, 20))

In [None]:
# display the raw (unscaled) feature matrix
data

array([[0.26448134, 0.00881413, 0.49117747, ..., 0.13602134, 0.16028975,
        0.03476053],
       [0.86035142, 0.94451949, 0.55132118, ..., 0.51617595, 0.02431045,
        0.80664507],
       [0.49300363, 0.56577689, 0.76069481, ..., 0.3029379 , 0.41736729,
        0.59385271],
       ...,
       [0.4448942 , 0.964709  , 0.88408574, ..., 0.03461507, 0.09639768,
        0.53023355],
       [0.45364905, 0.65820559, 0.05431071, ..., 0.23601067, 0.48359205,
        0.50894071],
       [0.35576803, 0.2907447 , 0.86017606, ..., 0.99392418, 0.13159682,
        0.26026748]])

In [None]:
# Standard scaling: subtract each column's mean and divide by each column's
# standard deviation.
means = data.mean(axis=0)  # one mean per column
stds = data.std(axis=0)  # one standard deviation per column

# The per-column statistics are broadcast across all rows of `data`.
centered = data - means
data_scaled = centered / stds

In [None]:
# display the scaled feature matrix
data_scaled

array([[-0.77794142, -1.71981365, -0.02874793, ..., -1.25599327,
        -1.17966701, -1.58023947],
       [ 1.25535492,  1.54875492,  0.18152448, ...,  0.0910882 ,
        -1.65630173,  1.02091391],
       [ 0.00184859,  0.2257464 ,  0.91352949, ..., -0.66452286,
        -0.27855871,  0.30383055],
       ...,
       [-0.16231593,  1.61928011,  1.3449247 , ..., -1.61532734,
        -1.40362153,  0.08944201],
       [-0.13244164,  0.54861464, -1.55610672, ..., -0.90168014,
        -0.04642763,  0.01768781],
       [-0.46644248, -0.73498505,  1.26133247, ...,  1.78399358,
        -1.28024148, -0.82030954]])

In [None]:
# sanity check: after scaling, every column has a standard deviation of 1
np.std(data_scaled, axis=0)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1.])