# Lab 5: NumPy and Vectorization

Reference: NumPy.org

In [5]:
import numpy as np
import time

## Vector Creation

Data creation routines in NumPy will generally have a first parameter which is the shape of the object. This can either be a single value for a 1-D result or a tuple (n,m,...) specifying the shape of the result. 

In [20]:
some_vector = np.zeros(4)
print(f"\nvector: {some_vector}; shape: {some_vector.shape}; data type: {some_vector.dtype}")

some_vector = np.zeros((4, ))
print(f"\nvector: {some_vector}; shape: {some_vector.shape}; data type: {some_vector.dtype}")

some_vector = np.random.random_sample(4)
print(f"\nvector: {some_vector}; shape: {some_vector.shape}; data type: {some_vector.dtype}")

some_vector = np.arange(4.)
print(f"\nvector: {some_vector}; shape: {some_vector.shape}; data type: {some_vector.dtype}")

some_vector = np.random.rand(4)
print(f"\nvector: {some_vector}; shape: {some_vector.shape}; data type: {some_vector.dtype}")

some_vector = np.array([5, 4, 3, 2])
print(f"\nvector: {some_vector}; shape: {some_vector.shape}; data type: {some_vector.dtype}")

some_vector = np.array([5., 4, 3, 2])
print(f"\nvector: {some_vector}; shape: {some_vector.shape}; data type: {some_vector.dtype}")


vector: [0. 0. 0. 0.]; shape: (4,); data type: float64

vector: [0. 0. 0. 0.]; shape: (4,); data type: float64

vector: [0.56005463 0.357479   0.17670795 0.73894208]; shape: (4,); data type: float64

vector: [0. 1. 2. 3.]; shape: (4,); data type: float64

vector: [0.7765739  0.08746815 0.41262386 0.23463003]; shape: (4,); data type: float64

vector: [5 4 3 2]; shape: (4,); data type: int64

vector: [5. 4. 3. 2.]; shape: (4,); data type: float64


## Vector Access

In [30]:
#vector indexing operations on 1-D vectors
arange_vector = np.arange(10)
print(f"vector: {arange_vector}")
print(f"vector shape: {arange_vector.shape}")
print(f"element: {arange_vector[2]}; element shape: {arange_vector[2].shape}")

vector: [0 1 2 3 4 5 6 7 8 9]
vector shape: (10,)
element: 2; element shape: ()


In [26]:
#vector slicing operations
v = np.arange(10)
print(f"full vector:{v}")

#access 5 consecutive elements (start:stop:step)
single_step_slice = v[2:7:1]
print("v[2:7:1] = ", single_step_slice)

# access 3 elements separated by two 
double_step_slice = v[2:7:2]
print("v[2:7:2] = ", double_step_slice)

full vector:[0 1 2 3 4 5 6 7 8 9]
v[2:7:1] =  [2 3 4 5 6]
v[2:7:2] =  [2 4 6]


## Vector Operations

In [36]:
x = np.array([1, 2, 3, 4])
print(x)

negated = -x
print(negated)

the_sum = np.sum(x)
print(the_sum)

the_mean = np.mean(x)
print(the_mean)

squared = x**2
print(squared)

multiplied = 5 * x
print(multiplied)

[1 2 3 4]
[-1 -2 -3 -4]
10
2.5
[ 1  4  9 16]
[ 5 10 15 20]


### Element-wise operations

In [54]:
v1 = np.array([1, 2, 3, 4])
v2 = np.array([-1, 4, 3, 2])
added = a + b
print(f"\nElement-wise addition: {added}")


try:
    v3 = np.array([2, 3, 4])
    mismatch = a + d
except Exception as e:
    print(f"\nVectors of different lengths fail with exception: {e}")
    


Element-wise addition: [1 3 5 7]

Vectors of different lengths fail with exception: operands could not be broadcast together with shapes (4,) (3,) 


#### Sequential Dot Product Calculation(slow)

In [56]:
def sequential_dot_product(a, b):
    dot_product = 0

    for i in range(a.shape[0]):
        dot_product += a[i] * b[i]

    return dot_product

sequential_result = sequential_dot_product(v1, v2)
print(sequential_result)

24


#### Parallelized Dot Product Calculation (fast)

In [62]:
parallelized_result = np.dot(v1, v2)
print(parallelized_result)

reversed_result = np.dot(v2, v1)
print(reversed_result)

assert sequential_result == parallelized_result
assert parallelized_result == reversed_result

24
24


#### Speed comparison of Sequential vs Parallelized Dot Product

In [66]:
# Create very large arrays
np.random.seed(1)
v8 = np.random.rand(10000000)
v9 = np.random.rand(10000000)

# Run sequential method
start1 = time.time()
sequential = sequential_dot_product(v8, v9)
end1 = time.time()
print(f"Sequential method consumed {end1 - start1} ms")

# Run parallelized method
start2 = time.time()
parallelized = np.dot(v8, v9)
end2 = time.time()
print(f"Parallelized method consumed {end2 - start2} ms")

# Tear Down
del(v8)
del(v9)

Sequential method consumed 1.4156959056854248 ms
Parallelized method consumed 0.004889249801635742 ms


## Matrices
* NumPy uses brackets to denote each dimension.
* When printing, NumPy prints one row per line.
* By convention: m = number of rows = count of training examples
* By convention: n = number of columns = count of features/variables

In [80]:
matrix1 = np.zeros((1, 5))
print(f"\nmatrix1: {matrix1}\nshape: {matrix1.shape}")

matrix2 = np.zeros((3, 1))
print(f"\nmatrix2: {matrix2}\nshape: {matrix2.shape}")

matrix3 = np.zeros((1, 1))
print(f"\nmatrix3: {matrix3}\nshape: {matrix3.shape}")


matrix1: [[0. 0. 0. 0. 0.]]
shape: (1, 5)

matrix2: [[0.]
 [0.]
 [0.]]
shape: (3, 1)

matrix3: [[0.]]
shape: (1, 1)


In [84]:
manual_matrix = np.array([[5], [4], [3]])
print(f"\nmatrix4: {manual_matrix}\nshape: {manual_matrix.shape}")


matrix4: [[5]
 [4]
 [3]]
shape: (3, 1)


### Reshaping Matrices

In [91]:
one_dimension = np.arange(6)
print("\n")
print(one_dimension)

# Reshape from 1D to 2D, specify both the number of rows and cols
two_dimensions_both_specified = one_dimension.reshape(3, 2)
print("\n")
print(two_dimensions_both_specified)

# Reshape from 1D to 2D, specify only 1 dimension, use -1 to autocompute the other dimension
two_dimensions_one_specified = one_dimension.reshape(-1, 2)
print("\n")
print(two_dimensions_one_specified)




[0 1 2 3 4 5]


[[0 1]
 [2 3]
 [4 5]]


[[0 1]
 [2 3]
 [4 5]]


### Slicing Matrices

In [96]:
vector5 = np.arange(20).reshape(-1, 10)
print(f"\noriginal vector: {vector5}; \nshape: {vector5.shape}")

# Access all elements
all_elements = vector5[:, :]
print(f"\nall elements: {all_elements}; \nshape: {all_elements.shape}")

# Access a single row
row1 = vector5[1,:] # this is equivalent to vector5[1]
print(f"\nrow1: {row1}; \nshape: {row1.shape}")

# Access a single column
col1 = vector5[:,1]
print(f"\ncol1: {col1}; \nshape: {col1.shape}")

# Access one part of a row
portion = vector5[1, 4:7]
print(f"\nportion: {portion}; \nshape: {portion.shape}")

# Access a block from within the matrix
block = vector5[0:2,4:7]
print(f"\nblock: {block}; \nshape: {block.shape}")


original vector: [[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]]; 
shape: (2, 10)

all elements: [[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]]; 
shape: (2, 10)

row1: [10 11 12 13 14 15 16 17 18 19]; 
shape: (10,)

col1: [ 1 11]; 
shape: (2,)

portion: [14 15 16]; 
shape: (3,)

block: [[ 4  5  6]
 [14 15 16]]; 
shape: (2, 3)
