**This notebook covers examples of working with vectors using numpy**

In [2]:
# Import required libraries
import numpy as np
import time

In [17]:
# Let's create out vector and look at various properties of the vector
# In below examples numpy arrays take shape of the array and instantiate them

# define 1d zeros array with 4 elements for one d array
arr_zeros = np.zeros(4)
print(f"Size of 1d zeros array is: {arr_zeros.size}\nShape of 1d zeros array is: {arr_zeros.shape}\nDimensions of 1d zeros array is: {arr_zeros.ndim}\nData type of 1d zeros array is: {arr_zeros.dtype}\nValues of 1d zeros array is: {arr_zeros}\n")

# define 2d zeros array with 16 elements for one 2d array
arr_zeros = np.zeros((4,4))
print(f"Size of 2d zeros array is: {arr_zeros.size}\nShape of 2d zeros array is: {arr_zeros.shape}\nDimensions of 2d zeros array is: {arr_zeros.ndim}\nData type of 2d zeros array is: {arr_zeros.dtype}\nValues of 2d zeros array is: {arr_zeros}\n")

# define 1d random array with 4 elements
arr_rand = np.random.random_sample(4)
print(f"Size of 1d random array is: {arr_rand.size}\nShape of 1d random array is: {arr_rand.shape}\nDimensions of 1d random array is: {arr_rand.ndim}\nData type of 1d random array is: {arr_rand.dtype}\nValues of 1d random array is: {arr_rand}\n")

# define 2d random array with 4 elements
arr_rand = np.random.random_sample(4)
print(f"Size of 2d random array is: {arr_rand.size}\nShape of 2d random array is: {arr_rand.shape}\nDimensions of 2d random array is: {arr_rand.ndim}\nData type of 2d random array is: {arr_rand.dtype}\nValues of 2d random array is: {arr_zeros}\n")

# definte 1d arange array with 4 numbers starting from 0 or a specific range
arr_arange = np.arange(4)
print(f"Size of 1d arange array is: {arr_arange.size}\nShape of 1d arange array is: {arr_arange.shape}\nDimensions of 1d arange array is: {arr_arange.ndim}\nData type of 1d arange array is: {arr_arange.dtype}\nValues of 1d arange array is: {arr_arange}\n")

Size of 1d zeros array is: 4
Shape of 1d zeros array is: (4,)
Dimensions of 1d zeros array is: 1
Data type of 1d zeros array is: float64
Values of 1d zeros array is: [0. 0. 0. 0.]

Size of 2d zeros array is: 16
Shape of 2d zeros array is: (4, 4)
Dimensions of 2d zeros array is: 2
Data type of 2d zeros array is: float64
Values of 2d zeros array is: [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]

Size of 1d random array is: 4
Shape of 1d random array is: (4,)
Dimensions of 1d random array is: 1
Data type of 1d random array is: float64
Values of 1d random array is: [0.03137682 0.19061002 0.24799402 0.55075466]

Size of 2d random array is: 4
Shape of 2d random array is: (4,)
Dimensions of 2d random array is: 1
Data type of 2d random array is: float64
Values of 2d random array is: [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]

Size of 1d arange array is: 4
Shape of 1d arange array is: (4,)
Dimensions of 1d arange array is: 1
Data type of 1d arange array is: int6

In [21]:
# Here are some single vector operations

arr_1d_a = np.array([1,2,3,4,5,6])

sum_arr_1d = np.sum(arr_1d_a)

mean_arr_1d = np.mean(arr_1d_a)

print(f"Sum of 1d array is: {sum_arr_1d}\nMean of 1d array is: {mean_arr_1d}")

# Here are some multiple vector operations

arr_1d_b = np.array([2,34,54,32,2,34])

add_arr_1d = arr_1d_a + arr_1d_b
sub_arr_1d = arr_1d_b - arr_1d_a
mul_arr_1d = arr_1d_a * arr_1d_b

print(f"Sum of multiple vector arrays is: {add_arr_1d}\nSubtraction of multiple vector arrays is: {sub_arr_1d}\nMultiplication of multiple vector arrays is: {mul_arr_1d}")

Sum of 1d array is: 21
Mean of 1d array is: 3.5
Sum of multiple vector arrays is: [ 3 36 57 36  7 40]
Subtraction of multiple vector arrays is: [ 1 32 51 28 -3 28]
Multiplication of multiple vector arrays is: [  2  68 162 128  10 204]


In [22]:
# let's try to add two arrays with different shapes
arr_1d_c = np.array([1,2])

try:
    add_arr_1d = arr_1d_a + arr_1d_c
except Exception as e:
    print(f"There is an error adding arrays: {e}")

There is an error adding arrays: operands could not be broadcast together with shapes (6,) (2,) 


In [24]:
# let's now look at dot product of two vectors
# Dot product is multiplication of every corresponding index element in two vectors and sum of the resulting values

dot_product = np.dot(arr_1d_a, arr_1d_b)
print(f"Dot product of two vectors is: {dot_product}")

Dot product of two vectors is: 574


In [26]:
# now to further study the difference between processing time of NP dot vector vs traditional approach to dot product calculation

# define a function to compute dot product

def dot_product_calc(a,b):
    size_a = a.shape[0]
    size_b = b.shape[0]

    if size_a == size_b:
        dot_prod=0
        for i in range(size_a):
            ind_prod = a[i]*b[i]
            dot_prod += ind_prod
    else:
        print("Cannot calculate dot product, size of the vector arrays is not same")

    return dot_prod

In [27]:
# let's calculate dot product using our function
print(f"Dot product using custom defined function is: {dot_product_calc(arr_1d_a,arr_1d_b)}")

Dot product using custom defined function is: 574


In [33]:
# This is to ensure we get same set of random numbers everytime
np.random.seed(1)

rand_arr_1d_a = np.random.rand(1000000) # large array with random numbers
rand_arr_1d_b = np.random.rand(1000000)

start_time_np_dot = time.time()
dot_prod_np = np.dot(rand_arr_1d_a, rand_arr_1d_b)
finish_time_np_dot = time.time()

start_time_cust_dot = time.time()
dot_prod_cust = dot_product_calc(rand_arr_1d_a, rand_arr_1d_b)
finish_time_cust_dot = time.time()

print(f"Time taken to process numpy dot calculation: {finish_time_np_dot-start_time_np_dot}")
print(f"Time taken to process custom dot calculation: {finish_time_cust_dot-start_time_cust_dot}")

print(f"NP dot calculation is {(finish_time_cust_dot-start_time_cust_dot)/(finish_time_np_dot-start_time_np_dot):.4f} times faster than custom dot calculation")

# let's cleanup and remove these arrays from memory
del(rand_arr_1d_a)
del(rand_arr_1d_b)

Time taken to process numpy dot calculation: 0.0048143863677978516
Time taken to process custom dot calculation: 0.4202547073364258
NP dot calculation is 87.2914 times faster than custom dot calculation


**Vectorization provides a large speed up in this example. This is because NumPy makes better use of available data parallelism in the underlying hardware. GPU's and modern CPU's implement Single Instruction**

In [34]:
# let's not using vectorization to calculate dot product of a multidimensional array

arr_2d_a = np.array([[1,2,3],[2,5,6],[7,5,4]])
arr_2d_b = np.array([[2,2,3],[2,5,6],[7,5,4]])

dot_2d_arr = np.dot(arr_2d_a,arr_2d_b)

print(f"Dot product of two 2d vectors is: {dot_2d_arr}")

Dot product of two 2d vectors is: [[27 27 27]
 [56 59 60]
 [52 59 67]]
