Vectorization for utilization of parallel computing advantages

In [11]:
import numpy as np
import time

a = np.array([1, 2,3, 4])
print(a)


[1 2 3 4]


Percentage improvement of vectorization vs for loop

In [10]:
size = 1000000
a = np.random.rand(size)
b = np.random.rand(size)

tic = time.time()
c1 = np.dot(a,b)
toc = time.time()
dt1 = 1000*(toc-tic)

print("Vecotrized version: " + str(dt1) + "ms")

c2 = 0
tic = time.time()
for i in range(size):
    c2 += a[i]*b[i]
toc = time.time()
dt2 = 1000*(toc - tic)
vp = dt2/dt1
print("Vectorized time: " + str(dt1) + "ms" + "\nLoop time: " + str(dt2) + "ms" + "\nPercent improvement: " + str(int(vp+100)))
print("Do values match? " + str(c1) + " = " + str(c2))

Vecotrized version: 13.964176177978516ms
Vectorized time: 13.964176177978516ms
Loop time: 6596.362352371216ms
Percent improvement: 572
Do values match? 2499848.0880826344 = 2499848.088082611


Efficient numpy operators 

In [13]:
# Log of matrix
np.log(a)
# Absolute values
np.abs(a)
# Maximum wrt floor
np.maximum(a,0) # Floor of 0


array([0.        , 0.69314718, 1.09861229, 1.38629436])

Broadcasting example - Broadcasting occurs anytime python has to expand a matrix or constant to suit the operation. i.e. [1 2 3] + 100 = [1 2 3] + [100 100 100] = [101 102 103]

In [14]:
import numpy as np

A = np.array([[56.0, 0.0, 4.0, 68.0], [1.2, 104.0, 52.0, 8.0], [1.8, 135.0, 99.0, 0.9]])


In [15]:
cal = A.sum(axis=0) # axis 0 is to sum vertically 1 is to sum horizontally
print(cal)

[ 59.  239.  155.   76.9]


In [16]:
percentage = 100*A/cal.reshape(1,4) # reshape is redundant here O(1) efficency, very cheap to call
print(percentage) # Broadcasting occured by the cal matrix being expanded row wise to match the dimensions of A

[[94.91525424  0.          2.58064516 88.42652796]
 [ 2.03389831 43.51464435 33.5483871  10.40312094]
 [ 3.05084746 56.48535565 63.87096774  1.17035111]]


In [24]:
import numpy as np

a = np.random.randn(5) # Rank 1 array (5,)
b = np.random.randn(5,1) # Commit to shaping vectors like this to ensure they are column or row vectors (5, 1) or (1, 5)


In [25]:

print(a)
print(b)

[ 0.49326035  1.1020425  -0.07130365 -0.39798997  0.00448504]
[[ 0.06497221]
 [ 0.75339259]
 [ 0.20001257]
 [ 0.3065863 ]
 [-0.33932825]]


In [26]:
print(a.shape)
print(b.shape)

(5,)
(5, 1)


In [27]:
print(a.T)
print(b.T)

[ 0.49326035  1.1020425  -0.07130365 -0.39798997  0.00448504]
[[ 0.06497221  0.75339259  0.20001257  0.3065863  -0.33932825]]


In [29]:
print(np.dot(a,a.T))
print(np.dot(b,b.T))

1.621303779241168
[[ 0.00422139  0.04894958  0.01299526  0.01991959 -0.02204691]
 [ 0.04894958  0.5676004   0.15068799  0.23097985 -0.25564739]
 [ 0.01299526  0.15068799  0.04000503  0.06132112 -0.06786992]
 [ 0.01991959  0.23097985  0.06132112  0.09399516 -0.10403339]
 [-0.02204691 -0.25564739 -0.06786992 -0.10403339  0.11514366]]


In [32]:
a = a.reshape(5,1)
assert(a.shape == (5,1))
print(np.dot(a, a.T))

[[ 2.43305769e-01  5.43593865e-01 -3.51712624e-02 -1.96312669e-01
   2.21229436e-03]
 [ 5.43593865e-01  1.21449767e+00 -7.85796511e-02 -4.38601857e-01
   4.94270910e-03]
 [-3.51712624e-02 -7.85796511e-02  5.08421030e-03  2.83781367e-02
  -3.19800001e-04]
 [-1.96312669e-01 -4.38601857e-01  2.83781367e-02  1.58396013e-01
  -1.78500251e-03]
 [ 2.21229436e-03  4.94270910e-03 -3.19800001e-04 -1.78500251e-03
   2.01156197e-05]]
