# NumPy Vectorization Speed Tests
A series of NumPy-accelerated functions with simple rewriting of functions.

Let's see what NumPy can detect on our machine.

In [None]:
import numpy as np
np.show_config()

We can now start running some calculations.

In [1]:

# importing the modules
import numpy as np
import timeit
 
# vectorized sum
print(np.sum(np.arange(15000)))
 
print("Time taken by vectorized sum : ", end = "")
%timeit np.sum(np.arange(15000))
 
# iterative sum
def iterative_sum(n):
    total = 0
    for item in range(0, n):
        total += item
    a = total
    # print("\n" + str(a))
    return a

 
print("Time taken by iterative sum : ", end = "")
%timeit iterative_sum(15000)

112492500
Time taken by vectorized sum : 17.2 µs ± 361 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
Time taken by iterative sum : 782 µs ± 29.4 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [29]:

# importing the modules
import numpy as np
import timeit
import math
 
# vectorized operation
print("Time taken by vectorized operation : ", end = "")
%timeit np.exp(np.arange(150))
 
# non-vectorized operation
print("Time taken by non-vectorized operation : ", end = "")
%timeit [math.exp(item) for item in range(150)]

Time taken by vectorized operation : 2.77 µs ± 129 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
Time taken by non-vectorized operation : 25.5 µs ± 645 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [30]:
import numpy as np
import timeit
import math

c = np.add(np.random.rand(15000, 0), np.random.rand(15000, 0))
print("Vectorized:")
%timeit c

import random 

c = []
for i in range(15000):
    a = random.random()
    b = random.random()
    c.append(a + b)
print("Iterative:")
%timeit c

Vectorized:
20.6 ns ± 1.35 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
Iterative:
20.1 ns ± 0.996 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [31]:
def pythonsum(n):
    a = list(range(n))
    b = list(range(n))
    c = []

    for i in range(len(a)):
        a[i] = i ** 2
        b[i] = i ** 3
        c.append(a[i] + b[i])

    return c

def numpysum(n):
    a = np.arange(n) ** 2
    b = np.arange(n) ** 3
    c = np.add(a, b)

    return c

%timeit pythonsum(1000)
%timeit numpysum(1000)

904 µs ± 25 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
8.47 µs ± 85.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [32]:
import dis

In [33]:
dis.Bytecode(numpysum)

Bytecode(<function numpysum at 0x000001AF3E4B8708>)

In [5]:
%%timeit
out = np.empty(mat.shape[1]-1)
for i in range(mat.shape[1]-1):
    out[i] = dot(mat[:,i], mat[:,-1])/(norm(mat[:,i])*norm(mat[:,-1]))


13.9 ms ± 350 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [6]:

%%timeit   
p1 = mat[:,-1].dot(mat[:,:-1])
p2 = norm(mat[:,:-1],axis=0)*norm(mat[:,-1])
out1 = p1/p2

# @yatu's soln
a = mat

1.47 ms ± 98.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [8]:
%%timeit 
cosine_similarity(a[None,:,-1] , a.T[:-1])

NameError: name 'cosine_similarity' is not defined

In [9]:
%%timeit
p1 = mat[:,-1].dot(mat[:,:-1])
p2 = np.sqrt(np.einsum('ij,ij->j',mat[:,:-1],mat[:,:-1]))*norm(mat[:,-1])
out1 = p1/p2

860 µs ± 34.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
