In [4]:
import numpy as np

In [5]:
n = 100*1000*1000
d = np.random.rand(n)


In [6]:
d.shape

(100000000,)

In [7]:
d.size

100000000

In [8]:
import time

In [9]:
def mean():
    sum = 0
    for i in range(n):
        sum+=d[i]

    mean = sum/n
    return mean



In [10]:
d

array([0.85184116, 0.39522955, 0.92004503, ..., 0.10949249, 0.61450894,
       0.24436804])

In [11]:
start_time = time.time()
m = mean()
end_time = time.time()

print(m)
print(end_time-start_time)

0.49998164361915
19.043489933013916


In [12]:
d.dtype

dtype('float64')

## Multi-processing

In [16]:
from multiprocessing import Process, Queue
import math

def mean_mp(start, end , queue):
    sum = 0
    for i in range(start, end+1):
        sum+=d[i]

    mean = sum/(end-start+1)
    queue.put(mean)
    return

split = math.floor(n/2)

q = Queue()

p1 = Process(target=mean_mp(0, split, q))
p2 = Process(target=mean_mp(split+1, n-1, q))


start_time = time.time()

p1.start()
p2.start()

p1.join()
p2.join()

m = 0

while not q.empty():
    m+=q.get()

m/=2

end_time = time.time()

print(m)
print(end_time-start_time)

0.499981643619535
0.16280531883239746


## Multi-threading

In [19]:
from threading import Thread

means = [0, 0]

def mean_mt(start, end, thread_num):
    sum = 0
    for i in range(start, end+1):
        sum = sum + d[i]

    mean = sum / (end-start+1)
    means[thread_num] = mean
    return

thread_1 = Thread(target=mean_mt, args=(0, split, 0))
thread_2 = Thread(target=mean_mt, args=(split+1, n-1, 1))

start_time = time.time()

thread_1.start()
thread_2.start()

thread_1.join()
thread_2.join()

m = means[0]+means[1]
m=m/2

end_time = time.time()

print(m)
print(end_time-start_time)

0.499981643619535
20.080923080444336


## JobLib

In [20]:
!pip install joblib



### Caching

In [25]:
from joblib import Memory
cache_dir = './cache/'
memory = Memory(cache_dir)

mat_a = np.vander(np.arange(3)).astype(float)
sqare = memory.cache(np.square)

print(mat_a)

mat_b = sqare(mat_a)
print(mat_b)

[[0. 0. 1.]
 [1. 1. 1.]
 [4. 2. 1.]]
[[ 0.  0.  1.]
 [ 1.  1.  1.]
 [16.  4.  1.]]


### Loop parallel programming

In [26]:
from math import sqrt

def fn(i):
    x = 10000
    p = 1
    for j in range(x):
        for k in range(j):
            p*=k
    return sqrt(i**2)

n = 10

start_time = time.time()

for i in range(n):
    fn(i)

end_time = time.time()

print(end_time-start_time)

25.98787546157837


In [28]:
from joblib import Parallel, delayed

start_time = time.time()

Parallel(n_jobs=2)(delayed(fn)(i) for i in range(n))

end_time = time.time()

print(end_time-start_time)

16.9275906085968


In [29]:
start_time = time.time()

Parallel(n_jobs=2, prefer="threads")(delayed(fn)(i) for i in range(n))

end_time = time.time()

print(end_time-start_time)

28.03768801689148


In [30]:
from joblib import Parallel, delayed

start_time = time.time()

Parallel(n_jobs=-1)(delayed(fn)(i) for i in range(n))

end_time = time.time()

print(end_time-start_time)

12.169411897659302


## NumPy vs Numba - *Matrix Multiplication*

In [31]:
!pip install numba



In [33]:
from numba import njit, prange

@njit(parallel=True)
def matrix_multiplication(matrix_a, matrix_b):
    assert matrix_a.shape[1]==matrix_b.shape[0]
    result = np.zeros((matrix_a.shape[0], matrix_b.shape[1]), )
    for i in prange(matrix_a.shape[0]):
        for k in prange(matrix_a.shape[1]):
            for j in prange(matrix_b.shape[1]):
                result[i,j] += matrix_a[i,k]*matrix_b[k,j]
    return result

m, n, c = 1000, 1500, 1200

matrix_a = np.random.randint(1,50,size=(m,n))
matrix_b = np.random.randint(1,50,size=(n,c))


start_time = time.time()

result = matrix_multiplication(matrix_a, matrix_b)

end_time = time.time()

print(end_time-start_time)

2.5895557403564453


In [34]:
start_time = time.time()

np.matmul(matrix_a, matrix_b)

end_time = time.time()

print(end_time-start_time)

5.2315332889556885


# DO NOT RUN

> **CAUTION**
This code takes around **595 sec** on i9, 12th gen
This code takes around **22 min** on i5, 10th gen


## PLEASE NOTICE

> ***WARNING*** **If you are using your sys locally**: This will take a long time. ***DO NOT RUN ON LAPTOP/DESKTOP***

In [None]:
def standard_matrix_multiplication(matrix_a, matrix_b):
    assert matrix_a.shape[1]==matrix_b.shape[0]
    result = np.zeros((matrix_a.shape[0], matrix_b.shape[1]), )
    for i in prange(matrix_a.shape[0]):
        for k in prange(matrix_a.shape[1]):
            for j in prange(matrix_b.shape[1]):
                result[i,j] += matrix_a[i,k]*matrix_b[k,j]
    return result

start_time = time.time()

matrix_multiplication(matrix_a, matrix_b)

end_time = time.time()

print(end_time-start_time)