In [86]:
from collections import Counter
import numpy as np
from threading import Thread
from multiprocessing import Process, Manager, freeze_support, set_start_method, Array, Queue
from time import time, sleep
import concurrent.futures
from mat_mult_helper import mat_mult_worker
from mat_mult_helper_2 import mat_mult_worker_2
import multiprocessing, math
import pandas as pd

In [87]:
def mat_mult_sequential(A,B):
    N = len(A)
    C = np.zeros(shape = (N,N)) 
    for i in range(N):
        for j in range(N):
            for k in range(N):
                C[j,k]+=(A[i,j]*B[j,k])
    return C

In [88]:
def mat_mult_slice(A, B, C, start, end):
    N = len(A)
    for i in range(start,end):
        for j in range(N):
            for k in range(N):
                C[j,k]+=(A[i,j]*B[j,k])

def mat_mult_threads(A,B,cores):
    N = len(A)
    steps = math.ceil(N//cores)#number of rows should be processed by each thread
    start = 0
    threads = []
    C = np.zeros(shape = (N,N))

    if __name__ == '__main__':
        while start<N:
            end = N if start+steps > N else start+steps
            t = Thread(target=mat_mult_slice, args = [A,B,C,start,end])
            threads.append(t)
            start = end #update row number for the start of next process
        for t in threads: t.start()
        for t in threads: t.join()
    return C

In [93]:
def add_vector_multiprocess_shared_object(A,B,cores):
    N = len(A)
    #res = []
    steps = N//cores
    start = 0
    processes = []
    
    if __name__ == '__main__':
        manager = multiprocessing.Manager()
        return_dict = manager.dict()
        for c in range(cores):
            end = start+steps
            if cores-c==1:end = N
            p = Process(target=mat_mult_worker_2, args = [A[start:end],B[:,start:end],c,return_dict])
            processes.append(p)
            start = end
        for p in processes: p.start()
        for p in processes: p.join()
        res_list = []
        #for c in range(cores):print(return_dict[c])
        #for c in range(cores):res_list+=return_dict.get()
        #print(return_dict.values())
        return res_list

In [94]:
def mat_mult_multiprocess_not_shared_object(A,B,cores):
    N = len(A)
    #res = []
    steps = N//cores
    start = 0
    processes = []
    
    if __name__ == '__main__':
        #manager = multiprocessing.Manager()
        #return_dict = manager.dict()
        C = np.zeros(shape = (N,N))
        for c in range(cores):
            end = start+steps
            if cores-c==1:end = N
            p = Process(target=mat_mult_worker, args = [A[start:end],B[:,start:end],C[start:end]])
            processes.append(p)
            start = end
        for p in processes: p.start()
        for p in processes: p.join()

        #print(return_dict.values())
        return C

In [95]:
def compare_proc_sizes_for_N(N):
    procs = [1,2,3,4]
    df_compare_add_value = pd.DataFrame(columns=['Process count','Sequential','Multithreading','Multiprocessing with sharing objects','Multiprocessing without sharing objects'])
    A = np.random.random(size = (N,N))
    B = np.random.random(size = (N,N))
    for i in range(len(procs)):
        proc = procs[i]
        t0 = time()
        C = mat_mult_sequential(A,B)
        t1 = time()
        print(C)
    
        D = mat_mult_threads(A,B,proc)
        t2 = time()
        print(D)

        E = add_vector_multiprocess_shared_object(A,B,proc)
        t3 = time()
        #print(E)
        
        F = mat_mult_multiprocess_not_shared_object(A,B,proc)
        t4 = time()
        #print(E)
        df_compare_add_value.loc[i] = [proc, t1-t0, t2-t1, t3-t2, t4-t3]
    return df_compare_add_value


In [None]:
df_pow_4 = compare_proc_sizes_for_N(pow(2,1))
df_pow_4

[[0.33813448 0.64577329]
 [0.52599353 0.94543969]]
[[0.33813448 0.64577329]
 [0.52599353 0.94543969]]
[[0.33813448 0.64577329]
 [0.52599353 0.94543969]]
[[0.33813448 0.64577329]
 [0.52599353 0.94543969]]
[[0.33813448 0.64577329]
 [0.52599353 0.94543969]]


In [None]:
df_pow_5 = compare_proc_sizes_for_N(pow(10,2))
df_pow_5

In [326]:
df_pow_6 = compare_proc_sizes_for_N(pow(10,6))
df_pow_6

Unnamed: 0,Process count,Sequential,Multithreading,Multiprocessing with sharing objects,Multiprocessing without sharing objects
0,1.0,0.585214,0.561351,11.08202,0.899464
1,2.0,0.561581,0.56654,8.379571,0.948914
2,3.0,0.597298,0.601421,8.189399,1.178641
3,4.0,0.580389,0.710423,9.070524,1.354424


In [None]:
df_pow_7 = compare_proc_sizes_for_N(pow(10,7))
df_pow_7

In [276]:
df_pow_8 = compare_proc_sizes_for_N(pow(10,8))
df_pow_8

KeyboardInterrupt: 

In [130]:
print(C)

[1.54489853 1.61018871 0.48390481 1.75916574 0.70603675 1.7126731
 0.14334312 0.92907731 1.37482789 1.67252468]


In [131]:
print(pow(10,3))

[1.5448985331252656, 1.610188706241756, 0.4839048070721472, 1.759165738468547, 0.7060367455114349, 1.7126730996234998, 0.14334311523055587, 0.9290773071473577, 1.3748278851300046, 1.6725246813069532]


In [14]:
t0 = time()
C = add_vector(A,B)
#func1(3)
t1 = time()
print('not parrallel:',t1-t0)

    #with Manager() as manager:
        #A_shared = manager.list(A)
        #B_shared = manager.list(B)
        #D = manager.list([0]*N)
D = [0]*N  
t2 = time()
add_multi_process(A,B,D,4)
t3 = time()
print('parrallel:',t3-t2)

not parrallel: 0.0002009868621826172


NameError: name 'add_multi_process' is not defined

In [52]:
print(C,E)

[1.0850019  1.87754641 0.52194858 0.40853868 0.93636873 1.07910797
 1.24122817 0.76609194 1.08210379 1.2959469  1.43129455] [1.0850019  1.87754641 0.52194858 0.40853868 0.93636873 1.07910797
 1.24122817 0.76609194 1.08210379 1.2959469  1.43129455]


In [359]:
print(C)

[1.16788693 0.61783093 0.98638013 1.82080089 1.07605383 1.24048252
 0.90385686 1.6390764  0.85667249 0.71656312 1.41557084 0.87900411
 1.32912766 1.0692364  0.79694739 0.38401101 0.86145394 0.83056056
 1.06644518 0.78442853 1.89314258 1.89854683 0.74920631 1.34428936
 0.39913881 1.65981095 1.3198484  0.94434423 1.78249225 1.38073934
 1.0747033  0.73831081 0.40790519 0.96883055 0.89734381 1.14305428
 1.32506025 0.23638513 0.6240125  0.20283083 0.7823441  1.07716659
 1.11497112 0.92270841 1.05455892 1.21246526 0.85039439 0.23381994
 1.07360646 0.34845549 1.98017796 1.30247985 0.41179324 0.98363706
 1.09128912 1.261082   1.1202726  1.34714614 1.10087836 0.84766004
 1.00903042 1.60373388 1.49257549 0.95248656 1.54763746 1.43594051
 0.87877532 1.84015169 1.62780161 1.22973023 0.82147271 1.10298336
 0.53552056 1.50807096 0.77566597 1.25572493 0.81379625 0.96838748
 1.07855494 0.26722157 1.04371699 0.37529921 1.19909075 0.86329346
 1.26548399 1.30639293 0.37740212 1.59751094 1.02916214 0.6414

In [250]:
t0 = time()
C = add_vector(A,B)
#func1(3)
t1 = time()
print('not parrallel:',t1-t0)
tx = time()
if __name__ == '__main__':
    with Manager() as manager:
        A_shared = manager.list(A)
        B_shared = manager.list(B)
        D = manager.list([0]*N)
        #D = [0]*N  
        t2 = time()
        add_vector_parallel(A,B,D,4)
        t3 = time()
        print('parrallel:',t3-t2)
        #print(D)
ty = time()
#add_multi_process(A,B,D,4)
#func2(3)
print('process:',ty-tx)


not parrallel: 0.0006289482116699219
parrallel: 0.9105610847473145
process: 1.5917930603027344


In [109]:
from platform import python_version

print(python_version())

3.8.5
