# Parallel matrix multiplication

Note: Each result block is computed by a tree reduction over its partial block products instead of accumulating them one after another.

In [None]:
import pycompss.interactive as ipycompss

In [None]:
# Start the PyCOMPSs runtime with task-graph generation and tracing enabled.
# NOTE(review): monitor=2000 is presumably the monitor refresh interval in
# milliseconds -- confirm against the PyCOMPSs interactive API documentation.
ipycompss.start(graph=True, trace=True, monitor=2000)

In [None]:
from pycompss.api.task import task
from pycompss.api.parameter import *
import numpy as np
import os

## Task definitions

In [None]:
@task(returns=list)
def createBlock(BSIZE, res, MKLProc):
    """Build one square block of a blocked matrix.

    MKL_NUM_THREADS is set to MKLProc in the environment of the process
    that runs the function before the block is created.

    :param BSIZE: Number of rows and of columns of the block.
    :param res: If true the block is filled with zeros, otherwise with
                values drawn from np.random.random.
    :param MKLProc: Value stored (as a string) in MKL_NUM_THREADS.
    :return: A BSIZE x BSIZE np.matrix of doubles.
    """
    os.environ["MKL_NUM_THREADS"] = str(MKLProc)
    shape = (BSIZE, BSIZE)
    values = np.zeros(shape) if res else np.random.random(shape)
    # copy=False: wrap the freshly created buffer instead of duplicating it.
    block = np.array(values, dtype=np.double, copy=False)
    return np.matrix(block, dtype=np.double, copy=False)

@task(c=INOUT)
def multiply(a, b, c, MKLProc):
    """Accumulate the product of two blocks into a third one, in place.

    MKL_NUM_THREADS is set to MKLProc before the product is computed.
    The product uses the ``*`` operator, which is a matrix product for
    np.matrix blocks such as the ones returned by createBlock.

    :param a: Left operand block.
    :param b: Right operand block.
    :param c: Block updated in place with ``c += a * b``.
    :param MKLProc: Value stored (as a string) in MKL_NUM_THREADS.
    :return: None; the result is left in c.
    """
    os.environ["MKL_NUM_THREADS"] = str(MKLProc)
    product = a * b
    c += product

@task(returns=list)
def dot(A, B, transposeResult=False, transposeB=False):
    """Return np.dot(A, B), optionally transposing B and/or the result.

    :param A: Left operand.
    :param B: Right operand.
    :param transposeResult: If true, the product is transposed before
                            being returned.
    :param transposeB: If true, B is transposed before the product.
    :return: The (possibly transposed) product.
    """
    right = np.transpose(B) if transposeB else B
    product = np.dot(A, right)
    return np.transpose(product) if transposeResult else product

@task(returns=list)
def sumList(A):
    """Return the sum of all the items of a non-empty list.

    The items are added left to right with ``+``, so none of the inputs is
    modified (accumulating with ``+=`` into A[0] would overwrite the
    caller's first item), matching what sumList2 and sumList4 do.

    :param A: Non-empty sequence of addable items (e.g. matrix blocks).
    :return: A[0] itself when A has a single item, otherwise a new object
             holding A[0] + A[1] + ... + A[-1].
    :raises IndexError: If A is empty.
    """
    if len(A) == 1:
        return A[0]
    total = A[0] + A[1]
    for i in range(2, len(A)):
        total = total + A[i]
    return total

@task(returns=list)
def sumList4(A, B, C, D):
    """Return A + B + C + D, evaluated left to right.

    :param A: First addend.
    :param B: Second addend.
    :param C: Third addend.
    :param D: Fourth addend.
    :return: The sum of the four arguments.
    """
    total = A + B
    total = total + C
    return total + D

@task(returns=list)
def sumList2(A, B):
    """Return A + B.

    :param A: First addend.
    :param B: Second addend.
    :return: The sum of the two arguments.
    """
    total = A + B
    return total

## Functions

In [None]:
def initialize_variables(MKLProc):
    """Append MSIZE x MSIZE random blocks to the global matrices A and B.

    For each of the module-level lists A and B, MSIZE new rows are
    appended, each holding MSIZE blocks created with
    createBlock(BSIZE, False, MKLProc).

    Every new row receives its own blocks. Indexing ``matrix[i]`` instead
    would only be right for an initially empty list; on a non-empty one the
    blocks would go to the old rows and the new rows would stay empty.
    Existing content is not cleared, so reset A and B to empty lists before
    calling this when a fresh MSIZE x MSIZE layout is wanted.

    :param MKLProc: Forwarded to createBlock.
    :return: None; A and B are extended in place.
    """
    for matrix in (A, B):
        for _ in range(MSIZE):
            matrix.append(
                [createBlock(BSIZE, False, MKLProc) for _ in range(MSIZE)]
            )

In [None]:
def reduceSum(A, amount = 4):
    """Sum the items of A with a tree of sumList2/sumList4/sumList calls.

    Lists of one item are returned as is, lists of two or four items go
    straight to sumList2 / sumList4, and any other list of at most
    ``amount`` items goes to sumList. Longer lists are split into
    consecutive chunks of ``amount`` items; each chunk is reduced, and the
    list of partial results is reduced again the same way.

    ``amount`` is forwarded to every recursive call, so a non-default chunk
    size applies to all the levels of the tree and not only to the first.

    :param A: Non-empty list of items to add (e.g. results of dot).
    :param amount: Maximum number of items summed by one sumList call.
                   Values below 2 are treated as 2, because a smaller chunk
                   would never shorten the list.
    :return: The item itself for a single-item list, otherwise the result
             of the last sum call.
    """
    # Guard the recursion: with chunks of fewer than two items a reduction
    # level would produce as many partial results as it received.
    amount = max(amount, 2)
    count = len(A)
    if count == 1:
        return A[0]
    if count == 2:
        return sumList2(A[0], A[1])
    if count == 4:
        return sumList4(A[0], A[1], A[2], A[3])
    if count <= amount:
        return sumList(A)
    partials = [
        reduceSum(A[start:start + amount], amount)
        for start in range(0, count, amount)
    ]
    return reduceSum(partials, amount)

In [None]:
def reduceSumGen(A, amount = 4):
    """Sum the items of A with a tree built only from sumList calls.

    A list of at most ``amount`` items goes straight to sumList. Longer
    lists are split into consecutive chunks of ``amount`` items; each chunk
    is reduced, and the list of partial results is reduced again the same
    way.

    ``amount`` is forwarded to every recursive call, so a non-default chunk
    size applies to all the levels of the tree and not only to the first.

    :param A: Non-empty list of items to add.
    :param amount: Maximum number of items summed by one sumList call.
                   Values below 2 are treated as 2, because a smaller chunk
                   would never shorten the list.
    :return: The result of the last sumList call.
    """
    # Guard the recursion: with chunks of fewer than two items a reduction
    # level would produce as many partial results as it received.
    amount = max(amount, 2)
    count = len(A)
    if count <= amount:
        return sumList(A)
    partials = [
        reduceSumGen(A[start:start + amount], amount)
        for start in range(0, count, amount)
    ]
    return reduceSumGen(partials, amount)

In [None]:
def multiplyBlocked(A, B, BSIZE, MKLProc, transposeB = False):
    """Multiply two matrices stored as lists of rows of blocks.

    Each result block C[i][j] is obtained by calling dot on every pair
    A[i][k], B[k][j] and reducing the partial products with reduceSum.

    :param A: Left matrix, a list of rows of blocks.
    :param B: Right matrix, a list of rows of blocks.
    :param BSIZE: Accepted for interface compatibility; not used here.
    :param MKLProc: Accepted for interface compatibility; not used here.
    :param transposeB: If true, the block layout of B is transposed first
                       and the flag is also forwarded to dot, which
                       transposes each individual block.
    :return: The result matrix as a list of rows of reduceSum results.
    """
    if transposeB:
        # Swap the block grid; the blocks themselves are transposed by dot.
        B = [[B[r][c] for r in range(len(B))] for c in range(len(B[0]))]
    C = []
    for rowA in A:
        rowC = []
        C.append(rowC)
        for j in range(len(B[0])):
            partials = [
                dot(rowA[k], B[k][j], transposeB=transposeB)
                for k in range(len(A[0]))
            ]
            rowC.append(reduceSum(partials))
    return C

## MAIN Code

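Parameters (which can be configured in the following cell):
* MSIZE: Number of blocks per matrix dimension (default: 8)
* BSIZE: Number of rows and columns of each block (default: 16)
* MKLProc: Number of MKL threads, exported as MKL_NUM_THREADS when the blocks are created (default: 1)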

In [None]:
import time
# Taken before the PyCOMPSs API import below so that totalTimeWithImports
# also covers the time spent importing it.
begginingTime = time.time()

from pycompss.api.api import compss_wait_on, compss_barrier

# Configuration: A and B each hold MSIZE x MSIZE blocks of BSIZE x BSIZE
# values; MKLProc is the value written to MKL_NUM_THREADS by createBlock.
MSIZE = 8
BSIZE = 16
MKLProc = 1
# Global block matrices, filled in place by initialize_variables.
A = []
B = []

startTime = time.time()

# Submit the createBlock tasks that populate A and B.
initialize_variables(MKLProc)

# Barrier before reading the clock, so that initTime is meant to cover the
# execution of the block-creation tasks and not only their submission.
compss_barrier()

initTime = time.time() - startTime
startMulTime = time.time()

# Build the multiplication task graph, then synchronize on its results.
C = multiplyBlocked(A,B,BSIZE,MKLProc)
C = compss_wait_on(C)

mulTime = time.time() - startMulTime
# NOTE(review): measured from the same start as mulTime; this cell runs no
# separate transposed multiplication, so both values are nearly identical.
mulTransTime = time.time() - startMulTime
totalTime = time.time() - startTime
totalTimeWithImports = time.time() - begginingTime

In [None]:
# Print the run parameters, the measured times and the result matrix.
# print() is always called with a single argument so that the cell gives
# the same output under Python 2 and Python 3 (the Python 2 print statement
# is a syntax error on Python 3).
import pprint

print("PARAMS:------------------")
print("MSIZE:{}".format(MSIZE))
print("BSIZE:{}".format(BSIZE))
print("initT:{}".format(initTime))
print("multT:{}".format(mulTime))
print("mulTransT:{}".format(mulTransTime))
print("totalTime:{}".format(totalTime))
pprint.pprint(C)

In [None]:
# Stop the PyCOMPSs runtime that was started at the top of the notebook.
ipycompss.stop()