## INFOH515 Spark code
## Author: Gianluca Bontempi
## Spark implementation of the example of matrix multiplication in the INFOH515 slides "Map-reduce analytics" 


In [49]:
import numpy as np
import pwd
import getpass
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg, sum
import os  # needed by os.getuid() below; the original cell used it without importing it

# Login name of the current user, looked up in two ways: through getpass and
# through the password-database entry of the current process' user id.
userName = getpass.getuser()
appName = pwd.getpwuid(os.getuid())[0]


# Create (or reuse) a SparkSession and keep a handle on its SparkContext.
# NOTE: the application name is the literal 's.com'; `appName` computed above
# is not passed to the builder in this cell.
spark = SparkSession.builder.appName('s.com').getOrCreate()
sc = spark.sparkContext

In [43]:
# Display the version string reported by the SparkContext.
sc.version

'3.5.5'

In [44]:
def mapstep(x):
    """Map step of the map-reduce matrix product M x N.

    `x` is one parsed input row: x[0] is the matrix name ('M' or 'N'),
    x[1] is the 1-based row index and the remaining fields are the row's
    entries (all convertible with int()).

    The function reads the module-level dimensions MR (rows of M),
    MC (columns of M) and NC (columns of N).

    Returns a list of ((i, k), (j, matname, value)) pairs, where (i, k) is
    the cell of the product the value contributes to and j is the position
    along the shared (inner) dimension:
      * an entry M[i][j] is emitted once for every output column k = 1..NC;
      * an entry N[j][k] is emitted once for every output row i = 1..MR.
    Rows whose name is neither 'M' nor 'N' produce an empty list.
    """
    matname = x[0]
    pairs = []
    if matname == 'M':
        i = int(x[1])
        for j in range(1, MC + 1):
            # extend() appends in place instead of rebuilding the whole list
            # on every iteration.
            pairs.extend(((i, k), (j, matname, int(x[1 + j])))
                         for k in range(1, NC + 1))
    elif matname == 'N':
        j = int(x[1])
        for k in range(1, NC + 1):
            pairs.extend(((i, k), (j, matname, int(x[1 + k])))
                         for i in range(1, MR + 1))
    return pairs

In [45]:
def redstep(x):
    """Reduce step: combine all contributions collected for one output cell.

    `x` is a flat sequence made of consecutive (index, matname, value)
    triples, i.e. the concatenation of the values emitted by the map step
    for a single key. `index` is the 1-based position along the inner
    dimension and `matname` is 'M' or 'N'.

    The values are scattered into two vectors of length MC (module-level
    number of columns of M); positions that receive no value stay at zero.
    Triples with any other matrix name are ignored.

    Returns the NumPy sum of the element-wise product of the two vectors,
    which is the dot product giving the output cell.
    """
    vM = np.zeros(MC)
    vN = np.zeros(MC)
    # Walk the flat sequence one triple at a time.
    for start in range(0, len(x), 3):
        matname = x[start + 1]
        if matname == 'M':
            vM[x[start] - 1] = x[start + 2]
        elif matname == 'N':
            vN[x[start] - 1] = x[start + 2]
    return np.sum(vM * vN)

In [47]:
# Bring every parsed row of `matr` back to the driver to inspect the input.
matr.collect()

                                                                                

[['M', '1', '1', '2'],
 ['M', '2', '3', '4'],
 ['M', '3', '5', '6'],
 ['N', '1', '-1', '-2', '-3', '-4'],
 ['N', '2', '-5', '-6', '-7', '-8']]

In [46]:
# Read the text input "matrix" and turn each line into its list of
# comma-separated fields (all fields stay strings at this point).
matr = (
    sc.textFile("matrix")
    .map(lambda line: line.split(","))
)

In [48]:
# Derive the matrix dimensions from the parsed rows.
# Rows are selected by their first field (the matrix name).
_m_rows = matr.filter(lambda row: row[0] == 'M')
_n_rows = matr.filter(lambda row: row[0] == 'N')

# Number of rows = number of input lines carrying that matrix name.
MR = _m_rows.count()
NR = _n_rows.count()

# Number of columns = fields of one row minus the two leading fields
# (matrix name and row index).
MC = len(_m_rows.take(1)[0]) - 2
NC = len(_n_rows.take(1)[0]) - 2

In [26]:
# Preview the key/value pairs produced by the map step for every input row.
matr.flatMap(mapstep).collect()

[((1, 1), (1, 'M', 1)),
 ((1, 2), (1, 'M', 1)),
 ((1, 3), (1, 'M', 1)),
 ((1, 4), (1, 'M', 1)),
 ((1, 1), (2, 'M', 2)),
 ((1, 2), (2, 'M', 2)),
 ((1, 3), (2, 'M', 2)),
 ((1, 4), (2, 'M', 2)),
 ((2, 1), (1, 'M', 3)),
 ((2, 2), (1, 'M', 3)),
 ((2, 3), (1, 'M', 3)),
 ((2, 4), (1, 'M', 3)),
 ((2, 1), (2, 'M', 4)),
 ((2, 2), (2, 'M', 4)),
 ((2, 3), (2, 'M', 4)),
 ((2, 4), (2, 'M', 4)),
 ((3, 1), (1, 'M', 5)),
 ((3, 2), (1, 'M', 5)),
 ((3, 3), (1, 'M', 5)),
 ((3, 4), (1, 'M', 5)),
 ((3, 1), (2, 'M', 6)),
 ((3, 2), (2, 'M', 6)),
 ((3, 3), (2, 'M', 6)),
 ((3, 4), (2, 'M', 6)),
 ((1, 1), (1, 'N', -1)),
 ((2, 1), (1, 'N', -1)),
 ((3, 1), (1, 'N', -1)),
 ((1, 2), (1, 'N', -2)),
 ((2, 2), (1, 'N', -2)),
 ((3, 2), (1, 'N', -2)),
 ((1, 3), (1, 'N', -3)),
 ((2, 3), (1, 'N', -3)),
 ((3, 3), (1, 'N', -3)),
 ((1, 4), (1, 'N', -4)),
 ((2, 4), (1, 'N', -4)),
 ((3, 4), (1, 'N', -4)),
 ((1, 1), (2, 'N', -5)),
 ((2, 1), (2, 'N', -5)),
 ((3, 1), (2, 'N', -5)),
 ((1, 2), (2, 'N', -6)),
 ((2, 2), (2, 'N', -6)),

In [27]:
# Full map-reduce pipeline:
#   1. the map step emits ((i, k), (j, name, value)) pairs for every row,
#   2. the tuples sharing a key are concatenated into one flat tuple,
#   3. the reduce step turns that flat tuple into the value of cell (i, k).
res = (
    matr.flatMap(mapstep)
    .reduceByKey(lambda left, right: left + right)
    .map(lambda cell: (cell[0], redstep(cell[1])))
)

In [28]:
# Run the pipeline and bring the ((i, k), value) results back to the driver.
res.collect()

[((1, 1), -11.0),
 ((1, 2), -14.0),
 ((1, 3), -17.0),
 ((1, 4), -20.0),
 ((2, 1), -23.0),
 ((2, 2), -30.0),
 ((2, 3), -37.0),
 ((2, 4), -44.0),
 ((3, 1), -35.0),
 ((3, 2), -46.0),
 ((3, 3), -57.0),
 ((3, 4), -68.0)]

In [29]:
# Cross-check: the same two matrices multiplied directly with NumPy.
M = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
N = np.array([
    [-1, -2, -3, -4],
    [-5, -6, -7, -8],
])
np.matmul(M, N)

array([[-11, -14, -17, -20],
       [-23, -30, -37, -44],
       [-35, -46, -57, -68]])