# Matrix-matrix multiplication using mrjob

Each input file holds one matrix, one entry per line, written as a row index, a column index and the value separated by whitespace:

i j     valuek

i j+1   valuek+1

i+1 j   valuek+2

i+1 j+1 valuek+3

## Configuration 

In [None]:
# Working directory for this notebook: the generated script, its output and
# its log are all placed under it (see myscript, myoutput and mylog below).
mydir = "mymrjob"
# Export the value to the environment so shell commands can reference it.
%env mydir = $mydir

# Paths of the two input matrices (A and B).
# The script recognises the A matrix by checking whether the --A-matrix
# string (default 'A') occurs in the input file name, so the name chosen
# for B must not contain that string.
matA = "data/mat/matmat_3x2_A"
matB = "data/mat/matmat_2x2_B"

%env matA $matA
%env matB $matB

# Path of the mrjob script that the %%writefile cell creates.
myscript = mydir + "/matmat.py"
%env myscript $myscript

# Create the working directory if needed, then export the paths intended
# for the job's standard output and standard error.
%system mkdir -p $mydir
%env myoutput $mydir/output
%env mylog $mydir/log

In [None]:
%%writefile $myscript

import os
import pickle
import random
import sys

import numpy

from mrjob.compat import jobconf_from_env
# from mrjob.compat import get_jobconf_value --> MRJob < 0.5
from mrjob.job import MRJob
from mrjob.step import MRStep


class MatMult(MRJob):
    """Two-step MapReduce job that multiplies two matrices, C = A * B.

    Every input line describes one matrix entry as three whitespace-separated
    fields: two integer indices followed by the value.

    Step 1 joins the entries of A and B on their shared index ``j`` (second
    index of A, first index of B) and emits one partial product per output
    cell ``(i, k)``.  Step 2 sums the partial products of each cell.
    """

    def configure_options(self):
        """Register ``--A-matrix``, the string that identifies the A input.

        The value is stored in ``self.options.Amatname`` and defaults to
        ``'A'``.
        """
        super(MatMult, self).configure_options()
        self.add_passthrough_option('--A-matrix', default='A',
            dest='Amatname')

    def parsemat(self):
        """Return 1 if the current input file is the A matrix, otherwise 2.

        The decision is a plain substring test of the ``--A-matrix`` value
        against the ``map.input.file`` jobconf value.
        NOTE(review): that value is presumably the full input path, so a
        directory name containing the string would also match -- confirm.
        """
        # fn = get_jobconf_value('map.input.file')
        fn = jobconf_from_env('map.input.file')
        if self.options.Amatname in fn:
            return 1
        else:
            return 2

    def emit_values(self, _, line):
        """Mapper of step 1: key each matrix entry by the join index ``j``.

        An entry ``(i, j, v)`` of A is emitted as ``j, (0, i, v)`` and an
        entry ``(j, k, v)`` of B as ``j, (1, k, v)``; the leading 0/1 tag
        lets the reducer tell the two matrices apart.

        Blank lines are skipped instead of failing the three-field unpack.
        """
        fields = line.split()
        if not fields:
            return

        a, b, v = fields
        v = float(v)

        if self.parsemat() == 1:
            i = int(a)
            j = int(b)
            yield j, (0, i, v)
        else:
            j = int(a)
            k = int(b)
            yield j, (1, k, v)

    def multiply_values(self, j, values):
        """Reducer of step 1: emit ``(i, k), A[i][j] * B[j][k]`` for one ``j``.

        ``values`` can only be iterated once, so the entries are first split
        by their matrix tag and then combined pairwise.
        """
        values_from1 = []
        values_from2 = []

        for v in values:
            if v[0] == 0:
                values_from1.append(v)
            elif v[0] == 1:
                values_from2.append(v)

        # The tag in the first position is no longer needed here.
        for (_, i, v1) in values_from1:
            for (_, k, v2) in values_from2:
                yield (i, k), v1 * v2

    def identity(self, k, v):
        """Pass a key/value pair through unchanged.

        Not referenced by ``steps``; kept so existing users of the method
        keep working.
        """
        yield k, v

    def add_values(self, k, values):
        """Reducer of step 2: sum the partial products of one output cell."""
        yield k, sum(values)

    def steps(self):
        """Return the two steps of the job: join-and-multiply, then sum.

        Steps are built with ``MRStep`` rather than the deprecated
        ``MRJob.mr()`` wrapper, which newer mrjob releases no longer
        provide.
        """
        return [MRStep(mapper=self.emit_values,
                       reducer=self.multiply_values),
                MRStep(reducer=self.add_values)]

# Run the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    MatMult.run()

### Execute the code

In [None]:
# ! python $myscript $matA $matB 1> $myoutput 2> $mylog

### Show Output

In [None]:
# %cat $myoutput
# %cat $mylog

### Additional credits

Jure Leskovec Stanford Univ.

Anand Rajaraman Milliway Labs

Jeffrey D. Ullman Stanford Univ.