# Calcul du produit matriciel avec le schéma MapReduce

In [1]:
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 21 16:29:28 2017

@author: User
"""


import json
import collections
import itertools
import numpy as np


def readData(filename):
    """Lazily yield the records of a JSON-lines file.

    Each non-blank line of the file is parsed as one JSON document and
    yielded in file order.

    Args:
        filename: Path of a UTF-8 text file holding one JSON value per line.

    Yields:
        The decoded JSON value of each non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(filename, mode='r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (typically a trailing newline at the end of the
            # file) carry no record and would make json.loads() raise.
            if not line.strip():
                continue
            yield json.loads(line)

#Step 1: starting from the file of records ['a',i,k,a(i,k)] and ['b',k,j,b(k,j)],
#join them on k to build the tuples [i,k,j,'a',a(i,k),'b',b(k,j)].
#Step 2: the sums over k of a(i,k)*b(k,j) are then computed by a second map and a second reduce.

#['a',i,k,a(i,k)] -> key = k , value = ['a',i,a(i,k)]
#['b',k,j,b(k,j)] -> key = k , value = ['b',j,b(k,j)]            
def mapper1(data):
    """Map step of the join: key every matrix entry by the shared index k.

    Args:
        data: A record ``['a', i, k, a(i,k)]`` or ``['b', k, j, b(k,j)]``.

    Returns:
        A one-element list ``[(k, [tag, other_index, value])]`` where
        ``other_index`` is the row ``i`` for an 'a' record and the column
        ``j`` for a 'b' record.

    Raises:
        ValueError: If the record tag is neither "a" nor "b".
    """
    tag = data[0]
    if tag == "a":
        # ['a', i, k, a(i,k)] -> key = k, value = ['a', i, a(i,k)]
        key = data[2]
        value = [tag, data[1], data[3]]
    elif tag == "b":
        # ['b', k, j, b(k,j)] -> key = k, value = ['b', j, b(k,j)]
        key = data[1]
        value = [tag, data[2], data[3]]
    else:
        # Without this branch an unknown tag left key/value unbound and the
        # function failed with an unrelated-looking UnboundLocalError.
        raise ValueError(
            "unknown matrix tag {!r}: expected 'a' or 'b'".format(tag)
        )
    return [(key, value)]

#Rassembler les {k: ['a',i,a(i,k)],...,['b',j,b(k,j)]} 
def shuffle(data):
    """Group mapper output by key.

    Args:
        data: An iterable of mapper outputs, each of them an iterable of
            ``(key, value)`` pairs.

    Returns:
        The items view of a dictionary that maps every key to the list of
        its values, in the order the values were encountered.
    """
    grouped = collections.defaultdict(list)  # missing keys start as []

    # Walk every mapper output in turn and file each value under its key.
    for pairs in data:
        for key, value in pairs:
            grouped[key].append(value)

    return grouped.items()
    
def reducer1(data):
    """Reduce step of the join: pair every 'a' entry with every 'b' entry.

    Args:
        data: A pair ``(k, records)`` where each record is
            ``[tag, index, value]``. Records tagged 'a' go on the left side
            of the join; every other record goes on the right side.

    Returns:
        A list of tuples ``(i, k, j, a_tag, a_value, b_tag, b_value)``, one
        per (left, right) combination, ordered by left record first and
        right record second.
    """
    k = data[0]
    left = []
    right = []
    # Single pass so that `data[1]` may be any iterable, not only a list.
    for record in data[1]:
        if record[0] == 'a':
            left.append(record)
        else:
            right.append(record)

    # Build the joined tuples directly instead of wrapping each one in a
    # one-element list and unwrapping it again afterwards.
    return [
        (x[1], k, y[1], x[0], x[2], y[0], y[2])
        for x in left
        for y in right
    ]
    

def mapper2(data):
    """Re-key the joined tuples by output cell.

    Args:
        data: An iterable of 7-item sequences
            ``(i, k, j, a_tag, a_value, b_tag, b_value)``.

    Returns:
        A list of ``[key, value]`` lists with
        ``key = (a_tag, b_tag, i, j)`` and ``value = (a_value, b_value)``.
    """
    return [
        [(row[3], row[5], row[0], row[2]), (row[4], row[6])]
        for row in data
    ]
    
    
def reducer2(data):
    """Sum the pairwise products collected for one key.

    Args:
        data: A pair ``(key, values)`` where every value is indexable and
            holds the two factors at positions 0 and 1.

    Returns:
        The tuple ``(key, total)`` where ``total`` is the sum of
        ``value[0] * value[1]`` over all values (0 when there are none).
    """
    total = 0
    products = (pair[0] * pair[1] for pair in data[1])
    # Accumulate left to right, starting from the integer 0.
    for product in products:
        total += product
    return (data[0], total)


# Reference matrices, multiplied directly with NumPy further down.
# NOTE(review): presumably the same matrices that 'matrix.json' encodes --
# confirm against the data file.
a = np.array([
    [63, 45, 93, 32, 49],
    [33, 0, 0, 26, 95],
    [25, 11, 0, 60, 89],
    [24, 79, 24, 47, 18],
    [7, 98, 96, 27, 0],
])

b = np.array([
    [63, 18, 89, 28, 39],
    [59, 76, 34, 12, 6],
    [30, 52, 49, 3, 95],
    [77, 75, 85, 0, 0],
    [0, 46, 33, 69, 88],
])


# Round 1: join the 'a' and 'b' records on their shared index k.
mapped = map(mapper1, readData('matrix.json'))
shuffled = shuffle(mapped)
reduced = map(reducer1, shuffled)

# Round 2: regroup the joined tuples by output cell and sum the products.
mapped2 = map(mapper2, reduced)
shuffled2 = shuffle(mapped2)
reduced2 = map(reducer2, shuffled2)

# The map objects are lazy; sorting consumes the whole pipeline and orders
# the (key, value) pairs by key.
list_prod = sorted(reduced2)

In [2]:
# Display the sorted (key, value) pairs held in list_prod.
print(list_prod)

[(('a', 'b', 0, 0), 11878), (('a', 'b', 0, 1), 14044), (('a', 'b', 0, 2), 16031), (('a', 'b', 0, 3), 5964), (('a', 'b', 0, 4), 15874), (('a', 'b', 1, 0), 4081), (('a', 'b', 1, 1), 6914), (('a', 'b', 1, 2), 8282), (('a', 'b', 1, 3), 7479), (('a', 'b', 1, 4), 9647), (('a', 'b', 2, 0), 6844), (('a', 'b', 2, 1), 9880), (('a', 'b', 2, 2), 10636), (('a', 'b', 2, 3), 6973), (('a', 'b', 2, 4), 8873), (('a', 'b', 3, 0), 10512), (('a', 'b', 3, 1), 12037), (('a', 'b', 3, 2), 10587), (('a', 'b', 3, 3), 2934), (('a', 'b', 3, 4), 5274), (('a', 'b', 4, 0), 11182), (('a', 'b', 4, 1), 14591), (('a', 'b', 4, 2), 10954), (('a', 'b', 4, 3), 1660), (('a', 'b', 4, 4), 9981)]


In [3]:
# Reference result: the product of a and b computed directly by NumPy.
print(np.dot(a,b))

[[11878 14044 16031  5964 15874]
 [ 4081  6914  8282  7479  9647]
 [ 6844  9880 10636  6973  8873]
 [10512 12037 10587  2934  5274]
 [11182 14591 10954  1660  9981]]
