### Methods proposed in the article 

$A$ is a sparse matrix of size $m \times n$ with $L$ nonzero elements per row

In [19]:
import numpy as np
import os
import sys
# Keep the part of the current working directory that precedes 'notebooks'.
# NOTE(review): presumably this is the project root; it assumes the notebook
# is run from inside a 'notebooks' folder — confirm.
project_dir = os.getcwd().split('notebooks')[0]
# Make that directory importable (the next cell imports from the `src` package).
sys.path.append(project_dir)

In [20]:
from src.helper import *

#### 1) Algorithm 1: NaiveMapper

In [21]:
def NaiveMapper(A):
    """
    Function that computes the matrix product A^T A by walking through the rows of A
    input:
    A: 2-D array-like of shape (m, n) (dense representation of the sparse matrix)
    output:
    AProd: array of shape (n, n); entry (j, k) is the dot product between
           columns j and k of A
    raises:
    ValueError: if A is not 2-dimensional
    """
    A = np.asarray(A)
    if A.ndim != 2:
        raise ValueError(
            "NaiveMapper expects a 2-D matrix, got %d dimension(s)" % A.ndim
        )
    n = A.shape[1]
    # Accumulate into a fixed (n, n) buffer: the result keeps the right shape
    # even for a single column or for a matrix without rows, and we avoid
    # re-copying the partial result with np.vstack for every column.
    AProd = np.zeros((n, n), dtype=A.dtype)
    for row in A:
        # Each row contributes every pairwise product row[j] * row[k].
        AProd += np.outer(row, row)
    return AProd
    
    

#### 2) Algorithm 2: NaiveReducer

In [22]:
def NaiveReducer(C):
    """
    Function that adds up all the elements of a collection of arrays
    input:
    C: iterable of arrays (or numbers) to be added together
    output:
    reduced: result of adding every element of C, starting from 0
    """
    reduced = sum(C)
    return reduced

In [23]:
# Example
# NOTE(review): sparse_generator comes from src.helper (not shown here);
# presumably it returns a random sparse 3 x 4 matrix — confirm.
A = sparse_generator(3,4)
# Mapper output: the rows of A^T A stacked into one matrix.
NaiveMapper(A)

# Reducer output: sum() adds the rows of the mapper output together,
# so this is the 1-D array of column sums of A^T A (not A^T A itself).
NaiveReducer(NaiveMapper(A))

array([0.        , 0.82547923, 0.24802412, 0.52095399])

#### 3) Algorithm 3: DIMSUMMapper

In [24]:
def norm(A):
    """
    Function that returns the Euclidean (L2) norm of every column of a matrix
    input:
    A: matrix
    output:
    array holding one norm per column of A
    """
    # Sum of squared entries down each column, then the square root of each sum.
    squared_column_sums = np.square(A).sum(axis=0)
    return squared_column_sums ** 0.5

In [25]:
def mapper(mat, gamma):
    """
    Function that computes a sampled version of the column dot products of a matrix
    (DIMSUM-style mapper): for every pair of columns (i, j), each row is kept
    with probability min(1, gamma / (||c_i|| * ||c_j||)) and the products
    mat[row, i] * mat[row, j] of the kept rows are summed.
    input:
    mat: 2-D numpy array of shape (nrow, ncol)
    gamma: oversampling parameter; the larger it is, the more rows are kept
           (when gamma >= ||c_i|| * ||c_j|| every row is kept for that pair)
    output:
    output: array of shape (ncol, ncol) with the sampled sums; no rescaling
            is applied here
    """
    norms_array = norm(mat)
    nrow, ncol = mat.shape
    output = np.zeros((ncol, ncol))  # note that ncol << nrow, so the for loops are OK
    for i_output in range(ncol):
        for j_output in range(ncol):
            norm_product = norms_array[i_output] * norms_array[j_output]
            if norm_product == 0:
                # At least one of the two columns is entirely zero: every product
                # is 0, and skipping avoids a division by zero.
                continue
            # Probability of keeping a row for this pair of columns.
            proba = min(1.0, gamma / norm_product)
            # A row is chosen when its uniform draw falls BELOW the probability,
            # so that it is kept with probability `proba` (not 1 - proba).
            chosen_rows = np.random.rand(nrow) < proba
            # Sum the products over the chosen rows only.
            output[i_output, j_output] = np.sum(
                mat[chosen_rows, i_output] * mat[chosen_rows, j_output]
            )
    return output

In [26]:
# Exact column dot products (A^T A) of the example matrix A, shown for reference.
NaiveMapper(A)

array([[0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.82547923, 0.        , 0.        ],
       [0.        , 0.        , 0.24802412, 0.        ],
       [0.        , 0.        , 0.        , 0.52095399]])

In [30]:
# Sampled column dot products of the same matrix A, with gamma = 0.1.
mapper(A, 0.1)

  # Remove the CWD from sys.path while we load stuff.


array([[0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.24802412, 0.        ],
       [0.        , 0.        , 0.        , 0.52095399]])

In [None]:
def DIMSUMMApper(A, gamma):
    (m,n) = A.shape 
    Anorm = norm(A) # norm of columns of A
    AProd = np.zeros((m,m))
    for ci in range(n):
        for cj in range(n):
            prob = min(1, gamma*1/(Anorm[ci]*Anorm[cj]))
            random_value = np.random.rand()
            if random_value 