# Matrix Multiplication

In [1]:
from itertools import accumulate
import numpy as np

# Example operands: A is 3x5 and B is 5x2, both float16, so A @ B is 3x2.
A = np.array([[2, 45, -1, 17, -1], [0, 12, 3, -63, 2], [-1, 37, -1, -83, 0]], dtype=np.float16)
B = np.array([[-1, 0], [2, 0], [0, -2], [3, -2], [-1, 2]], dtype=np.float16)

# Regular A @ B
def compute(X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
    """Return the ordinary matrix product ``X1 @ X2``.

    Args:
        X1: Left operand; its number of columns must equal the number of
            rows of ``X2``.
        X2: Right operand.

    Returns:
        The product ``X1 @ X2``.

    Raises:
        AssertionError: If the inner dimensions of the operands differ.
    """
    assert X1.shape[1] == X2.shape[0], "inner dimensions must match"

    return X1 @ X2

# Baseline: print the direct product and time the bare `A @ B` expression
# (not the `compute` wrapper) with IPython's %timeit: 1000 runs of 1 loop each.
print(f"Original: {A @ B}")
%timeit -r 1000 -n 1 A @ B



# A @ B equals the sum over i of the outer products (column i of A) @ (row i of B)
def decompose_elementwise_and_compute(X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
    """Compute ``X1 @ X2`` as a sum of per-index outer products.

    For every inner index ``i`` the single column ``X1[:, i]`` is multiplied
    by the single row ``X2[i, :]``; the resulting matrices are added up.

    Args:
        X1: Left operand; its number of columns must equal the number of
            rows of ``X2``.
        X2: Right operand.

    Returns:
        The sum of the outer products. When the inner dimension is empty, a
        zero matrix of shape ``(X1.shape[0], X2.shape[1])`` is returned.

    Raises:
        AssertionError: If the inner dimensions of the operands differ.
    """
    assert X1.shape[1] == X2.shape[0], "inner dimensions must match"

    # Slicing with i:i+1 (instead of indexing with i) keeps both operands
    # 2-D, so each product is a full (rows of X1) x (columns of X2) matrix.
    terms = [X1[:, i:i+1] @ X2[i:i+1, :] for i in range(X1.shape[1])]
    if not terms:
        # sum([]) would yield the plain int 0 rather than a matrix.
        return np.zeros((X1.shape[0], X2.shape[1]), dtype=np.result_type(X1, X2))
    return sum(terms)


# Print the outer-product-sum result and time the function call with
# IPython's %timeit: 1000 runs of 1 loop each.
print(f"Decompostion 2: {decompose_elementwise_and_compute(A, B)}")
%timeit -r 1000 -n 1 decompose_elementwise_and_compute(A, B)


# Split A's columns (and B's corresponding rows) into two fixed groups,
# multiply each group separately, and add the two partial products
def decompose_and_compute(X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
    """Compute ``X1 @ X2`` as the sum of two partial products.

    The inner dimension is partitioned into two groups. Each group selects
    columns of ``X1`` and the matching rows of ``X2``; the two partial
    products are added to give the full product.

    Args:
        X1: Left operand; its number of columns must equal the number of
            rows of ``X2``.
        X2: Right operand.

    Returns:
        ``regular + outlier``, the sum of the two partial products.

    Raises:
        AssertionError: If the inner dimensions of the operands differ.
    """
    assert X1.shape[1] == X2.shape[0], "inner dimensions must match"

    # Even inner indices form the "regular" group and odd ones the "outlier"
    # group; for five columns this is exactly [0, 2, 4] and [1, 3]. Deriving
    # the split from the operands (rather than fixed index lists applied to
    # module-level matrices) makes the function work for any compatible pair.
    regular = X1[:, 0::2] @ X2[0::2, :]
    outlier = X1[:, 1::2] @ X2[1::2, :]
    return regular + outlier

# Print the result of the two-group split and time the function call with
# IPython's %timeit: 1000 runs of 1 loop each.
print(f"Decomposition: {decompose_and_compute(A, B)}")
%timeit -r 1000 -n 1 decompose_and_compute(A, B)


Original: [[ 140.  -34.]
 [-167.  124.]
 [-174.  168.]]
The slowest run took 14.43 times longer than the fastest. This could mean that an intermediate result is being cached.
1.35 µs ± 778 ns per loop (mean ± std. dev. of 1000 runs, 1 loop each)
Decompostion 2: [[ 140.  -34.]
 [-167.  124.]
 [-174.  168.]]
The slowest run took 18.67 times longer than the fastest. This could mean that an intermediate result is being cached.
11.2 µs ± 6.04 µs per loop (mean ± std. dev. of 1000 runs, 1 loop each)
Decomposition: [[ 140.  -34.]
 [-167.  124.]
 [-174.  168.]]
10.6 µs ± 1.34 µs per loop (mean ± std. dev. of 1000 runs, 1 loop each)
